Beispiel #1
0
def main():
    dataset = VOCDetectionDataset(year='2007', split='test')
    models = [
        ('Faster R-CNN', FasterRCNNVGG16(pretrained_model='voc07')),
        ('SSD300', SSD300(pretrained_model='voc0712')),
        ('SSD512', SSD512(pretrained_model='voc0712')),
    ]
    indices = [29, 301, 189, 229]

    fig = plot.figure(figsize=(30, 30))
    for i, idx in enumerate(indices):
        for j, (name, model) in enumerate(models):
            img, _, _ = dataset[idx]
            bboxes, labels, scores = model.predict([img])
            bbox, label, score = bboxes[0], labels[0], scores[0]

            ax = fig.add_subplot(len(indices), len(models),
                                 i * len(models) + j + 1)
            vis_bbox(img,
                     bbox,
                     label,
                     score,
                     label_names=voc_detection_label_names,
                     ax=ax)

            # Set MatplotLib parameters
            ax.set_aspect('equal')
            if i == 0:
                font = FontProperties()
                font.set_family('serif')
                ax.set_title(name, fontsize=35, y=1.03, fontproperties=font)
            plot.axis('off')
            plot.tight_layout()

    plot.show()
Beispiel #2
0
 def setUp(self):
     self.dataset = VOCDetectionDataset(
         split=self.split,
         year=self.year,
         use_difficult=self.use_difficult,
         return_difficult=self.return_difficult)
     self.n_out = 4 if self.return_difficult else 3
Beispiel #3
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('faster_rcnn', 'ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=32)
    args = parser.parse_args()

    if args.model == 'faster_rcnn':
        model = FasterRCNNVGG16(pretrained_model='voc07')
    elif args.model == 'ssd300':
        model = SSD300(pretrained_model='voc0712')
    elif args.model == 'ssd512':
        model = SSD512(pretrained_model='voc0712')

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    model.use_preset('evaluate')

    dataset = VOCDetectionDataset(year='2007',
                                  split='test',
                                  use_difficult=True,
                                  return_difficult=True)
    iterator = iterators.SerialIterator(dataset,
                                        args.batchsize,
                                        repeat=False,
                                        shuffle=False)

    pred_bboxes, pred_labels, pred_scores, gt_values = \
        apply_detection_link(model, iterator, hook=ProgressHook(len(dataset)))
    gt_bboxes, gt_labels, gt_difficults = gt_values

    eval_ = eval_detection_voc(pred_bboxes,
                               pred_labels,
                               pred_scores,
                               gt_bboxes,
                               gt_labels,
                               gt_difficults,
                               use_07_metric=True)

    print()
    print('mAP: {:f}'.format(eval_['map']))
    for l, name in enumerate(voc_detection_label_names):
        if l in eval_:
            print('{:s}: {:f}'.format(name, eval_[l]['ap']))
        else:
            print('{:s}: -'.format(name))
Beispiel #4
0
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    train_data = VOCDetectionDataset(split='trainval', year='2007')
    test_data = VOCDetectionDataset(split='test',
                                    year='2007',
                                    use_difficult=True,
                                    return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_detection_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
        chainer.cuda.get_device(args.gpu).use()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    def transform(in_data):
        img, bbox, label = in_data
        _, H, W = img.shape
        img = faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

        # horizontally flip
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox, (o_H, o_W), x_flip=params['x_flip'])

        return img, bbox, label, scale

    train_data = TransformDataset(train_data, transform)

    train_iter = chainer.iterators.MultiprocessIterator(train_data,
                                                        batch_size=1,
                                                        n_processes=None,
                                                        shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 batch_size=1,
                                                 repeat=False,
                                                 shuffle=False)
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=args.gpu)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    trainer.extend(extensions.snapshot_object(model.faster_rcnn,
                                              'snapshot_model.npz'),
                   trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss'],
                                             file_name='loss.png',
                                             trigger=plot_interval),
                       trigger=plot_interval)

    trainer.extend(
        DetectionVOCEvaluator(test_iter,
                              model.faster_rcnn,
                              use_07_metric=True,
                              label_names=voc_detection_label_names),
        trigger=ManualScheduleTrigger([args.step_size, args.iteration],
                                      'iteration'),
        invoke_before_training=False)

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Beispiel #5
0
def main():
    chainer.config.train = False

    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('faster_rcnn', 'ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--pretrained_model')
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=32)
    args = parser.parse_args()

    if args.model == 'faster_rcnn':
        if args.pretrained_model:
            model = FasterRCNNVGG16(n_fg_class=20,
                                    pretrained_model=args.pretrained_model)
        else:
            model = FasterRCNNVGG16(pretrained_model='voc07')
    elif args.model == 'ssd300':
        if args.pretrained_model:
            model = SSD300(n_fg_class=20,
                           pretrained_model=args.pretrained_model)
        else:
            model = SSD300(pretrained_model='voc0712')
    elif args.model == 'ssd512':
        if args.pretrained_model:
            model = SSD512(n_fg_class=20,
                           pretrained_model=args.pretrained_model)
        else:
            model = SSD512(pretrained_model='voc0712')

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    model.use_preset('evaluate')

    dataset = VOCDetectionDataset(year='2007',
                                  split='test',
                                  use_difficult=True,
                                  return_difficult=True)
    iterator = iterators.SerialIterator(dataset,
                                        args.batchsize,
                                        repeat=False,
                                        shuffle=False)

    imgs, pred_values, gt_values = apply_prediction_to_iterator(
        model.predict, iterator, hook=ProgressHook(len(dataset)))
    # delete unused iterator explicitly
    del imgs

    pred_bboxes, pred_labels, pred_scores = pred_values
    gt_bboxes, gt_labels, gt_difficults = gt_values

    result = eval_detection_voc(pred_bboxes,
                                pred_labels,
                                pred_scores,
                                gt_bboxes,
                                gt_labels,
                                gt_difficults,
                                use_07_metric=True)

    print()
    print('mAP: {:f}'.format(result['map']))
    for l, name in enumerate(voc_detection_label_names):
        if result['ap'][l]:
            print('{:s}: {:f}'.format(name, result['ap'][l]))
        else:
            print('{:s}: -'.format(name))
Beispiel #6
0
import numpy as np

import chainer

from chainercv.datasets import VOCDetectionDataset
from chainercv.extensions import DetectionVisReport
from chainercv.wrappers import bbox_resize_hook
from chainercv.wrappers import SubtractWrapper
from chainercv.wrappers import ResizeWrapper
from chainercv.wrappers import output_shape_soft_min_hard_max

from faster_rcnn import FasterRCNN

if __name__ == '__main__':
    test_data = VOCDetectionDataset(mode='train',
                                    use_cache=True,
                                    year='2007',
                                    bgr=True)
    wrappers = [
        lambda d: SubtractWrapper(d,
                                  value=np.array([103.939, 116.779, 123.68])),
        lambda d: ResizeWrapper(d,
                                preprocess_idx=0,
                                output_shape=output_shape_soft_min_hard_max(
                                    600, 1200),
                                hook=bbox_resize_hook(1)),
    ]
    for wrapper in wrappers:
        test_data = wrapper(test_data)

    model = FasterRCNN()
    chainer.serializers.load_npz('VGG16_faster_rcnn_final.model', model)
Beispiel #7
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_detection_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_detection_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCDetectionDataset(year='2007', split='trainval'),
                            VOCDetectionDataset(year='2012',
                                                split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCDetectionDataset(year='2007',
                               split='test',
                               use_difficult=True,
                               return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))

    trainer.extend(DetectionVOCEvaluator(
        test_iter,
        model,
        use_07_metric=True,
        label_names=voc_detection_label_names),
                   trigger=(10000, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Beispiel #8
0
from chainercv.datasets import VOCSemanticSegmentationDataset
from chainercv.datasets \
    import voc_semantic_segmentation_label_colors
from chainercv.datasets \
    import voc_semantic_segmentation_label_names
from chainercv.visualizations import vis_image
from chainercv.visualizations import vis_label
import matplotlib.pyplot as plot


fig = plot.figure(figsize=(26, 10))
ax1 = fig.add_subplot(1, 2, 1)
plot.axis('off')
ax2 = fig.add_subplot(1, 2, 2)
plot.axis('off')
dataset = VOCDetectionDataset()
img, bbox, label = dataset[310]

vis_bbox(img, bbox, label,
        label_names=voc_detection_label_names,
         ax=ax1)

dataset = VOCSemanticSegmentationDataset()
img, label = dataset[30]
vis_image(img, ax=ax2)
_, legend_handles = vis_label(
    label,
    label_names=voc_semantic_segmentation_label_names,
    label_colors=voc_semantic_segmentation_label_colors,
    alpha=0.9, ax=ax2)
# ax2.legend(handles=legend_handles, bbox_to_anchor=(1, 1), loc=2)
Beispiel #9
0
            plt.Rectangle(xy,
                          width,
                          height,
                          fill=False,
                          edgecolor='red',
                          linewidth=3))
        if label_names is not None:
            ax.text(bbox[0],
                    bbox[1],
                    label_names[bbox[4].astype(np.int)],
                    style='italic',
                    bbox={
                        'facecolor': 'white',
                        'alpha': 0.7,
                        'pad': 10
                    })


if __name__ == '__main__':
    from chainercv.datasets import VOCDetectionDataset

    dataset = VOCDetectionDataset()

    for i in range(0, 100, 4):
        fig = plt.figure(figsize=(14, 14))
        for j in range(4):
            ax = fig.add_subplot(2, 2, j + 1)
            img, bboxes = dataset.get_raw_data(i + j)
            vis_img_bbox(img, bboxes, dataset.labels, ax)
        plt.show()
            label_names = getattr(self.dataset, 'labels', None)
            vis_img_bbox(vis_img,
                         raw_bboxes,
                         label_names=label_names,
                         ax=ax_gt)

            ax_pred = fig.add_subplot(2, 1, 2)
            ax_pred.set_title('prediction')
            vis_img_bbox(vis_img, bboxes, label_names=label_names, ax=ax_pred)

            plt.savefig(out_file)


if __name__ == '__main__':
    from chainercv.datasets import VOCDetectionDataset
    from chainercv.testing import ConstantReturnModel
    import mock
    import tempfile
    train_data = VOCDetectionDataset(mode='train', use_cache=True, year='2007')
    _, bbox = train_data.get_example(3)

    model = ConstantReturnModel(bbox[None])

    trainer = mock.MagicMock()
    out_dir = tempfile.mkdtemp()
    print('outdir ', out_dir)
    trainer.out = out_dir
    trainer.updater.iteration = 0
    extension = DetectionVisReport([3], train_data, model)
    extension(trainer)
Beispiel #11
0
def main(gpu=-1, epoch=100, batch_size=1, lr=5e-4, out='result'):
    train_data = VOCDetectionDataset(mode='train', use_cache=True, year='2007')
    test_data = VOCDetectionDataset(mode='val', use_cache=True, year='2007')

    def transform(in_data):
        img, bbox = in_data
        img -= np.array([103.939, 116.779, 123.68])[:, None, None]

        # Resize bounding box to a shape
        # with the smaller edge at least at length 600
        input_shape = img.shape[1:]
        output_shape = _shape_soft_min_hard_max(input_shape, 600, 1200)
        img = transforms.resize(img, output_shape)
        bbox = transforms.bbox_resize(bbox, input_shape, output_shape)

        # horizontally flip
        img, flips = transforms.random_flip(img,
                                            horizontal_flip=True,
                                            return_flip=True)
        h_flip = flips['h']
        bbox = transforms.bbox_flip(bbox, output_shape, h_flip)
        return img, bbox

    transforms.extend(train_data, transform)
    transforms.extend(test_data, transform)

    model = FasterRCNN(gpu=gpu)
    if gpu != -1:
        model.to_gpu(gpu)
        chainer.cuda.get_device(gpu).use()
    # optimizer = chainer.optimizers.MomentumSGD(lr=lr)
    optimizer = chainer.optimizers.Adam(alpha=0.001,
                                        beta1=0.9,
                                        beta2=0.999,
                                        eps=1e-8)
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_iter = chainer.iterators.SerialIterator(test_data, batch_size=1)
    updater = ParallelUpdater(train_iter, optimizer, devices={'main': gpu})

    # updater = chainer.training.updater.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)

    log_interval = 20, 'iteration'
    val_interval = 3000, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration', 'main/time', 'main/rpn_loss_cls', 'main/rpn_loss_bbox',
        'main/loss_cls', 'main/loss_bbox'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # visualize training
    trainer.extend(extensions.PlotReport(['main/rpn_loss_cls'],
                                         file_name='rpn_loss_cls.png'),
                   trigger=log_interval)
    trainer.extend(extensions.PlotReport(['main/rpn_loss_bbox'],
                                         file_name='rpn_loss_bbox.png'),
                   trigger=log_interval)
    trainer.extend(extensions.PlotReport(['main/loss_cls'],
                                         file_name='loss_cls.png'),
                   trigger=log_interval)
    trainer.extend(extensions.PlotReport(['main/loss_bbox'],
                                         file_name='loss_bbox.png'),
                   trigger=log_interval)
    trainer.extend(
        DetectionVisReport(
            range(10),  # visualize outputs for the first 10 data of test_data
            train_data,
            model,
            filename_base='detection_train',
            predict_func=model.predict_bboxes),
        trigger=val_interval,
        invoke_before_training=True)
    trainer.extend(
        DetectionVisReport(
            range(10),  # visualize outputs for the first 10 data of test_data
            test_data,
            model,
            forward_func=model.predict_bboxes),
        trigger=val_interval,
        invoke_before_training=True)

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Beispiel #12
0
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset',
                        choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    if args.dataset == 'voc07':
        train_data = VOCDetectionDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCDetectionDataset(year='2007', split='trainval'),
            VOCDetectionDataset(year='2012', split='trainval'))
    test_data = VOCDetectionDataset(split='test',
                                    year='2007',
                                    use_difficult=True,
                                    return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_detection_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(train_data,
                                                        batch_size=1,
                                                        n_processes=None,
                                                        shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 batch_size=1,
                                                 repeat=False,
                                                 shuffle=False)
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=args.gpu)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    trainer.extend(extensions.snapshot_object(model.faster_rcnn,
                                              'snapshot_model.npz'),
                   trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss'],
                                             file_name='loss.png',
                                             trigger=plot_interval),
                       trigger=plot_interval)

    trainer.extend(
        DetectionVOCEvaluator(test_iter,
                              model.faster_rcnn,
                              use_07_metric=True,
                              label_names=voc_detection_label_names),
        trigger=ManualScheduleTrigger([args.step_size, args.iteration],
                                      'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()