Example #1
def main():
    opt = opts.parse()
    model = net.ConvNet(opt.n_classes, opt.BC, opt.nobias, opt.dropout_ratio)
    if opt.gpu > -1:
        chainer.cuda.get_device_from_id(opt.gpu).use()
        model.to_gpu()
    optimizer = optimizers.NesterovAG(lr=opt.LR, momentum=opt.momentum)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(opt.weight_decay))
    train_iter, val_iter = dataset.setup(opt)
    updater = training.StandardUpdater(train_iter, optimizer, device=opt.gpu)
    # Trainer
    trainer = training.Trainer(updater, (opt.n_epochs, 'epoch'), opt.save)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, opt.LR),
                   trigger=ManualScheduleTrigger(opt.schedule, 'epoch'))
    trainer.extend(extensions.Evaluator(val_iter, model,
                                        device=opt.gpu), trigger=(1, 'epoch'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(filename='min_loss'), trigger=MinValueTrigger(
        key='validation/main/loss', trigger=(5, 'epoch')))
    trainer.extend(extensions.snapshot(filename='max_accuracy'), trigger=MaxValueTrigger(
        key='validation/main/accuracy', trigger=(5, 'epoch')))
    trainer.extend(extensions.snapshot_object(model, 'min_loss_model'),
                   trigger=MinValueTrigger(key='validation/main/loss', trigger=(5, 'epoch')))
    trainer.extend(extensions.snapshot_object(model, 'max_accuracy_model'),
                   trigger=MaxValueTrigger(key='validation/main/accuracy', trigger=(5, 'epoch')))
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.LogReport())
    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'))
        trainer.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png'))
        trainer.extend(extensions.PlotReport(
            ['lr'], 'epoch', file_name='learning_rate.png'))
    trainer.extend(extensions.PrintReport(['elapsed_time', 'epoch', 'iteration', 'lr',
                                           'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar(update_interval=25))
    if opt.resume and os.path.exists(opt.resume):
        chainer.serializers.load_npz(opt.resume, trainer)
    # Run the training
    try:
        trainer.run()
    except Exception:
        import shutil
        import traceback
        print('\nerror message')
        print(traceback.format_exc())
        shutil.rmtree(opt.save)
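
This snippet assumes a companion opts module whose parse() returns the hyper-parameters read above. A minimal argparse-based sketch follows; the option names are inferred from the attributes the snippet uses and the default values are purely illustrative, not the author's.

def parse():
    # Hypothetical opts.parse() sketch -- not the author's implementation.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_classes', type=int, default=10)
    parser.add_argument('--BC', action='store_true')          # flag consumed by net.ConvNet
    parser.add_argument('--nobias', action='store_true')
    parser.add_argument('--dropout_ratio', type=float, default=0.5)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--LR', type=float, default=0.1)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=5e-4)
    parser.add_argument('--n_epochs', type=int, default=300)
    parser.add_argument('--schedule', type=int, nargs='+', default=[150, 225])
    parser.add_argument('--save', default='result')
    parser.add_argument('--resume', default='')
    return parser.parse_args()
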
Example #2
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--lr', '-l', type=float, default=0.0005)
    parser.add_argument('--lr-cooldown-factor',
                        '-lcf',
                        type=float,
                        default=0.1)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=28)
    args = parser.parse_args()

    np.random.seed(args.seed)

    # dataset
    train_dataset = SBDInstanceSegmentationDataset(split='train')
    test_dataset = SBDInstanceSegmentationDataset(split='val')

    # model
    fcis = FCISResNet101(n_fg_class=len(sbd_instance_segmentation_label_names),
                         pretrained_model='imagenet',
                         iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)

    # gpu
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)

    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    train_dataset = TransformDataset(train_dataset, Transform(model.fcis))

    # iterator
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                 batch_size=1,
                                                 repeat=False,
                                                 shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=args.gpu)

    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    # lr scheduler
    trainer.extend(chainer.training.extensions.ExponentialShift(
        'lr', args.lr_cooldown_factor, init=args.lr),
                   trigger=(args.cooldown_epoch, 'epoch'))

    # interval
    log_interval = 100, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    # training extensions
    trainer.extend(extensions.snapshot_object(model.fcis,
                                              filename='snapshot_model.npz'),
                   trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.LogReport(log_name='log.json', trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/roi_mask_loss',
        'validation/main/map',
    ]),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss'],
                                             file_name='loss.png',
                                             trigger=plot_interval),
                       trigger=plot_interval)

    trainer.extend(InstanceSegmentationVOCEvaluator(
        test_iter,
        model.fcis,
        iou_thresh=0.5,
        use_07_metric=True,
        label_names=sbd_instance_segmentation_label_names),
                   trigger=ManualScheduleTrigger([
                       len(train_dataset) * args.cooldown_epoch,
                       len(train_dataset) * args.epoch
                   ], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
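
A note on the InstanceSegmentationVOCEvaluator trigger above: with batch_size=1, one epoch is exactly len(train_dataset) iterations, so the two iteration counts mark the end of the cooldown epoch and of the last epoch. Written directly in epoch units, the same schedule would be:

# Equivalent trigger in epoch units (valid because batch_size=1 above):
trigger = ManualScheduleTrigger([args.cooldown_epoch, args.epoch], 'epoch')
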
Example #3
def main():
    bbox_label_names = ('loop',)  # trailing comma makes this a 1-tuple, not a string

    n_itrs = 70000
    n_step = 50000
    np.random.seed(0)
    train_data = DefectDetectionDataset(split='train')
    test_data = DefectDetectionDataset(split='test')
    proposal_params = {'min_size': 8}

    faster_rcnn = FasterRCNNVGG16(n_fg_class=1, pretrained_model='imagenet', ratios=[0.5, 1, 2],
                                  anchor_scales=[1, 4, 8, 16], min_size=512, max_size=1024,
                                  proposal_creator_params=proposal_params)
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    chainer.cuda.get_device_from_id(0).use()
    model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=1e-3, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    train_data = TransformDataset(train_data, Transform(faster_rcnn))
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=0)
    trainer = training.Trainer(
        updater, (n_itrs, 'iteration'), out='result')
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model_{.updater.iteration}.npz'), 
        trigger=(n_itrs // 5, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(n_step, 'iteration'))
    log_interval = 50, 'iteration'
    plot_interval = 100, 'iteration'
    print_interval = 20, 'iteration'
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'],
                file_name='loss.png', trigger=plot_interval
            ),
            trigger=plot_interval
        )
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=bbox_label_names),
        trigger=ManualScheduleTrigger(
            [100, 500, 1000, 5000, 10000, 20000, 40000, 60000, n_step, n_itrs], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
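
Transform (wrapped around the training data with TransformDataset above) is not shown in this snippet. A typical implementation, modeled on the ChainerCV Faster R-CNN training example; treat the details as an assumption for this particular code base.

# Assumed Transform for Faster R-CNN training: rescale the image with
# model.prepare, resize the boxes accordingly, and randomly flip horizontally.
from chainercv import transforms

class Transform(object):
    def __init__(self, faster_rcnn):
        self.faster_rcnn = faster_rcnn

    def __call__(self, in_data):
        img, bbox, label = in_data
        _, H, W = img.shape
        img = self.faster_rcnn.prepare(img)     # rescale/normalize for the backbone
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
        # horizontal flip augmentation
        img, params = transforms.random_flip(img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(bbox, (o_H, o_W), x_flip=params['x_flip'])
        return img, bbox, label, scale
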
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path',
                        type=str,
                        default='configs/base.yml',
                        help='path to config file')
    parser.add_argument('--results_dir',
                        type=str,
                        default='./result/',
                        help='directory to save the results to')
    parser.add_argument('--resume',
                        type=str,
                        default='',
                        help='path to the snapshot')
    parser.add_argument('--process_num', type=int, default=0)
    parser.add_argument('--seed', type=int, default=42)

    args = parser.parse_args()
    config = yaml_utils.Config(
        yaml.load(open(args.config_path), Loader=yaml.SafeLoader))
    pattern = "-".join([
        config.pattern, config.models['classifier']['name'],
        config.dataset['dataset_name']
    ])
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        print('Num Minibatch-size: {}'.format(config.batchsize))
        print('Num Epoch: {}'.format(config.epoch))
        print('==========================================')

    # Model
    classifier = load_models(config.models['classifier'])

    if args.resume:
        print("Resume training with snapshot:{}".format(args.resume))
        chainer.serializers.load_npz(args.resume, classifier)

    chainer.cuda.get_device_from_id(device).use()
    classifier.to_gpu()
    # models = {"classifier": classifier}

    # Optimizer
    opt = make_optimizer(classifier, comm, config)
    opt.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # Dataset
    if comm.rank == 0:
        dataset = yaml_utils.load_dataset(config)
        first_size = int(len(dataset) * config.train_val_split_ratio)
        train, val = chainer.datasets.split_dataset_random(dataset,
                                                           first_size,
                                                           seed=args.seed)
    else:
        yaml_utils.load_module(config.dataset['dataset_func'],
                               config.dataset['dataset_name'])
        train, val = None, None

    train = chainermn.scatter_dataset(train, comm)
    val = chainermn.scatter_dataset(val, comm)

    # Iterator
    train_iterator = chainer.iterators.SerialIterator(train, config.batchsize)
    val_iterator = chainer.iterators.SerialIterator(val,
                                                    config.batchsize,
                                                    repeat=False,
                                                    shuffle=False)
    kwargs = config.updater['args'] if 'args' in config.updater else {}
    kwargs.update({
        'classifier': classifier,
        'iterator': train_iterator,
        'optimizer': opt,
        'device': device,
    })

    # Updater
    updater = yaml_utils.load_updater_class(config)
    updater = updater(**kwargs)
    out = args.results_dir + '/' + pattern

    if comm.rank == 0:
        create_result_dir(out, args.config_path, config)

    # Trainer
    trainer = training.Trainer(updater, (config.epoch, 'epoch'), out=out)

    # Evaluator
    evaluator = ClassifierEvaluator(val_iterator, classifier, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Learning Rate Schedule (fixed)
    schedule = [config.epoch * 0.3, config.epoch * 0.6, config.epoch * 0.8]
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=ManualScheduleTrigger(schedule, 'epoch'))

    report_keys = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]
    if comm.rank == 0:
        # Set up logging
        trainer.extend(extensions.snapshot_object(
            classifier, 'classifier{}.npz'.format(args.process_num)),
                       trigger=MaxValueTrigger('validation/main/accuracy'))
        trainer.extend(
            extensions.LogReport(keys=report_keys,
                                 trigger=(config.display_interval, 'epoch')))
        trainer.extend(extensions.PrintReport(report_keys),
                       trigger=(config.display_interval, 'epoch'))
        trainer.extend(
            extensions.ProgressBar(
                update_interval=config.progressbar_interval))
    # Run the training
    trainer.run()
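
The YAML file loaded at the top of this example is project-specific. Judging only from the keys the snippet accesses, its structure is roughly the following; this is an illustrative sketch (shown as the equivalent Python dict) with placeholder values, not the actual schema.

# Illustrative structure of the config this snippet expects.
config_sketch = {
    'pattern': 'baseline',
    'batchsize': 64,
    'epoch': 90,
    'train_val_split_ratio': 0.9,
    'display_interval': 1,
    'progressbar_interval': 10,
    'models': {'classifier': {'name': 'resnet50'}},   # passed to load_models()
    'dataset': {'dataset_name': 'cifar10',            # consumed by yaml_utils
                'dataset_func': 'load_dataset'},
    'updater': {'args': {}},                          # extra kwargs for the updater class
}
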
Example #5
def main():
    args = parse()
    np.random.seed(args.seed)
    print('arguments: ', args)

    # Model setup
    if args.dataset == 'coco2017':
        train_data = COCODataset()
    test_data = COCODataset(json_file='instances_val2017.json',
                            name='val2017',
                            id_list_file='val2017.txt')
    if args.extractor == 'vgg16':
        mask_rcnn = MaskRCNNVGG16(n_fg_class=80,
                                  pretrained_model=args.pretrained,
                                  roi_size=args.roi_size,
                                  roi_align=args.roialign)
    elif args.extractor == 'resnet50':
        mask_rcnn = MaskRCNNResNet(n_fg_class=80,
                                   pretrained_model=args.pretrained,
                                   roi_size=args.roi_size,
                                   n_layers=50,
                                   roi_align=args.roialign)
    elif args.extractor == 'resnet101':
        mask_rcnn = MaskRCNNResNet(n_fg_class=80,
                                   pretrained_model=args.pretrained,
                                   roi_size=args.roi_size,
                                   n_layers=101,
                                   roi_align=args.roialign)
    mask_rcnn.use_preset('evaluate')
    model = MaskRCNNTrainChain(mask_rcnn,
                               gamma=args.gamma,
                               roi_size=args.roi_size)

    # Trainer setup
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    #optimizer = chainer.optimizers.Adam()#alpha=0.001, beta1=0.9, beta2=0.999 , eps=0.00000001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0001))

    train_data = TransformDataset(train_data, Transform(mask_rcnn))
    test_data = TransformDataset(test_data, Transform(mask_rcnn))
    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 batch_size=1,
                                                 repeat=False,
                                                 shuffle=False)
    updater = SubDivisionUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    # Extensions
    trainer.extend(extensions.snapshot_object(model.mask_rcnn,
                                              'snapshot_model.npz'),
                   trigger=(args.snapshot, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 10),
                   trigger=ManualScheduleTrigger([args.lr_initialchange],
                                                 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.lr_step, 'iteration'))
    if args.resume is not None:
        chainer.serializers.load_npz(args.resume, model.mask_rcnn)
    log_interval = 40, 'iteration'
    plot_interval = 160, 'iteration'
    print_interval = 40, 'iteration'

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=(args.validation, 'iteration'))
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/avg_loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/roi_mask_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/loss',
    ]),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1000))
    trainer.extend(extensions.dump_graph('main/loss'))
    try:
        trainer.run()
    except Exception:  # avoid swallowing KeyboardInterrupt/SystemExit
        traceback.print_exc()
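
SubDivisionUpdater above is a custom updater rather than part of Chainer itself; it is presumably used to accumulate gradients over several sub-batches before each optimizer step. A minimal sketch under that assumption follows; the class name and details are hypothetical, not the author's implementation.

# Hypothetical gradient-accumulation updater sketch.
import chainer
from chainer import training

class AccumulatingUpdaterSketch(training.StandardUpdater):
    def __init__(self, iterator, optimizer, n_sub=2, **kwargs):
        super(AccumulatingUpdaterSketch, self).__init__(iterator, optimizer, **kwargs)
        self.n_sub = n_sub

    def update_core(self):
        optimizer = self.get_optimizer('main')
        model = optimizer.target
        model.cleargrads()
        for _ in range(self.n_sub):
            batch = self.get_iterator('main').next()
            in_arrays = self.converter(batch, self.device)
            # scale each sub-batch loss so the accumulated gradient
            # matches one large batch
            loss = model(*in_arrays) / self.n_sub
            loss.backward()
        optimizer.update()  # apply the accumulated gradients
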
Example #6
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr',
        '-l',
        type=float,
        default=0.0005,
        help='Default value is for 1 GPU.\n'
        'The learning rate will be multiplied by the number of gpu')
    parser.add_argument('--lr-cooldown-factor',
                        '-lcf',
                        type=float,
                        default=0.1)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    # type=list would split a command-line value into characters; parse ints instead.
    parser.add_argument('--cooldown-epoch', '-ce',
                        type=int, nargs='+', default=[28, 31])
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    fcis = FCISPSROIAlignResNet101(
        n_fg_class=len(sbd_instance_segmentation_label_names),
        pretrained_model='imagenet',
        iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'), Transform(model.fcis))
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                     batch_size=1,
                                                     repeat=False,
                                                     shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr * comm.size, momentum=0.9),
        comm)
    optimizer.setup(model)

    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)

    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    # lr scheduler
    trainer.extend(chainer.training.extensions.ExponentialShift(
        'lr', args.lr_cooldown_factor, init=args.lr * comm.size),
                   trigger=ManualScheduleTrigger(args.cooldown_epoch, 'epoch'))

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        model_name = model.fcis.__class__.__name__

        trainer.extend(extensions.snapshot_object(
            model.fcis,
            filename='%s_model_iter_{.updater.iteration}.npz' % model_name),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map',
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        trainer.extend(InstanceSegmentationVOCEvaluator(
            test_iter,
            model.fcis,
            iou_thresh=0.5,
            use_07_metric=True,
            label_names=sbd_instance_segmentation_label_names),
                       trigger=ManualScheduleTrigger(args.cooldown_epoch,
                                                     'epoch'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example #7
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr',
        '-l',
        type=float,
        default=0.0005,
        help='Default value is for 1 GPU.\n'
        'The learning rate will be multiplied by the number of gpu')
    parser.add_argument('--no-ohem', action='store_true')
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    proposal_creator_params = {
        'nms_thresh': 0.7,
        'n_train_pre_nms': 12000,
        'n_train_post_nms': 2000,
        'n_test_pre_nms': 6000,
        'n_test_post_nms': 1000,
        'force_cpu_nms': False,
        'min_size': 0
    }

    fcis = FCISPSROIAlignResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        min_size=800,
        max_size=1333,
        anchor_scales=(2, 4, 8, 16, 32),
        pretrained_model='imagenet',
        iter2=False,
        proposal_creator_params=proposal_creator_params)
    fcis.use_preset('coco_evaluate')
    if args.no_ohem:
        model = FCISTrainChain(
            fcis,
            n_ohem_sample=None,
            proposal_target_creator=ProposalTargetCreator(n_sample=128))
    else:
        model = FCISTrainChain(fcis)

    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset
    train_dataset = TransformDataset(
        ConcatenatedDataset(
            COCOInstanceSegmentationDataset(split='train'),
            COCOInstanceSegmentationDataset(split='valminusminival')),
        ('img', 'mask', 'label', 'bbox', 'scale'), Transform(model.fcis))
    test_dataset = COCOInstanceSegmentationDataset(split='minival',
                                                   use_crowded=True,
                                                   return_crowded=True,
                                                   return_area=True)
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    if comm.rank == 0:
        test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                     batch_size=1,
                                                     repeat=False,
                                                     shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr * comm.size, momentum=0.9),
        comm)
    optimizer.setup(model)

    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    converter = functools.partial(
        concat_examples,
        padding=0,
        # img, masks, labels, bboxes, scales
        indices_concat=[0, 1, 2, 4],  # img, masks, labels, _, scales
        indices_to_device=[0],  # img
    )

    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       converter=converter,
                                                       device=device)

    trainer = chainer.training.Trainer(updater, (18, 'epoch'), out=args.out)

    # lr scheduler
    trainer.extend(chainer.training.extensions.ExponentialShift('lr',
                                                                0.1,
                                                                init=args.lr *
                                                                comm.size),
                   trigger=ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 10, 'iteration'

        # training extensions
        model_name = model.fcis.__class__.__name__
        trainer.extend(chainer.training.extensions.snapshot_object(
            model.fcis,
            savefun=chainer.serializers.save_npz,
            filename='%s_model_iter_{.updater.iteration}.npz' % model_name),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]

        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        trainer.extend(InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.fcis,
            label_names=coco_instance_segmentation_label_names),
                       trigger=ManualScheduleTrigger(
                           [len(train_dataset) * 12,
                            len(train_dataset) * 15], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example #8
def main():
    experiment_name = "Stacked_16_16_16_16"
    snapshot_name = "snapshot_iter_27215"
    config_path = "/efs/fMRI_AE/{}/log/config.yml".format(experiment_name)
    config = load_config(config_path)
    config["additional information"]["mask"]["loader"]["params"][
        "mask_path"] = path.join(
            config["additional information"]["mask"]["directory"],
            config["additional information"]["mask"]["file"])
    config["additional information"]["mask"]["loader"]["params"][
        "crop"] = config["additional information"]["crop"]
    snapshot_path = "/efs/fMRI_AE/{}/model/{}".format(experiment_name,
                                                      snapshot_name)
    # print("configured as follows:")
    # print(yaml_dump(config))
    while True:
        s = input("ok? (y/n):")
        if s == 'y' or s == 'Y':
            log_config(config, "training start")
            break
        elif s == 'n' or s == 'N':
            destroy_config(config)
            exit(1)
    try:
        try:
            print("mask loading...")
            load_mask_module = import_module(
                config["additional information"]["mask"]["loader"]["module"],
                config["additional information"]["mask"]["loader"]["package"])
            load_mask = getattr(
                load_mask_module,
                config["additional information"]["mask"]["loader"]["function"])
            mask = load_mask(
                **config["additional information"]["mask"]["loader"]["params"])
            print("done.")
            print("mask.shape: {}".format(mask.shape))
        except FileNotFoundError as e:
            raise e

        model_module = import_module(config["model"]["module"],
                                     config["model"]["package"])
        Model = getattr(model_module, config["model"]["class"])
        model = Model(mask=mask, **config["model"]["params"])
        finetune_config = config["additional information"]["finetune"]
        if finetune_config is not None:
            load_npz(path.join(finetune_config["directory"],
                               finetune_config["file"]),
                     model,
                     strict=False)

        try:
            chainer.cuda.get_device_from_id(0).use()
            gpu = 0
            print("transferring model to GPU...")
            model.to_gpu(gpu)
            print("GPU enabled")
        except RuntimeError:
            gpu = -1
            print("GPU disabled")

        dataset_module = import_module(config["dataset"]["module"],
                                       config["dataset"]["package"])
        Dataset = getattr(dataset_module, config["dataset"]["class"])
        train_dataset = Dataset(**config["dataset"]["train"]["params"])
        valid_dataset = Dataset(**config["dataset"]["valid"]["params"])

        train_iterator = Iterator(train_dataset, config["batch"]["train"],
                                  True, True)
        valid_iterator = Iterator(valid_dataset, config["batch"]["valid"],
                                  False, False)

        Optimizer = getattr(chainer.optimizers, config["optimizer"]["class"])
        optimizer = Optimizer(**config["optimizer"]["params"])

        optimizer.setup(model)

        for hook_config in config["optimizer"]["hook"]:
            hook_module = import_module(hook_config["module"],
                                        hook_config["package"])
            Hook = getattr(hook_module, hook_config["class"])
            hook = Hook(**hook_config["params"])
            optimizer.add_hook(hook)

        updater = Updater(train_iterator, optimizer, device=gpu)

        trainer = Trainer(updater, **config["trainer"]["params"])
        trainer.extend(snapshot(),
                       trigger=config["trainer"]["snapshot_interval"])
        trainer.extend(snapshot_object(model,
                                       "model_iter_{.updater.iteration}"),
                       trigger=config["trainer"]["model_interval"])
        trainer.extend(observe_lr(), trigger=config["trainer"]["log_interval"])
        trainer.extend(
            LogReport([
                "epoch", "iteration", "main/loss", "main/pca_loss",
                "main/reconstruction_loss", "validation/main/loss"
            ],
                      trigger=config["trainer"]["log_interval"]))
        trainer.extend(Evaluator(valid_iterator, model, device=gpu),
                       trigger=config["trainer"]["eval_interval"])
        trainer.extend(PrintReport([
            "epoch", "iteration", "main/loss", "main/pca_loss",
            "main/reconstruction_loss", "validation/main/loss"
        ]),
                       trigger=config["trainer"]["log_interval"])
        trainer.extend(ProgressBar(update_interval=1))

        if "schedule" in config["additional information"].keys():
            for i, interval_funcs in enumerate(
                    config["additional information"]["schedule"].items()):
                interval, funcs = interval_funcs
                f = lambda trainer, funcs=funcs: [
                    trainer.updater.get_optimizer('main').target.
                    __getattribute__(func["function"])(*func["params"])
                    for func in funcs
                ]
                trainer.extend(f,
                               name="schedule_{}".format(i),
                               trigger=ManualScheduleTrigger(*interval))

        load_npz(snapshot_path, trainer)
        target = trainer.updater.get_optimizer("main").target
        target.reset_pca()
        target.attach_pca()
        ipca_param = np.load(
            "/efs/fMRI_AE/Stacked_8_8_8_8_feature/ipca_mean_7920_components_990_7920.npz"
        )
        target.pca.W = chainer.Parameter(ipca_param["components"])
        target.pca.bias = chainer.Parameter(ipca_param["mean"])
        target.pca.disable_update()
        target.pca.to_gpu(gpu)
        target.detach_pca_loss()
        target.attach_reconstruction_loss()
        target.release_decoder()
        target.freeze_encoder()

        trainer.run()
        log_config(config, "succeeded")

    except Exception as e:
        log_config(config, "unintentional termination")
        raise e
Example #9
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--train_data_dir', '-t', default=WIDER_TRAIN_DIR,
                        help='Training dataset (WIDER_train)')
    parser.add_argument('--train_annotation', '-ta', default=WIDER_TRAIN_ANNOTATION_MAT,
                        help='Annotation file (.mat) for training dataset')
    parser.add_argument('--val_data_dir', '-v', default=WIDER_VAL_DIR,
                        help='Validation dataset (WIDER_train)')
    parser.add_argument('--val_annotation', '-va', default=WIDER_VAL_ANNOTATION_MAT,
                        help='Annotation file (.mat) for validation dataset')
    args = parser.parse_args()

    np.random.seed(args.seed)

    # for logging processed files
    logger = logging.getLogger('logger')
    logger.setLevel(logging.DEBUG)
    handler = logging.FileHandler(filename='filelog.log')
    handler.setLevel(logging.DEBUG)
    logger.addHandler(handler)
    
    blacklist = []
    with open(BLACKLIST_FILE, 'r') as f:
        for line in f:
            stripped = line.strip()
            if stripped:
                blacklist.append(stripped)
    
    # train_data = VOCDetectionDataset(split='trainval', year='2007')
    # test_data = VOCDetectionDataset(split='test', year='2007',
    #                                 use_difficult=True, return_difficult=True)
    train_data = WIDERFACEDataset(args.train_data_dir, args.train_annotation, 
        logger=logger, exclude_file_list=blacklist)
    test_data = WIDERFACEDataset(args.val_data_dir, args.val_annotation)
    # The VOC variant, kept for reference:
    # faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_detection_label_names),
    #                               pretrained_model='imagenet')
    # The original snippet never defined faster_rcnn; the single-class
    # construction below (one 'face' class) is an assumption.
    faster_rcnn = FasterRCNNVGG16(n_fg_class=1, pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
        chainer.cuda.get_device(args.gpu).use()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))


    train_data = TransformDataset(train_data, transform)
    #import pdb; pdb.set_trace()
    #train_iter = chainer.iterators.MultiprocessIterator(
    #    train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    train_iter = chainer.iterators.SerialIterator(
        train_data, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'],
                file_name='loss.png', trigger=plot_interval
            ),
            trigger=plot_interval
        )

    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=('face',)),
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'),
        invoke_before_training=False)

    trainer.extend(extensions.dump_graph('main/loss'))

    # try:
    #     warnings.filterwarnings('error', category=RuntimeWarning)
    trainer.run()
Example #10
def main(config):
    opts = config()

    comm = chainermn.create_communicator(opts.communicator)
    device = comm.intra_rank

    backborn_cfg = opts.backborn_cfg

    df = pd.read_csv(opts.path_data + opts.train_df).sample(frac=1)

    ################### pseudo labeling #########################
    if opts.pseudo_labeling_path is not None:
        test_df = pd.read_csv(opts.path_data + opts.test_df)
        labels = np.load(opts.pseudo_labeling_path, allow_pickle=False)
        labels = np.concatenate((labels, labels))
        count = 0
        valid_array = []
        valid_sirna = []
        for i, label in enumerate(labels):
            if label.max() > 0.0013:
                count = count + 1
                valid_array.append(i)
                valid_sirna.append(label.argmax())
        print(count)
        pseudo_df = test_df.iloc[valid_array, :]
        pseudo_df["sirna"] = valid_sirna
        pseudo_df = pseudo_df
        df = pd.concat([df, pseudo_df]).sample(frac=1)
    ################### pseudo labeling #########################

    for i, (train_df, valid_df) in enumerate(
            stratified_groups_kfold(df,
                                    target=opts.fold_target,
                                    n_splits=opts.fold)):
        if comm.rank == 0:
            train_df.to_csv(
                opts.path_data + 'train' + '_fold' + str(i) + '.csv',
                columns=[
                    'id_code', 'experiment', 'plate', 'well', 'sirna',
                    'filename', 'cell', 'site'
                ])
            valid_df.to_csv(
                opts.path_data + 'valid' + '_fold' + str(i) + '.csv',
                columns=[
                    'id_code', 'experiment', 'plate', 'well', 'sirna',
                    'filename', 'cell', 'site'
                ])
            print("Save a csvfile of fold_" + str(i))
        dataset = opts.dataset
        train_dataset = dataset(train_df, opts.path_data)
        val_dataset = dataset(valid_df, opts.path_data)

        backborn = chcv2_get_model(
            backborn_cfg['name'],
            pretrained=backborn_cfg['pretrain'],
            in_size=opts.input_shape)[backborn_cfg['layer']]

        model = opts.model(backborn=backborn).copy(mode='init')
        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()

        mean = opts.mean

        train_data = TransformDataset(train_dataset, opts.train_transform)
        val_data = TransformDataset(val_dataset, opts.valid_trainsform)

        if comm.rank == 0:
            train_indices = train_data
            val_indices = val_data
        else:
            train_indices = None
            val_indices = None

        train_data = chainermn.scatter_dataset(train_indices,
                                               comm,
                                               shuffle=True)
        val_data = chainermn.scatter_dataset(val_indices, comm, shuffle=False)
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data,
            opts.batchsize,
            shuffle=True,
            n_processes=opts.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val_data,
            opts.batchsize,
            repeat=False,
            shuffle=False,
            n_processes=opts.loaderjob)
        print('finished loading dataset')

        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()
        if opts.optimizer == "CorrectedMomentumSGD":
            optimizer = chainermn.create_multi_node_optimizer(
                CorrectedMomentumSGD(lr=opts.lr), comm)
        elif opts.optimizer == "NesterovAG":
            optimizer = chainermn.create_multi_node_optimizer(
                NesterovAG(lr=opts.lr), comm)
        else:
            optimizer = chainermn.create_multi_node_optimizer(
                Adam(alpha=opts.alpha,
                     weight_decay_rate=opts.weight_decay,
                     adabound=True,
                     final_lr=0.5), comm)

        optimizer.setup(model)
        if opts.optimizer == "CorrectedMomentumSGD":
            for param in model.params():
                if param.name not in ('beta', 'gamma'):
                    param.update_rule.add_hook(WeightDecay(opts.weight_decay))

        if opts.fc_lossfun == 'softmax_cross_entropy':
            fc_lossfun = F.softmax_cross_entropy
        elif opts.fc_lossfun == 'focal_loss':
            if opts.ls:
                focal_loss = FocalLoss(label_smoothing=True)
            else:
                focal_loss = FocalLoss()
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss':
            if opts.ls:
                focal_loss = AutoFocalLoss(label_smoothing=True)
            else:
                focal_loss = AutoFocalLoss()
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss_bce':
            if opts.ls:
                focal_loss = AutoFocalLossBCE(label_smoothing=True)
            else:
                focal_loss = AutoFocalLoss()
            fc_lossfun = focal_loss.loss
        if opts.metric_lossfun == 'arcface':
            arcface = ArcFace()
            metric_lossfun = arcface.loss
        elif opts.metric_lossfun == 'adacos':
            adacos = AdaCos()
            metric_lossfun = adacos.loss

        updater = opts.updater(train_iter,
                               optimizer,
                               model,
                               device=device,
                               max_epoch=opts.max_epoch,
                               fix_sche=opts.fix_sche,
                               metric_lossfun=metric_lossfun,
                               fc_lossfun=fc_lossfun,
                               metric_w=opts.metric_w,
                               fc_w=opts.fc_w)
        evaluator = chainermn.create_multi_node_evaluator(
            opts.evaluator(val_iter,
                           model,
                           device=device,
                           max_epoch=opts.max_epoch,
                           fix_sche=opts.fix_sche,
                           metric_lossfun=metric_lossfun,
                           fc_lossfun=fc_lossfun,
                           metric_w=opts.metric_w,
                           fc_w=opts.fc_w), comm)

        trainer = training.Trainer(updater, (opts.max_epoch, 'epoch'),
                                   out=opts.out + '_fold' + str(i))

        if opts.optimizer == "CorrectedMomentumSGD":
            trainer.extend(extensions.ExponentialShift('lr', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))
        elif opts.optimizer == "NesterovAG":
            trainer.extend(extensions.ExponentialShift('lr', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))
        else:
            trainer.extend(extensions.ExponentialShift('alpha', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))

        trainer.extend(evaluator, trigger=(int(opts.max_epoch / 10), 'epoch'))
        #         trainer.extend(evaluator, trigger=(int(1), 'epoch'))
        log_interval = 0.1, 'epoch'
        print_interval = 0.1, 'epoch'

        if comm.rank == 0:
            trainer.extend(chainer.training.extensions.observe_lr(),
                           trigger=log_interval)
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model' + '_{.updater.epoch}.npz'),
                           trigger=(opts.max_epoch / 10, 'epoch'))
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model_f1max.npz'),
                           trigger=chainer.training.triggers.MaxValueTrigger(
                               'validation/main/accuracy',
                               trigger=(opts.max_epoch / 10, 'epoch')))
            trainer.extend(extensions.LogReport(trigger=log_interval))
            trainer.extend(extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
                'main/face_loss', 'main/ce_loss', 'main/accuracy',
                'validation/main/loss', 'validation/main/face_loss',
                'validation/main/ce_loss', 'validation/main/accuracy'
            ]),
                           trigger=print_interval)
            trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.run()
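
stratified_groups_kfold is a project helper that yields (train_df, valid_df) DataFrame pairs. A minimal sketch under the assumption that it behaves like a plain stratified k-fold on the target column; the real helper presumably also respects experiment groups, which this sketch ignores.

# Hypothetical sketch of stratified_groups_kfold -- not the author's code.
from sklearn.model_selection import StratifiedKFold

def stratified_groups_kfold(df, target, n_splits=5, seed=0):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for train_idx, valid_idx in skf.split(df, df[target]):
        yield df.iloc[train_idx], df.iloc[valid_idx]
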
Example #11
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr',
        '-l',
        type=float,
        default=0.0005,
        help='Default value is for 1 GPU.\n'
        'The learning rate should be multiplied by the number of gpu')
    parser.add_argument('--epoch', '-e', type=int, default=18)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=12)
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    proposal_creator_params = FCISResNet101.proposal_creator_params
    proposal_creator_params['min_size'] = 2
    fcis = FCISResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        anchor_scales=(4, 8, 16, 32),
        pretrained_model='imagenet',
        iter2=False,
        proposal_creator_params=proposal_creator_params)
    fcis.use_preset('coco_evaluate')
    proposal_target_creator = ProposalTargetCreator()
    proposal_target_creator.neg_iou_thresh_lo = 0.0
    model = FCISTrainChain(fcis,
                           proposal_target_creator=proposal_target_creator)

    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # train dataset
    train_dataset = COCOInstanceSegmentationDataset(year='2014', split='train')
    vmml_dataset = COCOInstanceSegmentationDataset(year='2014',
                                                   split='valminusminival')

    # filter non-annotated data
    train_indices = np.array([
        i for i, label in enumerate(train_dataset.slice[:, ['label']])
        if len(label[0]) > 0
    ],
                             dtype=np.int32)
    train_dataset = train_dataset.slice[train_indices]
    vmml_indices = np.array([
        i for i, label in enumerate(vmml_dataset.slice[:, ['label']])
        if len(label[0]) > 0
    ],
                            dtype=np.int32)
    vmml_dataset = vmml_dataset.slice[vmml_indices]

    train_dataset = TransformDataset(
        ConcatenatedDataset(train_dataset, vmml_dataset),
        ('img', 'mask', 'label', 'bbox', 'scale'), Transform(model.fcis))
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    # test dataset
    if comm.rank == 0:
        test_dataset = COCOInstanceSegmentationDataset(year='2014',
                                                       split='minival',
                                                       use_crowded=True,
                                                       return_crowded=True,
                                                       return_area=True)
        indices = np.arange(len(test_dataset))
        test_dataset = test_dataset.slice[indices]
        test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                     batch_size=1,
                                                     repeat=False,
                                                     shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)

    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)

    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    # lr scheduler
    @make_shift('lr')
    def lr_scheduler(trainer):
        base_lr = args.lr

        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        if (iteration * comm.size) < 2000:
            rate = 0.1
        elif epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        trainer.extend(extensions.snapshot_object(
            model.fcis, filename='snapshot_model.npz'),
                       trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        trainer.extend(InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.fcis,
            label_names=coco_instance_segmentation_label_names),
                       trigger=ManualScheduleTrigger([
                           len(train_dataset) * args.cooldown_epoch,
                           len(train_dataset) * args.epoch
                       ], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example #12
def main():
    config = get_config()
    # print("configured as follows:")
    # print(yaml_dump(config))
    while True:
        s = input("ok? (y/n):")
        if s == 'y' or s == 'Y':
            log_config(config, "training start")
            break
        elif s == 'n' or s == 'N':
            destroy_config(config)
            exit(1)
    try:
        try:
            print("mask loading...")
            load_mask_module = import_module(
                config["additional information"]["mask"]["loader"]["module"],
                config["additional information"]["mask"]["loader"]["package"])
            load_mask = getattr(
                load_mask_module,
                config["additional information"]["mask"]["loader"]["function"])
            mask = load_mask(
                **config["additional information"]["mask"]["loader"]["params"])
            print("done.")
            print("mask.shape: {}".format(mask.shape))
        except FileNotFoundError as e:
            raise e

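        # build the model class named in the config and optionally load fine-tune weights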
        model_module = import_module(config["model"]["module"],
                                     config["model"]["package"])
        Model = getattr(model_module, config["model"]["class"])
        model = Model(mask=mask, **config["model"]["params"])
        finetune_config = config["additional information"].get("finetune")
        if finetune_config is not None:
            load_npz(path.join(finetune_config["directory"],
                               finetune_config["file"]),
                     model,
                     strict=False)

        try:
            chainer.cuda.get_device_from_id(0).use()
            gpu = 0
            print("transferring model to GPU...")
            model.to_gpu(gpu)
            print("GPU enabled")
        except RuntimeError:
            gpu = -1
            print("GPU disabled")

        dataset_module = import_module(config["dataset"]["module"],
                                       config["dataset"]["package"])
        Dataset = getattr(dataset_module, config["dataset"]["class"])
        train_dataset = Dataset(**config["dataset"]["train"]["params"])
        valid_dataset = Dataset(**config["dataset"]["valid"]["params"])

        train_iterator = Iterator(train_dataset, config["batch"]["train"],
                                  True, True)
        valid_iterator = Iterator(valid_dataset, config["batch"]["valid"],
                                  False, False)

        Optimizer = getattr(chainer.optimizers, config["optimizer"]["class"])
        optimizer = Optimizer(**config["optimizer"]["params"])

        optimizer.setup(model)

        for hook_config in config["optimizer"]["hook"]:
            hook_module = import_module(hook_config["module"],
                                        hook_config["package"])
            Hook = getattr(hook_module, hook_config["class"])
            hook = Hook(**hook_config["params"])
            optimizer.add_hook(hook)

        updater = Updater(train_iterator, optimizer, device=gpu)

        trainer = Trainer(updater, **config["trainer"]["params"])
        trainer.extend(snapshot(),
                       trigger=config["trainer"]["snapshot_interval"])
        trainer.extend(snapshot_object(model,
                                       "model_iter_{.updater.iteration}"),
                       trigger=config["trainer"]["model_interval"])
        trainer.extend(observe_lr(), trigger=config["trainer"]["log_interval"])
        trainer.extend(
            LogReport(
                ["epoch", "iteration", "main/loss", "validation/main/loss"],
                trigger=config["trainer"]["log_interval"]))
        trainer.extend(Evaluator(valid_iterator, model, device=gpu),
                       trigger=config["trainer"]["eval_interval"])
        trainer.extend(PrintReport(
            ["epoch", "iteration", "main/loss", "validation/main/loss"]),
                       trigger=config["trainer"]["log_interval"])
        trainer.extend(ProgressBar(update_interval=1))

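        # at each scheduled interval, call the configured methods on the optimizer target (the model)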
        if "schedule" in config["additional information"]:
            for i, (interval, funcs) in enumerate(
                    config["additional information"]["schedule"].items()):
                f = lambda trainer, funcs=funcs: [
                    getattr(trainer.updater.get_optimizer('main').target,
                            func["function"])(*func["params"])
                    for func in funcs
                ]
                trainer.extend(f,
                               name="schedule_{}".format(i),
                               trigger=ManualScheduleTrigger(*interval))
        trainer.run()
        log_config(config, "succeeded")

    except Exception as e:
        log_config(config, "unintentional termination")
        raise e
Example No. 13
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--lr',
                        '-l',
                        type=float,
                        default=None,
                        help='Learning rate for multi GPUs')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=28)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    # chainermn
    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    fcis = FCISResNet101(n_fg_class=len(sbd_instance_segmentation_label_names),
                         pretrained_model='imagenet',
                         iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'), Transform(model.fcis))
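    # rank 0 builds the index array; scatter_dataset shuffles and splits it across workers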
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset,
                                                  batch_size=args.batchsize //
                                                  comm.size)

    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                     batch_size=1,
                                                     repeat=False,
                                                     shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9), comm)
    optimizer.setup(model)

    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)

    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

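    # scale the default base lr by the batch size unless --lr is given; drop to 10% after the cool-down epoch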
    @make_shift('lr')
    def lr_scheduler(trainer):
        if args.lr is None:
            base_lr = 0.0005 * args.batchsize
        else:
            base_lr = args.lr

        epoch = trainer.updater.epoch
        if epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        trainer.extend(extensions.snapshot_object(
            model.fcis, filename='snapshot_model.npz'),
                       trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map',
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        trainer.extend(InstanceSegmentationVOCEvaluator(
            test_iter,
            model.fcis,
            iou_thresh=0.5,
            use_07_metric=True,
            label_names=sbd_instance_segmentation_label_names),
                       trigger=ManualScheduleTrigger([
                           len(train_dataset) * args.cooldown_epoch,
                           len(train_dataset) * args.epoch
                       ], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example No. 14
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: LightHeadRCNN')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=1234)
    parser.add_argument('--batch-size', '-b', type=int, default=2)
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    np.random.seed(args.seed)
    random.seed(args.seed)

    # model
    light_head_rcnn = LightHeadRCNNResNet101(
        pretrained_model='imagenet', n_fg_class=len(coco_bbox_label_names))
    light_head_rcnn.use_preset('evaluate')
    model = LightHeadRCNNTrainChain(light_head_rcnn)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # train dataset
    train_dataset = COCOBboxDataset(year='2014', split='train')
    vmml_dataset = COCOBboxDataset(year='2014', split='valminusminival')

    # filter non-annotated data
    train_indices = np.array([
        i for i, label in enumerate(train_dataset.slice[:, ['label']])
        if len(label[0]) > 0
    ],
                             dtype=np.int32)
    train_dataset = train_dataset.slice[train_indices]
    vmml_indices = np.array([
        i for i, label in enumerate(vmml_dataset.slice[:, ['label']])
        if len(label[0]) > 0
    ],
                            dtype=np.int32)
    vmml_dataset = vmml_dataset.slice[vmml_indices]

    train_dataset = TransformDataset(
        ConcatenatedDataset(train_dataset, vmml_dataset),
        ('img', 'bbox', 'label', 'scale'), Transform(model.light_head_rcnn))
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset,
                                                  batch_size=args.batch_size)

    if comm.rank == 0:
        test_dataset = COCOBboxDataset(year='2014',
                                       split='minival',
                                       use_crowded=True,
                                       return_crowded=True,
                                       return_area=True)
        test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                     batch_size=1,
                                                     repeat=False,
                                                     shuffle=False)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)

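    # triple the gradients of the global context module's conv layers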
    global_context_module = model.light_head_rcnn.head.global_context_module
    global_context_module.col_max.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col_max.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row_max.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row_max.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0001))

    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.light_head_rcnn.extractor.conv1.disable_update()
    model.light_head_rcnn.extractor.res2.disable_update()

    converter = functools.partial(
        concat_examples,
        padding=0,
        # img, bboxes, labels, scales
        indices_concat=[0, 2, 3],  # img, _, labels, scales
        indices_to_device=[0],  # img
    )

    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       converter=converter,
                                                       device=device)
    trainer = chainer.training.Trainer(updater, (30, 'epoch'), out=args.out)

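    # linear warm-up from 1/3 of the base lr over 500 iterations, then x0.1 at epoch 20 and x0.01 at epoch 26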
    @make_shift('lr')
    def lr_scheduler(trainer):
        base_lr = 0.0005 * 1.25 * args.batch_size * comm.size
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        elif epoch < 20:
            rate = 1
        elif epoch < 26:
            rate = 0.1
        else:
            rate = 0.01
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        model_name = model.light_head_rcnn.__class__.__name__
        trainer.extend(chainer.training.extensions.snapshot_object(
            model.light_head_rcnn,
            savefun=chainer.serializers.save_npz,
            filename='%s_model_iter_{.updater.iteration}.npz' % model_name),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        trainer.extend(DetectionCOCOEvaluator(
            test_iter,
            model.light_head_rcnn,
            label_names=coco_bbox_label_names),
                       trigger=ManualScheduleTrigger([20, 26], 'epoch'))
        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example No. 15
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    train_data = VOCDetectionDataset(split='trainval', year='2007')
    test_data = VOCDetectionDataset(split='test',
                                    year='2007',
                                    use_difficult=True,
                                    return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_detection_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
        chainer.cuda.get_device(args.gpu).use()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

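    # resize the image with the Faster R-CNN preprocessing, rescale the boxes
    # accordingly, and apply a random horizontal flip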
    def transform(in_data):
        img, bbox, label = in_data
        _, H, W = img.shape
        img = faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

        # horizontally flip
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox, (o_H, o_W), x_flip=params['x_flip'])

        return img, bbox, label, scale

    train_data = TransformDataset(train_data, transform)

    train_iter = chainer.iterators.MultiprocessIterator(train_data,
                                                        batch_size=1,
                                                        n_processes=None,
                                                        shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 batch_size=1,
                                                 repeat=False,
                                                 shuffle=False)
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=args.gpu)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    trainer.extend(extensions.snapshot_object(model.faster_rcnn,
                                              'snapshot_model.npz'),
                   trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss'],
                                             file_name='loss.png',
                                             trigger=plot_interval),
                       trigger=plot_interval)

    trainer.extend(
        DetectionVOCEvaluator(test_iter,
                              model.faster_rcnn,
                              use_07_metric=True,
                              label_names=voc_detection_label_names),
        trigger=ManualScheduleTrigger([args.step_size, args.iteration],
                                      'iteration'),
        invoke_before_training=False)

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example No. 16
def __init__(self, out_name='mcg.dot'):
    self._out_name = out_name
    self._hook = LineProfileHook()
    self.trigger = ManualScheduleTrigger(1, 'iteration')
Example No. 17
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset',
                        choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval'))
    test_data = VOCBboxDataset(split='test',
                               year='2007',
                               use_difficult=True,
                               return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_bbox_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(train_data,
                                                        batch_size=1,
                                                        n_processes=None,
                                                        shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 batch_size=1,
                                                 repeat=False,
                                                 shuffle=False)
    updater = chainer.training.updaters.StandardUpdater(train_iter,
                                                        optimizer,
                                                        device=args.gpu)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    trainer.extend(extensions.snapshot_object(model.faster_rcnn,
                                              'snapshot_model.npz'),
                   trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss'],
                                             file_name='loss.png',
                                             trigger=plot_interval),
                       trigger=plot_interval)

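    # evaluate VOC mAP at the lr-drop iteration and at the end of training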
    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model.faster_rcnn,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=ManualScheduleTrigger(
                       [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example No. 18
def main():
    parser = argparse.ArgumentParser(description='Segmentation model')
    parser.add_argument('--config',
                        '-c',
                        default='examples/configs/seg_resnet.yaml')
    parser.add_argument('--out',
                        '-o',
                        default='results',
                        help='Output directory')
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--snapshot',
                        type=int,
                        help='Snapshot interval',
                        default=1)
    parser.add_argument('--val-set', type=int)
    parser.add_argument('--predict', action='store_true')

    parser.add_argument('--benchmark',
                        action='store_true',
                        help='To run benchmark mode')
    parser.add_argument(
        '--benchmark-iterations',
        type=int,
        default=500,
        help='the number of iterations when using benchmark mode')
    parser.add_argument('--cprofile',
                        action='store_true',
                        help='To profile with cprofile')

    args = parser.parse_args()
    config = load_config(yaml.safe_load(open(args.config)),
                         dump_yaml_dir=args.out)

    comm = chainermn.create_communicator(communicator_name='pure_nccl')
    device = comm.intra_rank + args.gpu
    cuda.get_device_from_id(device).use()
    if comm.size != config['n_gpu']:
        raise ValueError('# of GPUs specified in config file does not match '
                         'the actual number of available GPUs. '
                         'Expected={} Actual={}'.format(
                             config['n_gpu'], comm.size))

    if args.val_set is not None:
        assert 0 <= args.val_set <= 9
        config['val_set'] = args.val_set

    trainer_stop_trigger = config["epoch"], 'epoch'
    if args.benchmark:
        trainer_stop_trigger = args.benchmark_iterations, 'iteration'

    # Setup model
    model = setup_model(config, 0)
    if config.get('resume'):
        chainer.serializers.load_npz(config['resume'], model)
    train_chain = TrainChain(model, config['downscale'])
    train_chain.to_gpu()

    # Setup dataset
    if comm.rank == 0:
        dataset = RSNATrainDataset()

        # Determine samples to pick up
        assert config['view_position'] in ('both', 'pa', 'ap', 'no-pa-pos')
        if config['view_position'] == 'both':
            mask = np.ones(len(dataset), dtype=bool)
        elif config['view_position'] == 'no-pa-pos':
            mask = dataset.patient_df['ViewPosition'].values == 'PA'
            mask &= dataset.get_mask_for_positive_samples()
            mask = ~mask
        else:
            mask = dataset.patient_df['ViewPosition'].values == 'PA'
            if config['view_position'] == 'ap':
                mask = ~mask

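        # val_set == -1 uses the predefined withinTestRange split; otherwise
        # create_train_val_indices selects the chosen fold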
        if config['val_set'] == -1:
            train_mask = mask & (dataset.patient_df['withinTestRange'].values
                                 == 0)
            train_indices = train_mask.nonzero()[0]
            val_mask = mask & (dataset.patient_df['withinTestRange'].values
                               == 1)
            val_indices = val_mask.nonzero()[0]
        else:
            train_indices, val_indices = create_train_val_indices(
                mask, config['val_set'])
        train_data = dataset.slice[train_indices]
        val_data = dataset.slice[val_indices]
        print('train = {}, val = {}'.format(len(train_data), len(val_data)))

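        # optionally oversample the training data according to the positive-sample mask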
        positive_mask = dataset.get_mask_for_positive_samples()[train_indices]
        if config['oversampling_rate'] > 1:
            train_data = oversample_dataset(train_data, positive_mask,
                                            config['oversampling_rate'])
            print('==> train = {} ({}x oversampled with {} positive samples)'.
                  format(len(train_data), config['oversampling_rate'],
                         positive_mask.sum()))
        else:
            print('--> no oversampling with {} positive samples'.format(
                positive_mask.sum()))

        train_data = TransformDataset(train_data, preprocess)
        val_data = TransformDataset(val_data, preprocess)

        # Data augmentation
        augment = Augment(config['downscale'], config['data_augmentation'])
        train_data = TransformDataset(train_data, augment)
    else:
        train_data, val_data = None, None

    train_data = chainermn.scatter_dataset(train_data, comm)
    val_data = chainermn.scatter_dataset(val_data, comm)

    # Setup iterator, optimizer and updater
    train_iter = MultiprocessIterator(train_data,
                                      batch_size=config['batch_size'],
                                      shared_mem=10000000)
    val_iter = MultiprocessIterator(val_data,
                                    batch_size=config['batch_size'],
                                    repeat=False,
                                    shuffle=False,
                                    shared_mem=10000000)

    optimizer = setup_optimizer(config, comm, train_chain)
    if not config.get('resume') and config['extractor_freeze_iteration'] != 0:
        model.unet.extractor.disable_update()

    updater = chainer.training.updaters.StandardUpdater(
        train_iter,
        optimizer,
        device=device,
        converter=lambda x, y: chainer.dataset.concat_examples(x, y, 0))

    # Setup trainer
    trainer = chainer.training.Trainer(updater,
                                       stop_trigger=trainer_stop_trigger,
                                       out=args.out)

    trainer.extend(setup_lr_scheduler(config), trigger=(1, 'iteration'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        print_interval = 10, 'iteration'

        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=print_interval)
        entries = ['iteration', 'epoch', 'elapsed_time', 'lr']
        measurements = [
            'loss', 'seg_f1', 'seg_loss', 'edge_loss', 'raw_edge_loss'
        ]
        entries.extend(['main/{}'.format(x) for x in measurements])
        entries.extend(['validation/main/{}'.format(x) for x in measurements])
        trainer.extend(extensions.PrintReport(entries), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if args.snapshot > 0:
            trainer.extend(extensions.snapshot_object(
                model, 'model_epoch_{.updater.epoch}.npz'),
                           trigger=(args.snapshot, 'epoch'))
        trainer.extend(extensions.snapshot_object(model, 'final_model.npz'),
                       trigger=trainer_stop_trigger)

    evaluator = extensions.Evaluator(
        val_iter,
        train_chain,
        device=device,
        converter=lambda x, y: chainer.dataset.concat_examples(x, y, 0))
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

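    # unfreeze the extractor once extractor_freeze_iteration iterations have passed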
    @chainer.training.make_extension(trigger=(1, 'epoch'), priority=-100)
    def enable_extractor_update(_):
        print('enable update!')
        model.unet.extractor.enable_update()

    if config['extractor_freeze_iteration'] > 0:  # no melt if -1
        melt_trigger = ManualScheduleTrigger(
            config['extractor_freeze_iteration'], 'iteration')
        trainer.extend(enable_extractor_update, trigger=melt_trigger)

    trainer.run()

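    # after training, write validation predictions, then test predictions with --test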
    if args.predict:
        if comm.rank == 0:
            commands = [
                '--out',
                '{}/t0.01.csv'.format(args.out),
                '--model',
                '{}/final_model.npz'.format(args.out),
                '--config',
                args.config,
                '--val-set',
                str(config['val_set']),
                '--gpu',
                str(args.gpu),
                '--thresh',
                '0.01',
            ]
            predict.main(commands)

            commands[1] = '{}/test-t0.01.csv'.format(args.out)
            commands.append('--test')
            predict.main(commands)