Beispiel #1
0
 def test_transform(self):
     """Build a ``TransformDataset`` with explicit keys and verify it.

     Tuple keys are first passed through ``self.iterable``; any other kind
     of key is handed to ``TransformDataset`` unchanged. The result is
     checked against the original ``self.keys`` via ``self._check``.
     """
     given_keys = self.keys
     if isinstance(given_keys, tuple):
         given_keys = self.iterable(given_keys)
     transformed = TransformDataset(self.dataset, given_keys, self.func)
     self._check(transformed, self.keys)
Beispiel #2
0
 def test_transform_compat(self):
     """Check the two-argument form, where no keys are supplied.

     The keys expected by ``self._check`` are one ``None`` per entry when
     ``self.keys`` is a tuple, and a bare ``None`` otherwise.
     """
     expected_keys = (
         (None, ) * len(self.keys) if isinstance(self.keys, tuple) else None)
     transformed = TransformDataset(self.dataset, self.func)
     self._check(transformed, expected_keys)
    def test_transform_without_keys(self):
        """Transform with a key *count* (2) and expect unnamed keys."""

        def func(in_data):
            # Keep the first and third items, drop the middle one.
            first, _, third = in_data
            return 'transformed_' + first, 'transformed_' + third

        transformed = TransformDataset(self.dataset, 2, func)

        self.assertIsInstance(transformed, SliceableDataset)
        self.assertEqual(len(transformed), len(self.dataset))
        # Only the number of keys was given, so every key is ``None``.
        self.assertEqual(transformed.keys, (None, None))
        expected_example = ('transformed_item0(3)', 'transformed_item2(3)')
        self.assertEqual(transformed[3], expected_example)
    def test_transform(self):
        """Transform with explicit key names and expect them to be kept."""

        def func(in_data):
            # Keep the first and third items, drop the middle one.
            first, _, third = in_data
            return 'transformed_' + first, 'transformed_' + third

        key_names = self.iterable(('item0', 'item2'))
        transformed = TransformDataset(self.dataset, key_names, func)

        self.assertIsInstance(transformed, SliceableDataset)
        self.assertEqual(len(transformed), len(self.dataset))
        self.assertEqual(transformed.keys, ('item0', 'item2'))
        expected_example = ('transformed_item0(3)', 'transformed_item2(3)')
        self.assertEqual(transformed[3], expected_example)
Beispiel #5
0
 def test_transform_with_n_keys(self):
     """Pass the number of keys instead of their names and verify the result.

     A non-tuple ``self.keys`` maps to a key count of 1 and an expected key
     of ``None``. A tuple maps to its length and one ``None`` per entry;
     the one-element tuple case is skipped.
     """
     if not isinstance(self.keys, tuple):
         n_keys, expected_keys = 1, None
     else:
         n_keys = len(self.keys)
         if n_keys == 1:
             self.skipTest('tuple of single element is not supported '
                           'when the number of keys is specified')
         expected_keys = (None, ) * n_keys
     transformed = TransformDataset(self.dataset, n_keys, self.func)
     self._check(transformed, expected_keys)
Beispiel #6
0
def main():
    """Evaluate a ResNet50 multi-label classifier on the VOC2007 test split.

    Command-line options:
        --gpu: GPU id; a negative value (the default, -1) skips GPU setup.
        --pretrained-model: forwarded to ``ResNet50`` as ``pretrained_model``.

    Prints the mAP followed by one line per VOC class. A class whose AP is
    falsy is printed as ``-``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--gpu', type=int, default=-1)
    arg_parser.add_argument('--pretrained-model')
    args = arg_parser.parse_args()

    model = ResNet50(
        pretrained_model=args.pretrained_model,
        n_class=len(voc_bbox_label_names),
        arch='he')
    model.pick = 'fc6'
    use_gpu = args.gpu >= 0
    if use_gpu:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Wrap the bbox dataset so each example is converted by
    # ``bbox_to_multi_label`` and exposed under the keys ('img', 'bbox').
    voc_test = VOCBboxDataset(split='test', year='2007', use_difficult=False)
    dataset = TransformDataset(voc_test, ('img', 'bbox'), bbox_to_multi_label)
    iterator = iterators.SerialIterator(
        dataset, 8, repeat=False, shuffle=False)

    predictor = PredictFunc(model, thresh=0)
    progress = ProgressHook(len(dataset))
    in_values, out_values, rest_values = apply_to_iterator(
        predictor, iterator, hook=progress)
    # delete unused iterators explicitly
    del in_values
    pred_labels, pred_scores = out_values
    (gt_labels,) = rest_values

    result = eval_multi_label_classification(
        pred_labels, pred_scores, gt_labels)

    print()
    print('mAP: {:f}'.format(result['map']))
    for class_index, name in enumerate(voc_bbox_label_names):
        class_ap = result['ap'][class_index]
        if class_ap:
            line = '{:s}: {:f}'.format(name, class_ap)
        else:
            line = '{:s}: -'.format(name)
        print(line)
Beispiel #7
0
def main():
    """Train FCIS (PSROIAlign ResNet101) on SBD with ChainerMN.

    Command-line options:
        --out / -o: output directory passed to the trainer.
        --seed / -s: seed given to ``np.random.seed``.
        --lr / -l: base learning rate; it is multiplied by ``comm.size``.
        --lr-cooldown-factor / -lcf: factor used by ``ExponentialShift``.
        --epoch / -e: number of training epochs.
        --cooldown-epoch / -ce: one or more epochs (integers) at which the
            learning rate is shifted and the evaluator is run.

    Every process trains on its own scattered slice of the training set.
    Reporting, snapshot and evaluation extensions are registered only on
    rank 0.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr',
        '-l',
        type=float,
        default=0.0005,
        help='Default value is for 1 GPU.\n'
        'The learning rate will be multiplied by the number of gpu')
    parser.add_argument('--lr-cooldown-factor',
                        '-lcf',
                        type=float,
                        default=0.1)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    # ``type=list`` would split the argument string into single characters
    # (e.g. '28' -> ['2', '8']); parse one or more integers instead.
    parser.add_argument('--cooldown-epoch',
                        '-ce',
                        type=int,
                        nargs='+',
                        default=[28, 31])
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    fcis = FCISPSROIAlignResNet101(
        n_fg_class=len(sbd_instance_segmentation_label_names),
        pretrained_model='imagenet',
        iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'), Transform(model.fcis))
    # Only rank 0 builds the index array; scatter_dataset hands each
    # process its own shuffled share of it.
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    # The test iterator exists only on rank 0, which is also the only rank
    # that registers the evaluator below.
    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                     batch_size=1,
                                                     repeat=False,
                                                     shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr * comm.size, momentum=0.9),
        comm)
    optimizer.setup(model)

    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze every parameter named 'beta' or 'gamma', plus the first two
    # extractor stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)

    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    # lr scheduler
    trainer.extend(chainer.training.extensions.ExponentialShift(
        'lr', args.lr_cooldown_factor, init=args.lr * comm.size),
                   trigger=ManualScheduleTrigger(args.cooldown_epoch, 'epoch'))

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        model_name = model.fcis.__class__.__name__

        trainer.extend(extensions.snapshot_object(
            model.fcis,
            filename='%s_model_iter_{.updater.iteration}.npz' % model_name),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map',
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        # Evaluate at the same epochs at which the learning rate is shifted.
        trainer.extend(InstanceSegmentationVOCEvaluator(
            test_iter,
            model.fcis,
            iou_thresh=0.5,
            use_07_metric=True,
            label_names=sbd_instance_segmentation_label_names),
                       trigger=ManualScheduleTrigger(args.cooldown_epoch,
                                                     'epoch'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Beispiel #8
0
def main():
    """Train (or export/compile) a ResNet classifier with ChainerMN.

    Positional arguments ``train`` and ``val`` are the dataset roots. The
    remaining options select the model, communicator, optimisation
    hyper-parameters and the chainer-compiler export/compile settings.

    Behaviour, in order:
      * With ``--export`` the model is exported to ONNX and the function
        returns without training.
      * With ``--compile`` the given ONNX file is compiled and the compiled
        extractor is wrapped in ``Classifier``; otherwise the plain
        extractor is used.
      * The train/val datasets are scattered across processes and trained
        with a warm-up plus step learning-rate schedule. Reporting
        extensions are registered on rank 0 only.
    """
    model_cfgs = {
        'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6',
                     'kwargs': {'arch': 'fb'}},
        'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}},
        'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}}
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')

    parser.add_argument('--export', type=str, default=None,
                        help='Export the model to ONNX')
    parser.add_argument('--compile', type=str, default=None,
                        help='Compile the model')
    parser.add_argument('--computation_order', type=str, default=None,
                        help='Computation order in backpropagation')

    parser.add_argument('--model',
                        '-m', choices=model_cfgs.keys(), default='resnet50',
                        help='Convnet models')
    parser.add_argument('--communicator', type=str,
                        default='pure_nccl', help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight-decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    parser.add_argument('--iterations', '-I', type=int, default=None,
                        help='Number of iterations to train')
    parser.add_argument('--no_use_fixed_batch_dataset',
                        dest='use_fixed_batch_dataset',
                        action='store_false',
                        help='Disable the use of FixedBatchDataset')
    parser.add_argument('--compiler-log', action='store_true',
                        help='Enables compile-time logging')
    parser.add_argument('--trace', action='store_true',
                        help='Enables runtime tracing')
    parser.add_argument('--verbose', action='store_true',
                        help='Enables runtime verbose log')
    parser.add_argument('--skip_runtime_type_check', action='store_true',
                        help='Skip runtime type check')
    parser.add_argument('--dump_memory_usage', type=int, default=0,
                        help='Dump memory usage (0-2)')
    parser.add_argument('--quiet_period', type=int, default=0,
                        help='Quiet period after runtime report')
    parser.add_argument('--overwrite_batchsize', action='store_true',
                        help='Overwrite batch size')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        # Start and join an empty process once, before the communicator is
        # created (see the FAQ linked above).
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if args.lr is not None:
        lr = args.lr
    else:
        # Linear scaling rule: 0.1 per 256 samples of global batch size.
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](
        n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']

    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in extractor.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    if args.export is not None:
        # Export-only mode: write the ONNX file and stop.
        chainer_compiler.use_unified_memory_allocator()
        extractor.to_device(device)
        x = extractor.xp.zeros((args.batchsize, 3, 224, 224)).astype('f')
        chainer_compiler.export(extractor, [x], args.export)
        return

    if args.compile is not None:
        print('run compiled model')
        chainer_compiler.use_chainerx_shared_allocator()
        extractor.to_device(device)
        # init params
        with chainer.using_config('enable_backprop', False),\
                chainer.using_config('train', False):
            x = extractor.xp.zeros((1, 3, 224, 224)).astype('f')
            extractor(x)

        # Translate the command-line flags into compiler/runtime keyword
        # arguments; only flags that were set are forwarded.
        compiler_kwargs = {}
        if args.compiler_log:
            compiler_kwargs['compiler_log'] = True
        runtime_kwargs = {}
        if args.trace:
            runtime_kwargs['trace'] = True
        if args.verbose:
            runtime_kwargs['verbose'] = True
        if args.skip_runtime_type_check:
            runtime_kwargs['check_types'] = False
        if args.dump_memory_usage >= 1:
            runtime_kwargs['dump_memory_usage'] = args.dump_memory_usage
            # Memory already in use before the compiled model runs.
            free, total = cupy.cuda.runtime.memGetInfo()
            used = total - free
            runtime_kwargs['base_memory_usage'] = used

        onnx_filename = args.compile
        if args.overwrite_batchsize:
            # Rewrite the ONNX input type to the requested batch size and
            # compile the rewritten copy instead of the original file.
            new_onnx_filename = ('/tmp/overwrite_batchsize_' +
                                 os.path.basename(onnx_filename))
            new_input_types = [
                input_rewriter.Type(shape=(args.batchsize, 3, 224, 224))
            ]
            input_rewriter.rewrite_onnx_file(onnx_filename,
                                             new_onnx_filename,
                                             new_input_types)
            onnx_filename = new_onnx_filename

        extractor_cc = chainer_compiler.compile_onnx(
            extractor,
            onnx_filename,
            'onnx_chainer',
            computation_order=args.computation_order,
            compiler_kwargs=compiler_kwargs,
            runtime_kwargs=runtime_kwargs,
            quiet_period=args.quiet_period)
        model = Classifier(extractor_cc)
    else:
        print('run vanilla chainer model')
        model = Classifier(extractor)

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(
        train_data, ('img', 'label'), TrainTransform(extractor.mean))
    val_data = TransformDataset(
        val_data, ('img', 'label'), ValTransform(extractor.mean))
    print('finished loading dataset')

    # Only rank 0 builds the index arrays; scatter_dataset distributes a
    # shuffled share to every process.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(
        train_indices, comm, shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    if args.use_fixed_batch_dataset:
        train_data = FixedBatchDataset(train_data, args.batchsize)
        val_data = FixedBatchDataset(val_data, args.batchsize)
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize,
        repeat=False, shuffle=False, n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    # Weight decay is applied to every parameter except those named
    # 'beta' or 'gamma'.
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        # ``get_device`` is deprecated; use the id-based accessor like the
        # other training scripts do.
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)

    # ``--iterations`` takes precedence over ``--epoch`` when given.
    if args.iterations:
        stop_trigger = (args.iterations, 'iteration')
    else:
        stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(
        updater, stop_trigger, out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        """Return the learning rate for the current (fractional) epoch.

        For the first 5 epochs the rate ramps linearly from 0.1 up to
        ``lr`` when ``lr`` exceeds 0.1; afterwards it is ``lr`` scaled by
        1, 0.1, 0.01 and 0.001 with boundaries at epochs 30, 60 and 80.
        """
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        # The snapshot trigger is tied to ``--epoch``.
        trainer.extend(
            extensions.snapshot_object(
                extractor, 'snapshot_model_{.updater.epoch}.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport(
            ['iteration', 'epoch', 'elapsed_time', 'lr',
             'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy']
        ), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Beispiel #9
0
    def __call__(self, in_data):
        """Paste the foreground onto a random background image and crop.

        ``in_data`` is unpacked as ``(fg_img, point, label, msk)``. A
        background is drawn at random from ``self.voc``, resized to the
        foreground's last two dimensions, and overwritten with the
        foreground wherever ``msk`` selects. The composite and ``point``
        are then passed through ``random_crop``.

        Returns:
            The cropped image, the cropped point and the unchanged label.
        """
        # Lazily create the generator so that it is seeded at first use.
        # Different seeds on different processes
        if self.random is None:
            self.random = np.random.RandomState()

        fg_img, point, label, msk = in_data
        _, height, width = fg_img.shape

        background_index = self.random.randint(0, len(self.voc))
        composite = resize(self.voc[background_index], (height, width))
        composite[:, msk] = fg_img[:, msk]

        cropped_img, cropped_point = random_crop(composite, point)

        # skipping color related augmentation
        return cropped_img, cropped_point, label


if __name__ == '__main__':
    # Manual visual check: run one training example through the transform
    # and display the resulting image with its points.
    from linemod_dataset import LinemodDataset
    from vis_point import vis_point
    import matplotlib.pyplot as plt
    from chainercv.chainer_experimental.datasets.sliceable import \
        TransformDataset

    # The dataset is created with return_msk=True; the transform unpacks a
    # mask as the fourth item of each example.
    dataset = LinemodDataset('..', split='train', return_msk=True)
    # After the transform each example has three items: img, point, label.
    dataset = TransformDataset(dataset, ('img', 'point', 'label'), Transform())
    img, point, label = dataset[0]
    vis_point(img, point)
    plt.show()
Beispiel #10
0
                         n_class=10,
                         n_layer=args.n_layer,
                         use_auxiliary=args.use_auxiliary,
                         drop_path_prob=args.drop_path_prob)
    classifier = TrainChain(model, args.use_auxiliary)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        classifier.to_gpu()

    optimizer = chainer.optimizers.CorrectedMomentumSGD(args.lr, args.momentum)
    optimizer.setup(classifier)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(3e-4))

    train_data, val_data = chainer.datasets.get_cifar10()
    train_data = TransformDataset(
        train_data, ('img', 'label'),
        CIFAR10TrainTransform(args.use_cutout, args.cutout_length))
    val_data = TransformDataset(val_data, ('img', 'label'),
                                cifar10_val_transform)

    train_iter = chainer.iterators.MultiprocessIterator(train_data,
                                                        args.batchsize,
                                                        n_prefetch=1)
    val_iter = chainer.iterators.MultiprocessIterator(val_data,
                                                      args.batchsize,
                                                      repeat=False,
                                                      shuffle=False)
    evaluator = extensions.Evaluator(val_iter, classifier, device=args.gpu)

    updater = chainer.training.updaters.StandardUpdater(train_iter,
                                                        optimizer,
Beispiel #11
0
def main():
    """Train FCIS (PSROIAlign ResNet101) on COCO with ChainerMN.

    Command-line options:
        --out / -o: output directory passed to the trainer.
        --seed / -s: seed given to ``np.random.seed``.
        --lr / -l: base learning rate; it is multiplied by ``comm.size``.
        --no-ohem: build the train chain with ``n_ohem_sample=None`` and a
            ``ProposalTargetCreator(n_sample=128)``.

    Training runs for 18 epochs with the learning rate multiplied by 0.1 at
    epochs 12 and 15. Reporting, snapshot and evaluation extensions are
    registered on rank 0 only.
    """
    arg_parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    arg_parser.add_argument(
        '--out', '-o', default='result', help='Output directory')
    arg_parser.add_argument('--seed', '-s', type=int, default=0)
    arg_parser.add_argument(
        '--lr', '-l', type=float, default=0.0005,
        help='Default value is for 1 GPU.\n'
             'The learning rate will be multiplied by the number of gpu')
    arg_parser.add_argument('--no-ohem', action='store_true')
    args = arg_parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank
    is_root = comm.rank == 0

    np.random.seed(args.seed)

    # model
    fcis = FCISPSROIAlignResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        min_size=800,
        max_size=1333,
        anchor_scales=(2, 4, 8, 16, 32),
        pretrained_model='imagenet',
        iter2=False,
        proposal_creator_params=dict(
            nms_thresh=0.7,
            n_train_pre_nms=12000,
            n_train_post_nms=2000,
            n_test_pre_nms=6000,
            n_test_post_nms=1000,
            force_cpu_nms=False,
            min_size=0))
    fcis.use_preset('coco_evaluate')

    # Extra keyword arguments are supplied only when OHEM is switched off.
    train_chain_kwargs = {}
    if args.no_ohem:
        train_chain_kwargs = {
            'n_ohem_sample': None,
            'proposal_target_creator': ProposalTargetCreator(n_sample=128),
        }
    model = FCISTrainChain(fcis, **train_chain_kwargs)

    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset
    coco_train = ConcatenatedDataset(
        COCOInstanceSegmentationDataset(split='train'),
        COCOInstanceSegmentationDataset(split='valminusminival'))
    train_dataset = TransformDataset(
        coco_train,
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))
    test_dataset = COCOInstanceSegmentationDataset(
        split='minival',
        use_crowded=True,
        return_crowded=True,
        return_area=True)

    # Only the root builds the index array; scatter_dataset hands each
    # process its own shuffled share of it.
    indices = np.arange(len(train_dataset)) if is_root else None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    if is_root:
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # optimizer
    scaled_lr = args.lr * comm.size
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=scaled_lr, momentum=0.9), comm)
    optimizer.setup(model)

    head_conv = model.fcis.head.conv1
    head_conv.W.update_rule.add_hook(GradientScaling(3.0))
    head_conv.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze every parameter named 'beta' or 'gamma', plus the first two
    # extractor stages.
    for param in model.params():
        if param.name in ('beta', 'gamma'):
            param.update_rule.enabled = False
    for frozen_link in (model.fcis.extractor.conv1,
                        model.fcis.extractor.res2):
        frozen_link.disable_update()

    # Example layout: img, masks, labels, bboxes, scales.
    # Index 3 (bboxes) is left out of the concatenation, and only the
    # image (index 0) is sent to the device.
    converter = functools.partial(
        concat_examples,
        padding=0,
        indices_concat=[0, 1, 2, 4],
        indices_to_device=[0])

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)

    trainer = chainer.training.Trainer(updater, (18, 'epoch'), out=args.out)

    # lr scheduler
    lr_shift = chainer.training.extensions.ExponentialShift(
        'lr', 0.1, init=scaled_lr)
    trainer.extend(
        lr_shift, trigger=ManualScheduleTrigger([12, 15], 'epoch'))

    if is_root:
        # interval
        log_interval = (100, 'iteration')
        plot_interval = (3000, 'iteration')
        print_interval = (10, 'iteration')

        # training extensions
        model_name = model.fcis.__class__.__name__
        snapshot = chainer.training.extensions.snapshot_object(
            model.fcis,
            savefun=chainer.serializers.save_npz,
            filename='%s_model_iter_{.updater.iteration}.npz' % model_name)
        trainer.extend(snapshot, trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))

        report_items = [
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(
            extensions.PrintReport(report_items), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            loss_plot = extensions.PlotReport(
                ['main/loss'], file_name='loss.png', trigger=plot_interval)
            trainer.extend(loss_plot, trigger=plot_interval)

        # The evaluator fires at iteration counts equal to 12x and 15x the
        # length of this process's training slice.
        evaluator = InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.fcis,
            label_names=coco_instance_segmentation_label_names)
        eval_iterations = [len(train_dataset) * 12, len(train_dataset) * 15]
        trainer.extend(
            evaluator,
            trigger=ManualScheduleTrigger(eval_iterations, 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main(config):
    """Run k-fold test-time-augmented inference and write three submissions.

    Args:
        config: callable returning an options object. The attributes read
            here are ``backborn_cfg``, ``path_data``, ``test_df``,
            ``dataset``, ``input_shape``, ``model``, ``valid_trainsform``,
            ``valid_trainsform_flip1`` .. ``valid_trainsform_flip3``,
            ``num_class``, ``fold`` and ``out``.

    Side effects:
        Saves ``<out>_probs.npy``, ``<out>_probs_leak.npy`` and
        ``<out>_probs_leak_hungarian.npy`` and writes the CSV files
        ``submission_<out>.csv``, ``submission_leak_<out>.csv`` and
        ``submission_leak_hungarian<out>.csv``.

    Returns:
        ``(probs, probs_leak, probs_hungarian)``: the averaged class
        probabilities, the probabilities after plate-group masking, and a
        list of per-sample probability rows after the per-plate assignment.
    """
    opts = config()
    device = 0

    backborn_cfg = opts.backborn_cfg

    test_df = pd.read_csv(opts.path_data + opts.test_df)

    test_dataset = opts.dataset(test_df, opts.path_data, mode='test')

    backborn = chcv2_get_model(backborn_cfg['name'],
                               pretrained=backborn_cfg['pretrain'],
                               in_size=opts.input_shape)[backborn_cfg['layer']]

    model = opts.model(backborn=backborn)

    # One dataset per test-time augmentation: plain plus three flips.
    tta_datasets = (
        TransformDataset(test_dataset, opts.valid_trainsform),
        TransformDataset(test_dataset, opts.valid_trainsform_flip1),
        TransformDataset(test_dataset, opts.valid_trainsform_flip2),
        TransformDataset(test_dataset, opts.valid_trainsform_flip3),
    )

    print('finished loading dataset')

    if device >= 0:
        # ``get_device`` is deprecated in favour of ``get_device_from_id``.
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    test_len = len(tta_datasets[0])
    # Examples i and i + half_len are accumulated into the same result row
    # (presumably two images per submission entry -- TODO confirm).
    half_len = test_len // 2
    n_repeat = 4
    result = np.zeros((half_len, opts.num_class))
    for fold in trange(opts.fold, desc='fold loop'):
        chainer.serializers.load_npz(
            opts.out + '_fold' + str(fold) + '/snapshot_model_f1max.npz',
            model)
        for i in trange(test_len, desc='id loop'):
            row = i % half_len
            for _ in range(n_repeat):
                for tta_data in tta_datasets:
                    img, _img_id = tta_data.get_example(i)
                    img = img[None, :, :, :]
                    with chainer.using_config('train', False), \
                            chainer.using_config('enable_backprop', False):
                        res = chainer.cuda.to_cpu(
                            F.softmax(model(img)[1]).data)
                    # Not ``+=``: ``res`` carries a leading batch axis that
                    # an in-place add into the 1-D row cannot broadcast.
                    result[row] = result[row] + res

    # Each row received n_repeat * len(tta_datasets) * 2 (= 32) predictions
    # per fold.
    result = result / opts.fold / (n_repeat * len(tta_datasets) * 2)
    # Copy so that the in-place leak masking below does not alter the
    # returned raw probabilities.
    probs = result.copy()
    np.save(opts.out + "_probs.npy", probs)
    result_t = np.argmax(result, axis=1)
    print(result_t.shape)
    submission = pd.read_csv(opts.path_data + '/test.csv')
    submission['sirna'] = result_t.astype(int)
    submission.to_csv('submission_' + opts.out + '.csv',
                      index=False,
                      columns=['id_code', 'sirna'])

    ############################### Use leak #########################################
    train_csv = pd.read_csv("../input/train.csv")
    test_csv = pd.read_csv("../input/test.csv")
    sub = pd.read_csv('submission_' + opts.out + '.csv')

    # plate_groups[sirna] holds the three plates the sirna appears on in the
    # training data, followed by the remaining plate (plate ids sum to 10).
    plate_groups = np.zeros((1108, 4), int)
    for sirna in range(1108):
        grp = train_csv.loc[train_csv.sirna ==
                            sirna, :].plate.value_counts().index.values
        assert len(grp) == 3
        plate_groups[sirna, 0:3] = grp
        plate_groups[sirna, 3] = 10 - grp.sum()

    # For every test experiment, measure how well the current predictions
    # agree with each of the four plate-group columns and keep the best one.
    all_test_exp = test_csv.experiment.unique()
    group_plate_probs = np.zeros((len(all_test_exp), 4))
    for idx in range(len(all_test_exp)):
        preds = sub.loc[test_csv.experiment == all_test_exp[idx], 'sirna']
        pp_mult = np.zeros((len(preds), 1108))
        pp_mult[range(len(preds)), preds] = 1

        sub_test = test_csv.loc[test_csv.experiment == all_test_exp[idx], :]
        assert len(pp_mult) == len(sub_test)

        for j in range(4):
            mask = np.repeat(plate_groups[np.newaxis, :, j], len(pp_mult), axis=0) == \
                   np.repeat(sub_test.plate.values[:, np.newaxis], 1108, axis=1)

            group_plate_probs[idx,
                              j] = np.array(pp_mult)[mask].sum() / len(pp_mult)
    exp_to_group = group_plate_probs.argmax(1)

    def select_plate_group(pp_mult, idx):
        """Zero the entries whose plate disagrees with the chosen group."""
        sub_test = test_csv.loc[test_csv.experiment == all_test_exp[idx], :]
        assert len(pp_mult) == len(sub_test)
        mask = np.repeat(plate_groups[np.newaxis, :, exp_to_group[idx]], len(pp_mult), axis=0) != \
               np.repeat(sub_test.plate.values[:, np.newaxis], 1108, axis=1)
        pp_mult[mask] = 0
        return pp_mult

    for idx in range(len(all_test_exp)):
        indices = (test_csv.experiment == all_test_exp[idx])

        preds = result[indices, :].copy()

        preds = select_plate_group(preds, idx)
        result[indices, :] = preds
    probs_leak = result
    np.save(opts.out + "_probs_leak.npy", probs_leak)
    result = np.argmax(result, axis=1)
    print(result.shape)
    submission = pd.read_csv('submission_' + opts.out + '.csv')
    submission['sirna'] = result.astype(int)
    submission.to_csv('submission_leak_' + opts.out + '.csv',
                      index=False,
                      columns=['id_code', 'sirna'])
    ###################################################################################
    ############################### Use Hungarian Algorithm##################################
    import scipy
    import scipy.special
    import scipy.optimize

    def assign_plate(plate):
        """Keep only the entries chosen by an optimal one-to-one assignment.

        ``plate`` is a sequence of per-sample probability rows. The
        assignment maximises the summed probability; every entry that is
        not part of it is set to zero.
        """
        probabilities = np.array(plate)
        rows, cols = scipy.optimize.linear_sum_assignment(-probabilities)
        chosen = np.zeros(probabilities.shape, dtype=bool)
        chosen[rows, cols] = True
        probabilities[~chosen] = 0
        return probabilities

    # Walk the submission in order and solve one assignment per run of
    # consecutive rows sharing the same (experiment, plate).
    current_key = None
    plate_probabilities = []
    probs_hungarian = []
    for i, name in tqdm(enumerate(submission['id_code'])):
        experiment, plate, _ = name.split('_')
        key = (experiment, plate)
        if key != current_key:
            if current_key is not None:
                probs_hungarian.extend(assign_plate(plate_probabilities))
            plate_probabilities = []
            current_key = key
        plate_probabilities.append(scipy.special.softmax(probs_leak[i]))
    if plate_probabilities:
        probs_hungarian.extend(assign_plate(plate_probabilities))

    np.save(opts.out + "_probs_leak_hungarian.npy", probs_hungarian)
    # ``result`` is already the 1-D argmax of the leak step; the labels for
    # this submission must come from the Hungarian-adjusted probabilities.
    result = np.argmax(np.asarray(probs_hungarian), axis=1)
    print(result.shape)
    submission = pd.read_csv('submission_' + opts.out + '.csv')
    submission['sirna'] = result.astype(int)
    submission.to_csv('submission_leak_hungarian' + opts.out + '.csv',
                      index=False,
                      columns=['id_code', 'sirna'])

    return probs, probs_leak, probs_hungarian
def main():
    """Train a ResNet classifier with ChainerMN data-parallel workers.

    Parses the command line, builds the selected ResNet extractor wrapped in
    a ``Classifier``, scatters the train/validation indices from rank 0 to
    every worker and runs a ``Trainer`` whose learning rate follows a warm-up
    followed by step decay.  Reporting and snapshot extensions are registered
    on rank 0 only.
    """
    model_cfgs = {
        'resnet50': {
            'class': ResNet50,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        },
        'resnet101': {
            'class': ResNet101,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        },
        'resnet152': {
            'class': ResNet152,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        }
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--model',
                        '-m',
                        choices=model_cfgs.keys(),
                        default='resnet50',
                        help='Convnet models')
    parser.add_argument('--communicator',
                        type=str,
                        default='pure_nccl',
                        help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize',
                        type=int,
                        default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Select 'forkserver' and start a dummy process before the communicator
    # is created, as described in the page linked above.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if args.lr is not None:
        lr = args.lr
    else:
        # Linear scaling rule: 0.1 per 256 images of global batch size.
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](n_class=len(label_names),
                                   **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for link in model.links():
        if isinstance(link, Bottleneck):
            link.conv3.bn.gamma.data[:] = 0

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(train_data, ('img', 'label'),
                                  TrainTransform(extractor.mean))
    val_data = TransformDataset(val_data, ('img', 'label'),
                                ValTransform(extractor.mean))
    print('finished loading dataset')

    # Only rank 0 builds the index arrays; scatter_dataset hands each worker
    # its own share.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(train_indices,
                                              comm,
                                              shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val_data,
        args.batchsize,
        repeat=False,
        shuffle=False,
        n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    # Weight decay is attached to every parameter except those named
    # 'beta' / 'gamma'.
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        # get_device_from_id replaces the deprecated get_device and matches
        # the other training scripts.
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(train_iter,
                                               optimizer,
                                               device=device)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        """Return the learning rate for the current (fractional) epoch."""
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                # Ramp linearly from 0.1 up to ``lr`` over the warm-up epochs.
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        # The extractor is saved once, when the final epoch is reached.
        trainer.extend(extensions.snapshot_object(
            extractor, 'snapshot_model_{.updater.epoch}.npz'),
                       trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
            'validation/main/loss', 'main/accuracy', 'validation/main/accuracy'
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Beispiel #14
0
def main():
    """Train a ResNet-50 multi-label classifier on the VOC2007 bbox dataset.

    Command-line options choose the GPU id, the mini-batch size and the
    output directory.  Training runs for 11 epochs and the learning rate is
    multiplied by 0.1 at epochs 8 and 10.
    """
    parser = argparse.ArgumentParser(
        description='Chainer Multi-label classification')
    parser.add_argument(
        '--gpu', '-g', type=int, default=0,
        help='GPU ID (negative value indicates CPU)')
    parser.add_argument(
        '--batchsize', '-b', type=int, default=4,
        help='Number of images in each mini-batch')
    parser.add_argument(
        '--out', '-o', default='result',
        help='Directory to output the result')
    args = parser.parse_args()

    # Network: ResNet-50 whose 'fc6' output is fed to the multi-label loss.
    model = get_resnet_50(len(voc_bbox_label_names))
    model.pick = 'fc6'
    train_chain = MultiLabelClassifier(
        model, loss_scale=len(voc_bbox_label_names))

    # Datasets: bounding-box annotations are converted by
    # bbox_to_multi_label for both splits.
    train = TransformDataset(
        VOCBboxDataset(year='2007', split='trainval', use_difficult=False),
        ('img', 'bbox'), bbox_to_multi_label)
    test = TransformDataset(
        VOCBboxDataset(year='2007', split='test', use_difficult=False),
        ('img', 'bbox'), bbox_to_multi_label)

    use_gpu = args.gpu >= 0
    if use_gpu:
        # Select the requested GPU, then move the model onto it.
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        train_chain.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(0.001)
    optimizer.setup(train_chain)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    stop_trigger = (11, 'epoch')
    log_interval = (20, 'iteration')

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluation on the test split plus the step learning-rate schedule.
    evaluator = extensions.Evaluator(
        test_iter, train_chain, device=args.gpu, converter=converter)
    trainer.extend(evaluator)
    lr_drop_trigger = triggers.ManualScheduleTrigger([8, 10], 'epoch')
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=lr_drop_trigger)

    # Reporting.
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    report_keys = [
        'lr', 'epoch', 'elapsed_time',
        'main/loss', 'main/recall', 'main/precision',
        'main/n_pred', 'main/n_pos',
        'validation/main/loss', 'validation/main/recall',
        'validation/main/precision',
        'validation/main/n_pred', 'validation/main/n_pos',
    ]
    trainer.extend(extensions.PrintReport(report_keys), trigger=log_interval)

    trainer.extend(extensions.snapshot_object(model, 'snapshot_model.npz'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def main():
    """Train Faster R-CNN FPN (ResNet-50/101) on COCO with ChainerMN.

    ``--model`` selects the backbone and is mandatory: without it no model
    would be constructed.  ``--batchsize`` is the total batch size over all
    workers; the iteration counts and the base learning rate are scaled
    relative to a reference batch size of 16.
    """
    parser = argparse.ArgumentParser()
    # required=True: omitting the option used to surface only later as an
    # unbound ``model`` variable.
    parser.add_argument(
        '--model', choices=('resnet50', 'resnet101'), required=True)
    parser.add_argument('--batchsize', type=int, default=16)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    # Build the detector and copy ImageNet-pretrained weights into its base
    # extractor.
    if args.model == 'resnet50':
        model = FasterRCNNFPNResNet50(
            n_fg_class=len(coco_bbox_label_names), mean='chainercv')
        copyparams(model.extractor.base,
                   ResNet50(pretrained_model='imagenet', arch='he'))
    elif args.model == 'resnet101':
        model = FasterRCNNFPNResNet101(
            n_fg_class=len(coco_bbox_label_names), mean='chainercv')
        copyparams(model.extractor.base,
                   ResNet101(pretrained_model='imagenet', arch='he'))

    model.use_preset('evaluate')
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(
            COCOBboxDataset(split='train'),
            COCOBboxDataset(split='valminusminival'),
        ), ('img', 'bbox', 'label'), transform)

    # Rank 0 creates the index array; scatter_dataset distributes a share to
    # each worker.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # Each worker processes an equal part of the total batch size.
    train_iter = chainer.iterators.MultithreadIterator(
        train, args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze the first stages of the backbone and all batch-normalization
    # links.
    model.extractor.base.conv1.disable_update()
    model.extractor.base.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = training.Trainer(
        updater, (90000 * 16 / args.batchsize, 'iteration'), args.out)

    def lr_schedule(updater):
        """Return the learning rate for the updater's current iteration."""
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = updater.iteration
        if iteration < warm_up_duration:
            # Linear warm-up from base_lr / 3 to base_lr.
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        elif iteration < 60000 * 16 / args.batchsize:
            rate = 1
        elif iteration < 80000 * 16 / args.batchsize:
            rate = 0.1
        else:
            rate = 0.01

        return base_lr * rate

    trainer.extend(ManualScheduler('lr', lr_schedule))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr', 'main/loss',
             'main/loss/rpn/loc', 'main/loss/rpn/conf',
             'main/loss/head/loc', 'main/loss/head/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        # The model itself is saved once, at the final iteration.
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(90000 * 16 / args.batchsize, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
Beispiel #16
0
                        help='Output directory')
    parser.add_argument('--pretrained-model',
                        default='',
                        type=str,
                        help='Output directory')
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--batchsize', '-b', type=int, default=24)
    parser.add_argument('--conf-loss-scale', '-c', type=float, default=1)
    parser.add_argument('object')
    args = parser.parse_args()

    train_data = LinemodDataset('.',
                                obj_name=args.object,
                                split='train',
                                return_msk=True)
    train_data = TransformDataset(train_data, ('img', 'point', 'label'),
                                  Transform())
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data,
        batch_size=args.batchsize,
        n_processes=None,
        shared_mem=100000000)
    test_data = LinemodDataset('.', obj_name=args.object, split='test')
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 1,
                                                 repeat=False,
                                                 shuffle=False)

    model = SSPYOLOv2()
    if args.pretrained_model:
        chainer.serializers.load_npz(args.pretrained_model, model)
    train_chain = TrainChain(model, train_iter)
def main():
    """Run config-driven multi-GPU training with ChainerMN.

    The configuration file named by ``args.config`` is merged into the global
    ``cfg`` and frozen.  Model, train chain, dataset and optimizer are built
    by the ``setup_*`` helpers; reporting and snapshot extensions are
    registered on rank 0 only.
    """
    args = parse_args()
    cfg.merge_from_file(args.config)
    cfg.freeze()

    # Select 'forkserver' and start a dummy process before the communicator
    # is created (presumably the ChainerMN MultiprocessIterator workaround --
    # confirm).
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        dummy = multiprocessing.Process()
        dummy.start()
        dummy.join()

    comm = chainermn.create_communicator('pure_nccl')
    assert comm.size == cfg.n_gpu
    device = comm.intra_rank

    if comm.rank == 0:
        print(cfg)

    model = setup_model(cfg)
    train_chain = setup_train_chain(cfg, model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    base_dataset = setup_dataset(cfg, 'train')
    train_dataset = TransformDataset(
        base_dataset, ('img', 'bbox', 'label'), Transform())
    # Rank 0 creates the indices; every worker receives its share.
    indices = np.arange(len(train_dataset)) if comm.rank == 0 else None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.MultiprocessIterator(
        train_dataset,
        cfg.n_sample_per_gpu,
        n_processes=cfg.n_worker,
        shared_mem=100 * 1000 * 1000 * 4)

    base_optimizer = setup_optimizer(cfg)
    optimizer = chainermn.create_multi_node_optimizer(base_optimizer, comm)
    optimizer = optimizer.setup(train_chain)
    optimizer = add_hook_optimizer(optimizer, cfg)
    freeze_params(cfg, train_chain.model)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device, converter=converter)
    stop_trigger = (cfg.solver.n_iteration, 'iteration')
    trainer = training.Trainer(updater, stop_trigger, get_outdir(args.config))

    # Extensions (rank 0 only): logging, progress, snapshots, tensorboard.
    if comm.rank == 0:
        ext = training.extensions
        log_interval = 10, 'iteration'
        trainer.extend(ext.LogReport(trigger=log_interval))
        trainer.extend(ext.observe_lr(), trigger=log_interval)
        printed_keys = [
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/loss/loc', 'main/loss/conf',
        ]
        trainer.extend(ext.PrintReport(printed_keys), trigger=log_interval)
        trainer.extend(ext.ProgressBar(update_interval=10))

        trainer.extend(ext.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            ext.snapshot_object(model, 'model_iter_{.updater.iteration}'),
            trigger=(cfg.solver.n_iteration, 'iteration'))
        if args.tensorboard:
            tensorboard_logger = LogTensorboard(
                ['lr', 'main/loss', 'main/loss/loc', 'main/loss/conf'],
                trigger=(10, 'iteration'),
                log_dir=get_logdir(args.config))
            trainer.extend(tensorboard_logger)

    # Step decay of the learning rate when steps are configured.
    if len(cfg.solver.lr_step) > 0:
        lr_shift = training.extensions.MultistepShift(
            'lr', 0.1, cfg.solver.lr_step, cfg.solver.base_lr, optimizer)
        trainer.extend(lr_shift)

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def main():
    """Train FCIS ResNet-101 on COCO 2014 instance segmentation (ChainerMN).

    Training uses the 'train' and 'valminusminival' splits with images that
    have no annotation removed.  Evaluation on 'minival' is run on rank 0 at
    the cool-down epoch and at the final epoch.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr',
        '-l',
        type=float,
        default=0.0005,
        help='Default value is for 1 GPU.\n'
        'The learning rate should be multiplied by the number of gpu')
    parser.add_argument('--epoch', '-e', type=int, default=18)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=12)
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    # Work on a copy so the override below does not modify the class-level
    # defaults stored on FCISResNet101.
    proposal_creator_params = dict(FCISResNet101.proposal_creator_params)
    proposal_creator_params['min_size'] = 2
    fcis = FCISResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        anchor_scales=(4, 8, 16, 32),
        pretrained_model='imagenet',
        iter2=False,
        proposal_creator_params=proposal_creator_params)
    fcis.use_preset('coco_evaluate')
    proposal_target_creator = ProposalTargetCreator()
    proposal_target_creator.neg_iou_thresh_lo = 0.0
    model = FCISTrainChain(fcis,
                           proposal_target_creator=proposal_target_creator)

    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    def drop_unannotated(dataset):
        """Return ``dataset`` restricted to examples with a non-empty label."""
        keep = np.array([
            i for i, label in enumerate(dataset.slice[:, ['label']])
            if len(label[0]) > 0
        ],
                        dtype=np.int32)
        return dataset.slice[keep]

    # train dataset
    train_dataset = COCOInstanceSegmentationDataset(year='2014', split='train')
    vmml_dataset = COCOInstanceSegmentationDataset(year='2014',
                                                   split='valminusminival')

    # filter non-annotated data
    train_dataset = drop_unannotated(train_dataset)
    vmml_dataset = drop_unannotated(vmml_dataset)

    train_dataset = TransformDataset(
        ConcatenatedDataset(train_dataset, vmml_dataset),
        ('img', 'mask', 'label', 'bbox', 'scale'), Transform(model.fcis))
    # Rank 0 creates the indices; every worker receives its share.
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    # test dataset (built on rank 0 only, where the evaluator is registered)
    if comm.rank == 0:
        test_dataset = COCOInstanceSegmentationDataset(year='2014',
                                                       split='minival',
                                                       use_crowded=True,
                                                       return_crowded=True,
                                                       return_area=True)
        indices = np.arange(len(test_dataset))
        test_dataset = test_dataset.slice[indices]
        test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                     batch_size=1,
                                                     repeat=False,
                                                     shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)

    # Gradients of the head's first convolution are scaled by 3.
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Parameters named 'beta' / 'gamma' and the first extractor stages are
    # not updated.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)

    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    # lr scheduler
    @make_shift('lr')
    def lr_scheduler(trainer):
        """Return the learning rate: warm-up, full rate, then cool-down."""
        base_lr = args.lr

        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        # The warm-up lasts for the first 2000 iterations counted over all
        # workers.
        if (iteration * comm.size) < 2000:
            rate = 0.1
        elif epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        trainer.extend(extensions.snapshot_object(
            model.fcis, filename='snapshot_model.npz'),
                       trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        # With batch_size=1 one epoch is len(train_dataset) iterations, so
        # these triggers fire at the cool-down epoch and at the last epoch.
        trainer.extend(InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.fcis,
            label_names=coco_instance_segmentation_label_names),
                       trigger=ManualScheduleTrigger([
                           len(train_dataset) * args.cooldown_epoch,
                           len(train_dataset) * args.epoch
                       ], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Beispiel #19
0
def main():
    """Train PSPNet (ResNet-50/101) on ADE20K or Cityscapes with ChainerMN.

    ``--dataset`` and ``--model`` are mandatory.  The number of iterations
    defaults to the per-dataset value unless ``--iteration`` is given.
    Validation and reporting run on rank 0 only.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data-dir', default='auto')
    # required=True: without a dataset/model the script used to fail later
    # with a KeyError or an unbound ``model`` variable.
    parser.add_argument('--dataset', choices=('ade20k', 'cityscapes'),
                        required=True)
    parser.add_argument('--model',
                        choices=('pspnet_resnet101', 'pspnet_resnet50'),
                        required=True)
    # type=float: a value given on the command line would otherwise reach the
    # optimizer as a string.
    parser.add_argument('--lr', default=1e-2, type=float)
    parser.add_argument('--batchsize', default=2, type=int)
    parser.add_argument('--out', default='result')
    parser.add_argument('--iteration', default=None, type=int)
    parser.add_argument('--communicator', default='hierarchical')
    args = parser.parse_args()

    dataset_cfgs = {
        'ade20k': {
            'input_size': (473, 473),
            'label_names': ade20k_semantic_segmentation_label_names,
            'iteration': 150000
        },
        'cityscapes': {
            'input_size': (713, 713),
            'label_names': cityscapes_semantic_segmentation_label_names,
            'iteration': 90000
        }
    }
    dataset_cfg = dataset_cfgs[args.dataset]

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Select 'forkserver' and start a dummy process before the communicator
    # is created, as described in the page linked above.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    n_class = len(dataset_cfg['label_names'])
    if args.model == 'pspnet_resnet101':
        model = PSPNetResNet101(n_class,
                                pretrained_model='imagenet',
                                input_size=dataset_cfg['input_size'])
    elif args.model == 'pspnet_resnet50':
        model = PSPNetResNet50(n_class,
                               pretrained_model='imagenet',
                               input_size=dataset_cfg['input_size'])
    # create_mnbn_model returns a converted chain, so ``model`` is re-bound
    # to the instance held by that chain.
    train_chain = create_mnbn_model(TrainChain(model), comm)
    model = train_chain.model
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        train_chain.to_gpu()

    if args.iteration is None:
        n_iter = dataset_cfg['iteration']
    else:
        n_iter = args.iteration

    # The validation split is only loaded on rank 0, where it is evaluated.
    if args.dataset == 'ade20k':
        train = ADE20KSemanticSegmentationDataset(data_dir=args.data_dir,
                                                  split='train')
        if comm.rank == 0:
            val = ADE20KSemanticSegmentationDataset(data_dir=args.data_dir,
                                                    split='val')
        label_names = ade20k_semantic_segmentation_label_names
    elif args.dataset == 'cityscapes':
        train = CityscapesSemanticSegmentationDataset(args.data_dir,
                                                      label_resolution='fine',
                                                      split='train')
        if comm.rank == 0:
            val = CityscapesSemanticSegmentationDataset(
                args.data_dir, label_resolution='fine', split='val')
        label_names = cityscapes_semantic_segmentation_label_names
    train = TransformDataset(train, ('img', 'label'),
                             Transform(model.mean, dataset_cfg['input_size']))

    # Rank 0 creates the indices; every worker receives its share.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    train_iter = chainer.iterators.MultiprocessIterator(
        train, batch_size=args.batchsize, n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(args.lr, 0.9), comm)
    optimizer.setup(train_chain)
    # Weight decay is attached to every parameter except those named
    # 'beta' / 'gamma'.
    for param in train_chain.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(chainer.optimizer.WeightDecay(1e-4))
    # Gradients of the pyramid pooling module, the head and the auxiliary
    # convolutions are scaled by 10.
    for link in [
            model.ppm, model.head_conv1, model.head_conv2,
            train_chain.aux_conv1, train_chain.aux_conv2
    ]:
        for param in link.params():
            param.update_rule.add_hook(GradientScaling(10))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (n_iter, 'iteration'), args.out)
    trainer.extend(PolynomialShift('lr', 0.9, n_iter, optimizer=optimizer),
                   trigger=(1, 'iteration'))

    log_interval = 10, 'iteration'

    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'elapsed_time', 'lr', 'main/loss',
            'validation/main/miou', 'validation/main/mean_class_accuracy',
            'validation/main/pixel_accuracy'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        # Snapshot and evaluation both happen once, at the final iteration.
        trainer.extend(extensions.snapshot_object(
            train_chain.model, 'snapshot_model_{.updater.iteration}.npz'),
                       trigger=(n_iter, 'iteration'))
        val_iter = chainer.iterators.SerialIterator(val,
                                                    batch_size=1,
                                                    repeat=False,
                                                    shuffle=False)
        trainer.extend(SemanticSegmentationEvaluator(val_iter, model,
                                                     label_names),
                       trigger=(n_iter, 'iteration'))

    trainer.run()
Beispiel #20
0
def main(config):
    """Run k-fold training of a classifier with ChainerMN.

    ``config`` is called without arguments and must return an options object
    (``opts``) that carries every setting read below (paths, dataset and
    model factories, optimizer name, loss names, schedule, ...).

    When ``opts.pseudo_labeling_path`` is set, rows of the test CSV whose
    best predicted score exceeds 0.0013 are appended to the training frame
    with the arg-max class as their ``sirna`` label.

    For every fold yielded by ``stratified_groups_kfold`` the function
    writes the fold's train/valid CSV files (rank 0 only), builds a fresh
    model, optimizer, updater and evaluator, and runs a ``Trainer`` whose
    output directory is ``opts.out + '_fold<i>'``.

    Raises:
        ValueError: if ``opts.fc_lossfun`` or ``opts.metric_lossfun`` is not
            one of the names handled below.
    """
    opts = config()

    comm = chainermn.create_communicator(opts.communicator)
    device = comm.intra_rank

    backborn_cfg = opts.backborn_cfg

    df = pd.read_csv(opts.path_data + opts.train_df).sample(frac=1)

    # ---- pseudo labeling ----
    if opts.pseudo_labeling_path is not None:
        test_df = pd.read_csv(opts.path_data + opts.test_df)
        labels = np.load(opts.pseudo_labeling_path, allow_pickle=False)
        # NOTE(review): the predictions are repeated once, presumably so
        # they line up with two consecutive copies of the samples in
        # test_df -- confirm against the file that produced them.
        labels = np.concatenate((labels, labels))
        valid_array = []
        valid_sirna = []
        for row, label in enumerate(labels):
            if label.max() > 0.0013:
                valid_array.append(row)
                valid_sirna.append(label.argmax())
        print(len(valid_array))
        # Copy the selection so the column assignment below writes to an
        # independent frame rather than to a slice of test_df.
        pseudo_df = test_df.iloc[valid_array, :].copy()
        pseudo_df["sirna"] = valid_sirna
        df = pd.concat([df, pseudo_df]).sample(frac=1)

    # Columns written to the per-fold CSV files.
    csv_columns = [
        'id_code', 'experiment', 'plate', 'well', 'sirna', 'filename',
        'cell', 'site'
    ]
    # Optimizers whose step size hyperparameter is called 'lr'; the
    # fallback (Adam) exposes it as 'alpha'.
    lr_optimizers = ("CorrectedMomentumSGD", "NesterovAG")

    for i, (train_df, valid_df) in enumerate(
            stratified_groups_kfold(df,
                                    target=opts.fold_target,
                                    n_splits=opts.fold)):
        if comm.rank == 0:
            train_df.to_csv(
                opts.path_data + 'train' + '_fold' + str(i) + '.csv',
                columns=csv_columns)
            valid_df.to_csv(
                opts.path_data + 'valid' + '_fold' + str(i) + '.csv',
                columns=csv_columns)
            print("Save a csvfile of fold_" + str(i))
        dataset = opts.dataset
        train_dataset = dataset(train_df, opts.path_data)
        val_dataset = dataset(valid_df, opts.path_data)

        backborn = chcv2_get_model(
            backborn_cfg['name'],
            pretrained=backborn_cfg['pretrain'],
            in_size=opts.input_shape)[backborn_cfg['layer']]

        model = opts.model(backborn=backborn).copy(mode='init')
        # The model is moved to the GPU once here; nothing below moves it
        # back, so no second transfer is needed before optimizer setup.
        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()

        train_data = TransformDataset(train_dataset, opts.train_transform)
        val_data = TransformDataset(val_dataset, opts.valid_trainsform)

        # Only rank 0 supplies the dataset; scatter_dataset distributes it.
        if comm.rank == 0:
            train_indices = train_data
            val_indices = val_data
        else:
            train_indices = None
            val_indices = None

        train_data = chainermn.scatter_dataset(train_indices,
                                               comm,
                                               shuffle=True)
        val_data = chainermn.scatter_dataset(val_indices, comm, shuffle=False)
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data,
            opts.batchsize,
            shuffle=True,
            n_processes=opts.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val_data,
            opts.batchsize,
            repeat=False,
            shuffle=False,
            n_processes=opts.loaderjob)
        print('finished loading dataset')

        if opts.optimizer == "CorrectedMomentumSGD":
            optimizer = chainermn.create_multi_node_optimizer(
                CorrectedMomentumSGD(lr=opts.lr), comm)
        elif opts.optimizer == "NesterovAG":
            optimizer = chainermn.create_multi_node_optimizer(
                NesterovAG(lr=opts.lr), comm)
        else:
            optimizer = chainermn.create_multi_node_optimizer(
                Adam(alpha=opts.alpha,
                     weight_decay_rate=opts.weight_decay,
                     adabound=True,
                     final_lr=0.5), comm)

        optimizer.setup(model)
        if opts.optimizer == "CorrectedMomentumSGD":
            # Weight decay is applied as a hook on every parameter except
            # those named 'beta' / 'gamma'.
            for param in model.params():
                if param.name not in ('beta', 'gamma'):
                    param.update_rule.add_hook(WeightDecay(opts.weight_decay))

        # Classification loss.
        if opts.fc_lossfun == 'softmax_cross_entropy':
            fc_lossfun = F.softmax_cross_entropy
        else:
            if opts.fc_lossfun == 'focal_loss':
                loss_cls = FocalLoss
            elif opts.fc_lossfun == 'auto_focal_loss':
                loss_cls = AutoFocalLoss
            elif opts.fc_lossfun == 'auto_focal_loss_bce':
                # Both the smoothed and the plain variant use the BCE class.
                loss_cls = AutoFocalLossBCE
            else:
                raise ValueError(
                    'unsupported fc_lossfun: {!r}'.format(opts.fc_lossfun))
            if opts.ls:
                focal_loss = loss_cls(label_smoothing=True)
            else:
                focal_loss = loss_cls()
            fc_lossfun = focal_loss.loss

        # Metric-learning loss.
        if opts.metric_lossfun == 'arcface':
            metric_lossfun = ArcFace().loss
        elif opts.metric_lossfun == 'adacos':
            metric_lossfun = AdaCos().loss
        else:
            raise ValueError(
                'unsupported metric_lossfun: {!r}'.format(
                    opts.metric_lossfun))

        updater = opts.updater(train_iter,
                               optimizer,
                               model,
                               device=device,
                               max_epoch=opts.max_epoch,
                               fix_sche=opts.fix_sche,
                               metric_lossfun=metric_lossfun,
                               fc_lossfun=fc_lossfun,
                               metric_w=opts.metric_w,
                               fc_w=opts.fc_w)
        evaluator = chainermn.create_multi_node_evaluator(
            opts.evaluator(val_iter,
                           model,
                           device=device,
                           max_epoch=opts.max_epoch,
                           fix_sche=opts.fix_sche,
                           metric_lossfun=metric_lossfun,
                           fc_lossfun=fc_lossfun,
                           metric_w=opts.metric_w,
                           fc_w=opts.fc_w), comm)

        trainer = training.Trainer(updater, (opts.max_epoch, 'epoch'),
                                   out=opts.out + '_fold' + str(i))

        # Decay the step size at the configured epochs; only the name of
        # the hyperparameter differs between optimizers.
        shift_attr = 'lr' if opts.optimizer in lr_optimizers else 'alpha'
        trainer.extend(extensions.ExponentialShift(shift_attr, opts.shift_lr),
                       trigger=ManualScheduleTrigger(opts.lr_points, 'epoch'))

        trainer.extend(evaluator, trigger=(int(opts.max_epoch / 10), 'epoch'))
        log_interval = 0.1, 'epoch'
        print_interval = 0.1, 'epoch'

        # Reporting and snapshots happen on rank 0 only.
        if comm.rank == 0:
            trainer.extend(chainer.training.extensions.observe_lr(),
                           trigger=log_interval)
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model' + '_{.updater.epoch}.npz'),
                           trigger=(opts.max_epoch / 10, 'epoch'))
            # Keep the weights with the best validation accuracy so far.
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model_f1max.npz'),
                           trigger=chainer.training.triggers.MaxValueTrigger(
                               'validation/main/accuracy',
                               trigger=(opts.max_epoch / 10, 'epoch')))
            trainer.extend(extensions.LogReport(trigger=log_interval))
            trainer.extend(extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
                'main/face_loss', 'main/ce_loss', 'main/accuracy',
                'validation/main/loss', 'validation/main/face_loss',
                'validation/main/ce_loss', 'validation/main/accuracy'
            ]),
                           trigger=print_interval)
            trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.run()
Beispiel #21
0
def do():
    """Parse command-line options and run a short SSD300 training loop.

    The training set is the module-level ``train_dataset`` wrapped in
    ``Transform``; the number of foreground classes comes from
    ``ssdd.labels``.  The learning rate starts at 1e-3 and is multiplied by
    0.1 at each iteration listed in ``--step``.  A trainer snapshot is
    written every 4 iterations into ``--out``.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): --model is accepted but SSD300 is always built below.
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--iteration', type=int, default=64)
    parser.add_argument('--step', type=int, nargs='*', default=[8, 16])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    # NOTE(review): --resume is parsed but no snapshot is ever loaded.
    parser.add_argument('--resume')
    args = parser.parse_args()

    model = SSD300(
        n_fg_class=len(ssdd.labels),
        pretrained_model='imagenet'
    )
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    train = TransformDataset(
        train_dataset,
        Transform(model.coder, model.insize, model.mean),
    )
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Parameters named 'b' get their gradient scaled by 2; every other
    # parameter gets weight decay instead.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(args.step, 'iteration')
    )

    trainer.extend(extensions.ProgressBar(update_interval=1))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                'epoch', file_name='accuracy.png'))
    # The snapshot fires every 4 iterations, so the file name is keyed on
    # the iteration counter; keying it on the epoch made snapshots taken
    # within the same epoch overwrite each other.
    trainer.extend(
        extensions.snapshot(
            filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=(4, 'iteration')
    )

    trainer.run()
Beispiel #22
0
    return y_slice, x_slice


if __name__ == '__main__':
    from chainercv.visualizations import vis_bbox
    from chainercv.visualizations import vis_semantic_segmentation
    from chainercv.chainer_experimental.datasets.sliceable import TransformDataset
    import matplotlib.pyplot as plt
    import numpy as np

    voc_segm = VOCSemanticSegmentationWithBboxDataset(split='aug')
    dataset = VOCSemanticSegmentationWithBboxDataset(
        split='aug').slice[:, ['img', 'bbox', 'label']]
    # transformed = TransformDataset(
    #     dataset, ('img', 'label_map'), grabcut_transform)
    transformed = TransformDataset(dataset, ('img', 'label_map'),
                                   SimpleDoesItTransform())

    indices = np.random.choice(np.arange(len(voc_segm)), size=(10, ))
    for index in indices:
        img, label_map, bbox, label = voc_segm[index]

        vis_bbox(img, bbox, label)
        plt.show()
        # see doc for better visualization
        vis_semantic_segmentation(img, label_map)
        plt.show()

        img, label_map = transformed[index]
        vis_semantic_segmentation(img,
                                  label_map,
                                  alpha=0.6,
def main():
    """Train an AdaLoss-scaled classifier across ChainerMN workers.

    Configuration is read from the module-level ``args`` object (and the
    module-level ``_mean``).  The function builds directory-parsed train/val
    datasets, scatters index arrays from rank 0 to every worker, wraps the
    selected architecture in ``AdaLossScaled`` and ``L.Classifier``, and
    runs a ``Trainer`` with a warm-up + step learning-rate schedule.  After
    training, rank 0 writes ``loss_scale.csv`` and ``grad_stats.csv`` into
    ``args.out``.
    """
    # Start the multiprocessing environment
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        # Start and join an empty process, as in the FAQ linked above.
        p = multiprocessing.Process()
        p.start()
        p.join()

    # Set up workspace
    # 16 GiB GPU RAM for workspace (16 * 1024**3 bytes)
    chainer.cuda.set_max_workspace_size(16 * 1024 * 1024 * 1024)

    # Setup the multi-node environment
    comm = chainermn.create_communicator(args.communicator)
    # The intra-node rank doubles as the GPU id used by this process.
    device = comm.intra_rank
    print(
        '==> Successfully setup communicator: "{}" rank: {} device: {} size: {}'
        .format(args.communicator, comm.rank, device, comm.size))
    set_random_seed(args, device)

    # Setup LR
    if args.lr is not None:
        lr = args.lr
    else:
        # 0.1 per 256 samples of global batch (per-worker batch * workers).
        lr = 0.1 * (args.batchsize * comm.size) / 256  # TODO: why?
        if comm.rank == 0:
            print(
                'LR = {} is selected based on the linear scaling rule'.format(
                    lr))

    # Setup dataset
    train_dir = os.path.join(args.dataset_dir, 'train')
    val_dir = os.path.join(args.dataset_dir, 'val')
    label_names = datasets.directory_parsing_label_names(train_dir)
    train_data = datasets.DirectoryParsingLabelDataset(train_dir)
    val_data = datasets.DirectoryParsingLabelDataset(val_dir)
    train_data = TransformDataset(train_data, ('img', 'label'),
                                  TrainTransform(_mean, args))
    val_data = TransformDataset(val_data, ('img', 'label'),
                                ValTransform(_mean, args))
    print('==> [{}] Successfully finished loading dataset'.format(comm.rank))

    # Initializing dataset iterators
    # Only rank 0 builds the full index arrays; the other ranks pass None
    # and receive their share from scatter_dataset below.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(train_indices,
                                              comm,
                                              shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    # Each worker keeps only the examples selected by its scattered indices.
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(val_data,
                                              args.batchsize,
                                              repeat=False,
                                              shuffle=False,
                                              n_processes=args.loaderjob)

    # Create the model
    kwargs = {}
    # NOTE(review): this checks dtype == 'float16' while the optimizer
    # section below checks dtype == 'mixed16' -- confirm both are intended.
    if args.first_bn_mixed16 and args.dtype == 'float16':
        print('==> Setting the first BN layer to mixed16')
        kwargs['first_bn_mixed16'] = True

    # Initialize the model
    net = models.__dict__[args.arch](n_class=len(label_names), **kwargs)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in net.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    # Apply ada loss transform
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    # Update the model to support AdaLoss
    net = AdaLossScaled(net,
                        init_scale=args.init_scale,
                        cfg={
                            'loss_scale_method': args.loss_scale_method,
                            'scale_upper_bound': args.scale_upper_bound,
                            'accum_upper_bound': args.accum_upper_bound,
                            'update_per_n_iteration':
                            args.update_per_n_iteration,
                            'recorder': recorder,
                        },
                        transforms=[
                            AdaLossTransformLinear(),
                            AdaLossTransformBottleneck(),
                            AdaLossTransformBasicBlock(),
                            AdaLossTransformConv2DBNActiv(),
                        ],
                        verbose=args.verbose)

    if comm.rank == 0:  # print the network only on rank 0
        print(net)
    net = L.Classifier(net)
    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])

    # Setup optimizer
    optim = chainermn.create_multi_node_optimizer(
        optimizers.CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    if args.dtype == 'mixed16':
        print('==> Using FP32 update for dtype=mixed16')
        optim.use_fp32_update()  # by default use fp32 update

        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optim.loss_scaling(interval=args.dynamic_interval, scale=None)
        else:
            optim.loss_scaling(interval=float('inf'), scale=None)
            optim._loss_scale_max = 1.0  # to prevent actual loss scaling

    optim.setup(net)

    # setup weight decay
    # Parameters named 'beta' / 'gamma' are excluded from weight decay.
    for param in net.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    # move the model to this process's GPU (device is comm.intra_rank)
    if device >= 0:
        chainer.cuda.get_device(device).use()
        net.to_gpu()

    # Create an updater that implements how to update based on one train_iter input
    updater = chainer.training.StandardUpdater(train_iter,
                                               optim,
                                               device=device)
    # Setup Trainer
    # Stop after args.epoch epochs unless an iteration budget is given.
    stop_trigger = (args.epoch, 'epoch')
    if args.iter is not None:
        stop_trigger = (args.iter, 'iteration')
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        """Return the learning rate for the current (fractional) epoch.

        ``lr`` is the base rate taken from the enclosing scope.  During the
        first 5 epochs, a base rate above 0.1 is ramped linearly from 0.1 up
        to ``lr``; otherwise the base rate is used unchanged.  Afterwards
        the base rate is multiplied by 1 until epoch 30, 0.1 until epoch 60,
        0.01 until epoch 80 and 0.001 from then on.
        """
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5  # NOTE: mentioned the original ResNet paper.
        if epoch < warmup_epoch:
            if lr > 0.1:
                # warmup_rate * lr == 0.1, so the ramp starts at 0.1.
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, net, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Logging, printing and snapshot extensions are registered on rank 0 only.
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)

        # NOTE: may take snapshot every iteration now
        # The snapshot unit follows the stop trigger's unit.
        snapshot_label = 'epoch' if args.iter is None else 'iteration'
        snapshot_trigger = (args.snapshot_freq, snapshot_label)
        snapshot_filename = ('snapshot_' + snapshot_label + '_{.updater.' +
                             snapshot_label + '}.npz')
        trainer.extend(extensions.snapshot(filename=snapshot_filename),
                       trigger=snapshot_trigger)

        trainer.extend(extensions.LogReport(trigger=log_interval))
        # Report the optimizer's private _loss_scale attribute as 'loss_scale'.
        trainer.extend(extensions.observe_value(
            'loss_scale',
            lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
                       trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'loss_scale',
            'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    # Give the recorder and the monitor hook access to the trainer.
    recorder.trainer = trainer
    hook.trainer = trainer
    with ExitStack() as stack:
        # The monitor hook is active on rank 0 only.
        if comm.rank == 0:
            stack.enter_context(hook)
        trainer.run()

    # store recorded results
    if comm.rank == 0:  # NOTE: only export in the first rank
        recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
        hook.export_history().to_csv(os.path.join(args.out, 'grad_stats.csv'))
Beispiel #24
0
def train_one_epoch(model, train_data, lr, gpu, batchsize, out):
    """Train ``model`` for exactly one epoch and evaluate it on VOC val.

    Args:
        model: segmentation network; wrapped in
            ``PixelwiseSoftmaxClassifier`` for training and passed as-is to
            the evaluator and the snapshot extension.
        train_data: dataset that is wrapped with ``SimpleDoesItTransform``
            to yield ``('img', 'label_map')``.
        lr: learning rate of the MomentumSGD optimizer.
        gpu: GPU id; the training chain is moved to it when non-negative.
        batchsize: mini-batch size of the training iterator.
        out: output directory handed to the ``Trainer``.
    """
    classifier = PixelwiseSoftmaxClassifier(model)
    if gpu >= 0:
        # Select the requested GPU, then copy the training chain onto it.
        chainer.cuda.get_device_from_id(gpu).use()
        classifier.to_gpu()

    report_every = (0.1, 'epoch')
    evaluate_every = (1, 'epoch')
    stop_after = (1, 'epoch')

    # Datasets
    transform = SimpleDoesItTransform(model.mean)
    train_data = TransformDataset(train_data, ('img', 'label_map'), transform)
    val_full = VOCSemanticSegmentationWithBboxDataset(split='val')
    val = val_full.slice[:, ['img', 'label_map']]

    # Iterators
    train_iter = iterators.MultiprocessIterator(train_data, batchsize)
    val_iter = iterators.MultiprocessIterator(
        val, 1, shuffle=False, repeat=False, shared_mem=100000000)

    # Optimizer with weight decay
    sgd = optimizers.MomentumSGD(lr=lr, momentum=0.9)
    sgd.setup(classifier)
    sgd.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0001))

    # Updater and trainer
    updater = training.updaters.StandardUpdater(
        train_iter, sgd, device=gpu)
    trainer = training.Trainer(updater, stop_after, out=out)

    # Extensions are registered in the same order as before.
    trainer.extend(extensions.LogReport(trigger=report_every))
    trainer.extend(extensions.observe_lr(), trigger=report_every)
    trainer.extend(extensions.dump_graph('main/loss'))

    if extensions.PlotReport.available():
        plots = (('main/loss', 'loss.png'),
                 ('validation/main/miou', 'miou.png'))
        for y_key, png_name in plots:
            trainer.extend(
                extensions.PlotReport([y_key],
                                      x_key='iteration',
                                      file_name=png_name))

    snapshot = extensions.snapshot_object(model, filename='snapshot.npy')
    trainer.extend(snapshot, trigger=stop_after)

    printed_keys = [
        'epoch', 'iteration', 'elapsed_time', 'lr', 'main/loss',
        'validation/main/miou', 'validation/main/mean_class_accuracy',
        'validation/main/pixel_accuracy'
    ]
    trainer.extend(extensions.PrintReport(printed_keys),
                   trigger=report_every)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    evaluator = SemanticSegmentationEvaluator(
        val_iter, model, voc_semantic_segmentation_label_names)
    trainer.extend(evaluator, trigger=evaluate_every)

    trainer.run()
Beispiel #25
0
def main():
    """Train Faster R-CNN FPN on COCO 2017 with ChainerMN.

    Command-line options select the backbone (``--model``), the global
    batch size, the iteration budget and LR-decay steps (both expressed for
    a batch size of 16 and rescaled by ``16 / batchsize``), the output
    directory and an optional snapshot to resume from.

    Rank 0 reports progress, takes a trainer snapshot every 10000
    iterations and saves the model once when the (rescaled) iteration
    budget is reached.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'),
        default='faster_rcnn_fpn_resnet50')
    parser.add_argument('--batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=90000)
    parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator()
    # The intra-node rank doubles as the GPU id used by this process.
    device = comm.intra_rank

    if args.model == 'faster_rcnn_fpn_resnet50':
        model = FasterRCNNFPNResNet50(
            n_fg_class=len(coco_bbox_label_names), pretrained_model='imagenet')
    elif args.model == 'faster_rcnn_fpn_resnet101':
        model = FasterRCNNFPNResNet101(
            n_fg_class=len(coco_bbox_label_names), pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    train = TransformDataset(
        COCOBboxDataset(year='2017', split='train'),
        ('img', 'bbox', 'label'), transform)

    # Rank 0 builds the index array; every rank receives its share.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # --batchsize is the global batch size; each worker takes its part.
    train_iter = chainer.iterators.MultithreadIterator(
        train, args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze the first two stages of the backbone and every BN layer.
    model.extractor.base.conv1.disable_update()
    model.extractor.base.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    # Iteration budget rescaled from the reference batch size of 16.  The
    # final model snapshot below uses the same value, so a custom
    # --iteration still saves the model at the end of training.
    n_iteration = args.iteration * 16 / args.batchsize

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = training.Trainer(
        updater, (n_iteration, 'iteration'), args.out)

    @make_shift('lr')
    def lr_schedule(trainer):
        """Return the learning rate for the current iteration.

        Linear warm-up from one third of the base rate over the first 500
        iterations, then the base rate multiplied by 0.1 for every rescaled
        ``--step`` that has been reached.
        """
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = trainer.updater.iteration
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        else:
            rate = 1
            for step in args.step:
                if iteration >= step * 16 / args.batchsize:
                    rate *= 0.1

        return base_lr * rate

    trainer.extend(lr_schedule)

    # Reporting and snapshots happen on rank 0 only.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr', 'main/loss',
             'main/loss/rpn/loc', 'main/loss/rpn/conf',
             'main/loss/head/loc', 'main/loss/head/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(n_iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def main():
    """Train Faster R-CNN (VGG16) on Epic Kitchens 2018 with ChainerMN.

    Options: ``--batchsize`` (per-process batch size), ``--lr`` (initial
    learning rate), ``--out`` (output directory) and ``--resume`` (trainer
    snapshot to load before running).  Training lasts 18 epochs with the
    learning rate multiplied by 0.1 at epochs 12 and 15; rank 0 reports
    progress and saves the detector once per epoch.
    """
    cli = argparse.ArgumentParser()
    option_table = (
        ('--batchsize', {'type': int, 'default': 1}),
        ('--lr', {'type': float, 'default': 1e-3}),
        ('--out', {'default': 'result'}),
        ('--resume', {}),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)
    args = cli.parse_args()

    comm = chainermn.create_communicator()
    # The intra-node rank doubles as the GPU id used by this process.
    device = comm.intra_rank

    faster_rcnn = FasterRCNNVGG16(
        n_fg_class=len(epic_kitchens_bbox_label_names),
        pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')

    model = FasterRCNNTrainChain(faster_rcnn)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # Rank 0 enumerates the raw dataset; the other ranks contribute None
    # and receive their share of the indices from scatter_dataset.
    raw_train = EpicKitchensBboxDataset(year='2018', split='train')
    all_indices = np.arange(len(raw_train)) if comm.rank == 0 else None
    transformed = TransformDataset(
        raw_train, ('img', 'bbox', 'label', 'scale'), Transform(faster_rcnn))

    local_indices = chainermn.scatter_dataset(all_indices, comm, shuffle=True)
    train = transformed.slice[local_indices]

    train_iter = chainer.iterators.SerialIterator(
        train, batch_size=args.batchsize)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)

    lr_decay = extensions.ExponentialShift('lr', 0.1, init=args.lr)
    decay_epochs = triggers.ManualScheduleTrigger([12, 15], 'epoch')
    trainer.extend(lr_decay, trigger=decay_epochs)

    # Reporting and model snapshots happen on rank 0 only.
    if comm.rank == 0:
        log_interval = (10, 'iteration')
        reported = [
            'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
            'main/roi_loc_loss', 'main/roi_cls_loss', 'main/rpn_loc_loss',
            'main/rpn_cls_loss'
        ]
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(reported),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))

        save_detector = extensions.snapshot_object(
            model.faster_rcnn, 'model_iter_{.updater.iteration}.npz')
        trainer.extend(save_detector, trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an SSD detector on EPIC-KITCHENS 2018 across ChainerMN workers.

    Command-line options:
        --model      'ssd300' or 'ssd512' (default 'ssd300').
        --batchsize  Batch size handed to the training iterator (default 32).
        --lr         Initial learning rate for the schedule (default 1e-3).
        --out        Trainer output directory (default 'result').
        --resume     Optional trainer snapshot to load before running.

    Rank 0 builds the full index range of the training split and
    ``chainermn.scatter_dataset`` distributes a shuffled share to every
    worker. Training stops after 18 epochs; the ``lr`` attribute is shifted
    by a factor of 0.1 at epochs 12 and 15. Logging, the progress bar and
    the per-epoch model snapshot are registered on rank 0 only.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # MultiprocessIterator workers must not be created by a plain fork once
    # MPI is initialised.  Select the forkserver start method and launch the
    # server through a throwaway child *before* the communicator exists.
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        dummy = multiprocessing.Process()
        dummy.start()
        dummy.join()

    comm = chainermn.create_communicator()
    # One GPU per process, selected by the rank inside the node.
    device = comm.intra_rank

    # ``choices`` above restricts --model to exactly these two values.
    model_cls = SSD300 if args.model == 'ssd300' else SSD512
    model = model_cls(n_fg_class=len(epic_kitchens_bbox_category_names),
                      pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = EpicKitchensBboxDataset(year='2018', split='train')
    # Only rank 0 supplies the indices; the other ranks receive their share
    # from scatter_dataset below.
    indices = np.arange(len(train)) if comm.rank == 0 else None
    train = TransformDataset(train, ('img', 'mb_loc', 'mb_label'),
                             Transform(model.coder, model.insize, model.mean))

    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        args.batchsize,
                                                        n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # Parameters named 'b' get their gradient scaled by 2; every other
    # parameter gets weight decay instead.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}.npz'),
                       trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Beispiel #28
0
def main():
    """Train SSD on PASCAL VOC with adaptive loss scaling under ChainerMN.

    The function parses the command line, prepares the multiprocessing start
    method, creates a ``pure_nccl`` communicator, configures the global data
    type, builds SSD300/SSD512 and wraps it in ``AdaLossScaled``.  It then
    trains on VOC2007+VOC2012 ``trainval`` for ``--iteration`` iterations.
    On rank 0 it additionally evaluates on VOC2007 ``test``, logs, takes
    snapshots, and finally writes the recorded statistics as CSV files into
    ``--out``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    # NOTE(review): --np is parsed but never read anywhere in this function.
    parser.add_argument('--np', type=int, default=8)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--dtype',
                        type=str,
                        choices=dtypes.keys(),
                        default='float32',
                        help='Select the data type of the model')
    parser.add_argument('--model-dir',
                        default=None,
                        type=str,
                        help='Where to store models')
    parser.add_argument('--dataset-dir',
                        default=None,
                        type=str,
                        help='Where to store datasets')
    parser.add_argument('--dynamic-interval',
                        default=None,
                        type=int,
                        help='Interval for dynamic loss scaling')
    parser.add_argument('--init-scale',
                        default=1,
                        type=float,
                        help='Initial scale for ada loss')
    parser.add_argument('--loss-scale-method',
                        default='approx_range',
                        type=str,
                        help='Method for adaptive loss scaling')
    parser.add_argument('--scale-upper-bound',
                        default=16,
                        type=float,
                        help='Hard upper bound for each scale factor')
    parser.add_argument('--accum-upper-bound',
                        default=1024,
                        type=float,
                        help='Accumulated upper bound for all scale factors')
    parser.add_argument('--update-per-n-iteration',
                        default=1,
                        type=int,
                        help='Update the loss scale value per n iteration')
    parser.add_argument('--snapshot-per-n-iteration',
                        default=10000,
                        type=int,
                        help='The frequency of taking snapshots')
    parser.add_argument('--n-uf', default=1e-3, type=float)
    parser.add_argument('--nosanity-check', default=False, action='store_true')
    parser.add_argument('--nouse-fp32-update',
                        default=False,
                        action='store_true')
    parser.add_argument('--profiling', default=False, action='store_true')
    parser.add_argument('--verbose',
                        action='store_true',
                        default=False,
                        help='Verbose output')
    args = parser.parse_args()

    # Select the forkserver start method and run an empty dummy process
    # before the communicator is created, as described in
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator('pure_nccl')
    # The rank within the node doubles as the GPU id for this process.
    device = comm.intra_rank

    # Set up the workspace.
    # NOTE(review): the value below is 16 * 1024**3 bytes (16 GiB); an earlier
    # comment here said 12 GB -- confirm which size is intended.
    chainer.cuda.set_max_workspace_size(16 * 1024 * 1024 * 1024)
    chainer.global_config.cv_resize_backend = 'cv2'

    # Set up the data type.
    # Models initialized after this point are cast to the configured dtype.
    # We have to forbid the usage of cuDNN for anything other than float32.
    if args.dtype != 'float32':
        chainer.global_config.use_cudnn = 'never'
    chainer.global_config.dtype = dtypes[args.dtype]
    print('==> Setting the data type to {}'.format(args.dtype))

    # The dataset root is pointed at --model-dir while the model is built;
    # it is switched to --dataset-dir further down, before the datasets are
    # created.
    if args.model_dir is not None:
        chainer.dataset.set_dataset_root(args.model_dir)
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')

    ######################################
    # Setup model
    #######################################
    # Apply the adaptive loss scaling transform.
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    profiler = Profiler()
    # The sanity checker is disabled (None) when --nosanity-check is given.
    sanity_checker = SanityChecker(
        check_per_n_iter=100) if not args.nosanity_check else None
    # Wrap the model so that it supports AdaLoss.
    # TODO: refactor
    model_ = AdaLossScaled(
        model,
        init_scale=args.init_scale,
        cfg={
            'loss_scale_method': args.loss_scale_method,
            'scale_upper_bound': args.scale_upper_bound,
            'accum_upper_bound': args.accum_upper_bound,
            'update_per_n_iteration': args.update_per_n_iteration,
            'recorder': recorder,
            'profiler': profiler,
            'sanity_checker': sanity_checker,
            'n_uf_threshold': args.n_uf,
            # 'power_of_two': False,
        },
        transforms=[
            AdaLossTransformLinear(),
            AdaLossTransformConvolution2D(),
        ],
        verbose=args.verbose)

    if comm.rank == 0:
        print(model)

    # The train chain wraps the AdaLoss-scaled model; the unwrapped ``model``
    # is still used below for the coder, the evaluator and the snapshot.
    train_chain = MultiboxTrainChain(model_, comm=comm)
    chainer.cuda.get_device_from_id(device).use()

    # Move the sub-links of the original model to the GPU one by one.
    model.coder.to_gpu()
    model.extractor.to_gpu()
    model.multibox.to_gpu()

    # Passed to MultiprocessIterator as ``shared_mem`` below.
    shared_mem = 100 * 1000 * 1000 * 4

    if args.dataset_dir is not None:
        chainer.dataset.set_dataset_root(args.dataset_dir)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder,
                  model.insize,
                  model.mean,
                  dtype=dtypes[args.dtype]))

    # Only rank 0 supplies the index range; scatter_dataset hands every rank
    # its shuffled share.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # --batchsize is divided by the number of workers to get the per-worker
    # batch size.
    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        args.batchsize //
                                                        comm.size,
                                                        n_processes=8,
                                                        n_prefetch=2,
                                                        shared_mem=shared_mem)

    if comm.rank == 0:  # NOTE: only performed on the first device
        test = VOCBboxDataset(year='2007',
                              split='test',
                              use_difficult=True,
                              return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.test_batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # NOTE(review): the plain ExponentialShift extension further down is
    # commented out, and the warmup shift is only registered when
    # --batchsize != 32, so with the default batch size nothing in this
    # function sets 'lr' from --lr -- confirm this is intended.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    if args.dtype == 'mixed16':
        if not args.nouse_fp32_update:
            print('==> Using FP32 update for dtype=mixed16')
            optimizer.use_fp32_update()  # by default use fp32 update

        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optimizer.loss_scaling(interval=args.dynamic_interval, scale=None)
        else:
            optimizer.loss_scaling(interval=float('inf'), scale=None)
            optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling

    optimizer.setup(train_chain)
    # Parameters named 'b' get gradient scaling by 2; all other parameters
    # get weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)
    # if args.dtype == 'mixed16':
    #     updater.loss_scale = 8
    iteration_interval = (args.iteration, 'iteration')

    trainer = training.Trainer(updater, iteration_interval, args.out)
    # trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
    #                trigger=triggers.ManualScheduleTrigger(
    #                    args.step, 'iteration'))
    # The warmup learning-rate schedule is registered only for non-default
    # batch sizes.
    if args.batchsize != 32:
        warmup_attr_ratio = 0.1
        # NOTE: this is confusing but it means n_iter
        warmup_n_epoch = 1000
        lr_shift = chainerlp.extensions.ExponentialShift(
            'lr',
            0.1,
            init=args.lr * warmup_attr_ratio,
            warmup_attr_ratio=warmup_attr_ratio,
            warmup_n_epoch=warmup_n_epoch,
            schedule=args.step)
        trainer.extend(lr_shift, trigger=(1, 'iteration'))

    if comm.rank == 0:
        # Evaluation is skipped entirely when profiling; otherwise it runs at
        # every --step point and at the final iteration.
        if not args.profiling:
            trainer.extend(DetectionVOCEvaluator(
                test_iter,
                model,
                use_07_metric=True,
                label_names=voc_bbox_label_names),
                           trigger=triggers.ManualScheduleTrigger(
                               args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        # Report the optimizer's private _loss_scale attribute as 'loss_scale'.
        trainer.extend(extensions.observe_value(
            'loss_scale',
            lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
                       trigger=log_interval)

        metrics = [
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]
        # 'loss_scale' is only printed when dynamic loss scaling is enabled.
        if args.dynamic_interval is not None:
            metrics.insert(2, 'loss_scale')

        trainer.extend(extensions.PrintReport(metrics), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(),
                       trigger=(args.snapshot_per_n_iteration, 'iteration'))
        # The bare model is saved once, at the final iteration.
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
                       trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])
    # Both the recorder and the monitor hook are given a reference to the
    # trainer.
    recorder.trainer = trainer
    hook.trainer = trainer

    # ExitStack lets the monitor hook be entered on rank 0 only while every
    # rank still runs the trainer inside the same ``with`` block.
    with ExitStack() as stack:
        if comm.rank == 0:
            stack.enter_context(hook)
        trainer.run()

    # store recorded results
    if comm.rank == 0:  # NOTE: only export in the first rank
        recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
        profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
        if sanity_checker:
            sanity_checker.export().to_csv(
                os.path.join(args.out, 'sanity_check.csv'))
        hook.export_history().to_csv(os.path.join(args.out, 'grad_stats.csv'))
Beispiel #29
0
def main():
    """Train an SSD detector on PASCAL VOC across ChainerMN workers.

    Command-line options:
        --model           'ssd300' or 'ssd512' (default 'ssd300').
        --batchsize       Total batch size; each worker's iterator uses
                          ``batchsize // comm.size`` (default 32).
        --test-batchsize  Batch size of the rank-0 evaluation iterator
                          (default 16).
        --iteration       Number of training iterations (default 120000).
        --step            Iterations at which ``lr`` is shifted by 0.1
                          (default 80000 100000).
        --out             Trainer output directory (default 'result').
        --resume          Optional trainer snapshot to load before running.

    Training data is VOC2007+VOC2012 ``trainval``; rank 0 builds the index
    range and ``chainermn.scatter_dataset`` distributes a shuffled share to
    every worker. Evaluation on VOC2007 ``test``, logging and snapshots are
    registered on rank 0 only.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # MultiprocessIterator workers must not be created by a plain fork once
    # MPI is initialised.  Select the forkserver start method and launch the
    # server through a throwaway child *before* the communicator exists.
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        dummy = multiprocessing.Process()
        dummy.start()
        dummy.join()

    comm = chainermn.create_communicator()
    # One GPU per process, selected by the rank inside the node.
    device = comm.intra_rank

    # ``choices`` above restricts --model to exactly these two values.
    model_cls = SSD300 if args.model == 'ssd300' else SSD512
    model = model_cls(n_fg_class=len(voc_bbox_label_names),
                      pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))

    # Only rank 0 supplies the indices; the other ranks receive their share
    # from scatter_dataset.
    indices = np.arange(len(train)) if comm.rank == 0 else None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        args.batchsize //
                                                        comm.size,
                                                        n_processes=2)

    if comm.rank == 0:
        test = VOCBboxDataset(year='2007',
                              split='test',
                              use_difficult=True,
                              return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.test_batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # Parameters named 'b' get their gradient scaled by 2; every other
    # parameter gets weight decay instead.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))

    if comm.rank == 0:
        # Evaluate at every --step point and at the final iteration.
        trainer.extend(DetectionVOCEvaluator(test_iter,
                                             model,
                                             use_07_metric=True,
                                             label_names=voc_bbox_label_names),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # Full trainer snapshots follow the same schedule as evaluation; the
        # bare model is saved once, at the final iteration.
        trainer.extend(extensions.snapshot(),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
                       trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Beispiel #30
0
from darts.operations import *

from darts.cifar_transforms import cifar10_val_transform
from chainercv.chainer_experimental.datasets.sliceable import TransformDataset
from darts.links.model import TrainChain

if __name__ == '__main__':
    # Evaluate a pretrained DARTS CIFAR network on the CIFAR-10 test split
    # and print its top-1 error.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--pretrained-model', type=str,
                            default='model.npz')
    arg_parser.add_argument('--gpu', type=int, default=-1)
    args = arg_parser.parse_args()

    # Restore the weights, then switch the global config to test mode.
    model = NetworkCIFAR(DARTS)
    chainer.serializers.load_npz(args.pretrained_model, model)
    chainer.global_config.train = False

    classifier = TrainChain(model)
    use_gpu = args.gpu >= 0
    if use_gpu:
        chainer.cuda.get_device_from_id(args.gpu).use()
        classifier.to_gpu()

    # Only the second split returned by get_cifar10 is evaluated.
    val = chainer.datasets.get_cifar10()[1]
    val = TransformDataset(val, ('img', 'label'), cifar10_val_transform)

    # Single pass over the data, in order.
    val_iter = chainer.iterators.SerialIterator(val,
                                                32,
                                                repeat=False,
                                                shuffle=False)
    evaluator = chainer.training.extensions.Evaluator(val_iter,
                                                      classifier,
                                                      device=args.gpu)
    metrics = evaluator()
    top1_error = 100 * float(1 - metrics['main/accuracy'])
    print('Top 1 error {}%'.format(top1_error))