Example #1
 def init_trainer(self, trainer):
     if trainer.config['loss_function'] == 'default':
         trainer.loss_function = MixSoftmaxCrossEntropyLoss(aux=True)
     else:
         trainer.loss_function = getattr(
             gluoncv.loss, trainer.config['loss_function'])(
                 **trainer.config['loss_function_parameters'])
     trainer.lr_scheduler = gluoncv.utils.LRScheduler(
         mode='poly',
         baselr=trainer.config['learn_rate'],
         niters=len(trainer.dataloader),
         nepochs=50)
     trainer.model.model = DataParallelModel(trainer.model.model,
                                             self.ctx_list)
     trainer.loss_function = DataParallelCriterion(trainer.loss_function,
                                                   self.ctx_list)
     kv = mxnet.kv.create('local')
     optimizer = trainer.config['optimizer']
     if optimizer not in ['sgd']:
         optimizer = 'sgd'
     trainer.optimizer = gluon.Trainer(
         trainer.model.model.module.collect_params(),
         optimizer, {
             'lr_scheduler': trainer.lr_scheduler,
             'wd': 0.0001,
             'momentum': 0.9,
             'multi_precision': True
         },
         kvstore=kv)
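
A hypothetical training step using the objects set up above; data, target and batch_size are placeholder names, not part of the original snippet, and mxnet is assumed to be imported as in the other calls:

with mxnet.autograd.record():
    outputs = trainer.model.model(data)               # DataParallelModel forward across ctx_list
    losses = trainer.loss_function(outputs, target)   # DataParallelCriterion: one loss per device
mxnet.autograd.backward(losses)
trainer.optimizer.step(batch_size)
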
Example #2
 def get_criterion(aux, aux_weight, focal_kwargs=None, sensitive_kwargs=None, ohem=False):
     if focal_kwargs:
         from mxnetseg.nn import FocalLoss
         return FocalLoss(**focal_kwargs)
     if sensitive_kwargs:
         raise NotImplementedError
     if ohem:
         from gluoncv.loss import MixSoftmaxCrossEntropyOHEMLoss
         return MixSoftmaxCrossEntropyOHEMLoss(aux, aux_weight=aux_weight)
     else:
         from gluoncv.loss import MixSoftmaxCrossEntropyLoss
         # from mxnetseg.nn import MixSoftmaxCrossEntropyLoss
         return MixSoftmaxCrossEntropyLoss(aux, aux_weight=aux_weight)
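
Illustrative calls (the keyword values below are placeholders, not defaults taken from the original code):

criterion = get_criterion(aux=True, aux_weight=0.4)                   # mixed softmax cross-entropy loss
ohem_criterion = get_criterion(aux=True, aux_weight=0.4, ohem=True)   # OHEM variant
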
Example #3
def train(ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
    if opt.label_smoothing:
        L = MixSoftmaxCrossEntropyLoss(sparse_label=False, aux_weight=0.4)
    else:
        L = MixSoftmaxCrossEntropyLoss(aux_weight=0.4)

    best_val_score = 1

    for epoch in range(opt.num_epochs):
        tic = time.time()
        if opt.use_rec:
            train_data.reset()
        acc_top1.reset()
        acc_top5.reset()
        acc_top1_aux.reset()
        acc_top5_aux.reset()
        btic = time.time()

        for i, batch in enumerate(train_data):
            data, label = batch_fn(batch, ctx)
            if opt.label_smoothing:
                label_smooth = smooth(label, classes)
            else:
                label_smooth = label
            with ag.record():
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                loss = [
                    L(yhat[0], yhat[1], y)
                    for yhat, y in zip(outputs, label_smooth)
                ]
            for l in loss:
                l.backward()
            lr_scheduler.update(i, epoch)
            trainer.step(batch_size)

            acc_top1.update(label, [o[0] for o in outputs])
            acc_top5.update(label, [o[0] for o in outputs])
            acc_top1_aux.update(label, [o[1] for o in outputs])
            acc_top5_aux.update(label, [o[1] for o in outputs])
            if opt.log_interval and not (i + 1) % opt.log_interval:
                _, top1 = acc_top1.get()
                _, top5 = acc_top5.get()
                _, top1_aux = acc_top1_aux.get()
                _, top5_aux = acc_top5_aux.get()
                err_top1, err_top5, err_top1_aux, err_top5_aux = (1 - top1,
                                                                  1 - top5,
                                                                  1 - top1_aux,
                                                                  1 - top5_aux)
                logger.info(
                    'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t'
                    'top1-err=%f\ttop5-err=%f\ttop1-err-aux=%f\ttop5-err-aux=%f'
                    % (epoch, i, batch_size * opt.log_interval /
                       (time.time() - btic), err_top1, err_top5, err_top1_aux,
                       err_top5_aux))
                btic = time.time()

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        _, top1_aux = acc_top1_aux.get()
        _, top5_aux = acc_top5_aux.get()
        err_top1, err_top5, err_top1_aux, err_top5_aux = (1 - top1, 1 - top5,
                                                          1 - top1_aux,
                                                          1 - top5_aux)

        err_top1_val, err_top5_val, err_top1_val_aux, err_top5_val_aux = test(
            ctx, val_data)

        logger.info(
            '[Epoch %d] training: err-top1=%f err-top5=%f err-top1_aux=%f err-top5_aux=%f'
            % (epoch, err_top1, err_top5, err_top1_aux, err_top5_aux))
        logger.info('[Epoch %d] time cost: %f' % (epoch, time.time() - tic))
        logger.info(
            '[Epoch %d] validation: err-top1=%f err-top5=%f err-top1_aux=%f err-top5_aux=%f'
            % (epoch, err_top1_val, err_top5_val, err_top1_val_aux,
               err_top5_val_aux))

        if err_top1_val < best_val_score and epoch > 50:
            best_val_score = err_top1_val
            net.save_parameters('%s/%.4f-imagenet-%s-%d-best.params' %
                                (save_dir, best_val_score, model_name, epoch))

        if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
            net.save_parameters('%s/imagenet-%s-%d.params' %
                                (save_dir, model_name, epoch))

    if save_frequency and save_dir:
        net.save_parameters('%s/imagenet-%s-%d.params' %
                            (save_dir, model_name, opt.num_epochs - 1))
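
The smooth helper used above is not shown in this example; a minimal label-smoothing sketch (assuming mx is the usual mxnet import used elsewhere in this example, label is an NDArray or a list of NDArrays, classes is the number of classes, and eta is a hypothetical smoothing factor) could look like:

def smooth(label, classes, eta=0.1):
    # Convert hard labels into smoothed one-hot targets (illustrative sketch).
    if isinstance(label, mx.nd.NDArray):
        label = [label]
    return [l.one_hot(classes, on_value=1 - eta + eta / classes,
                      off_value=eta / classes) for l in label]
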
Example #4
plt.show()

##############################################################################
# Training Details
# ----------------
#
# - Training Losses:
#
#     We apply a standard per-pixel Softmax Cross Entropy Loss to train the FCN. For the
#     Pascal VOC dataset, we ignore the loss from the boundary class (number 22).
#     Additionally, an Auxiliary Loss at Stage 3, as in PSPNet [Zhao17]_, can be enabled when
#     training with the ``--aux`` flag. This creates an additional FCN "head" after Stage 3.
#
from gluoncv.loss import MixSoftmaxCrossEntropyLoss

criterion = MixSoftmaxCrossEntropyLoss(aux=True)
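
##############################################################################
# With ``aux=True`` the criterion expects the main prediction, the auxiliary
# prediction, and the target, in that order. The call below is only an
# illustration with dummy arrays; the shapes are hypothetical.
#
import mxnet as mx

pred = mx.nd.random.uniform(shape=(1, 21, 16, 16))        # main head logits
aux_pred = mx.nd.random.uniform(shape=(1, 21, 16, 16))    # auxiliary head logits
target = mx.nd.zeros((1, 16, 16))                         # per-pixel class labels
loss = criterion(pred, aux_pred, target)
print(loss)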

##############################################################################
# - Learning Rate and Scheduling:
#
#     We use different learning rates for the FCN "head" and the base network. For the FCN "head",
#     we use :math:`10\times` the base learning rate, because those layers are learned from scratch.
#     We use a poly-like learning rate scheduler for FCN training, provided in :class:`gluoncv.utils.LRScheduler`.
#     The learning rate is given by :math:`lr = baselr \times (1 - \frac{iter}{maxiter})^{power}`
#
lr_scheduler = gluoncv.utils.LRScheduler(mode='poly',
                                         baselr=0.001,
                                         niters=len(train_data),
                                         nepochs=50)
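
##############################################################################
# As a rough illustration (not the scheduler's internal code), the poly rule
# decays the learning rate from the base value towards zero over all training
# iterations, assuming the default power of 0.9:
#
base_lr, power = 0.001, 0.9
max_iter = len(train_data) * 50  # niters * nepochs, matching the scheduler above
for it in (0, max_iter // 2, max_iter - 1):
    print('iter %d: lr = %.6f' % (it, base_lr * (1 - it / float(max_iter)) ** power))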

##############################################################################
Example #5
    def __init__(self, args):
        self.args = args
        # image transform
        input_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
        ])
        # dataset and dataloader
        data_kwargs = {
            'transform': input_transform,
            'base_size': args.base_size,
            'crop_size': args.crop_size,
            'root': args.dataset_dir
        }
        trainset = get_segmentation_dataset(args.dataset,
                                            split=args.train_split,
                                            mode='train',
                                            **data_kwargs)
        valset = get_segmentation_dataset(args.dataset,
                                          split='val',
                                          mode='val',
                                          **data_kwargs)
        self.train_data = gluon.data.DataLoader(trainset,
                                                args.batch_size,
                                                shuffle=True,
                                                last_batch='rollover',
                                                num_workers=args.workers)
        self.eval_data = gluon.data.DataLoader(valset,
                                               args.test_batch_size,
                                               last_batch='rollover',
                                               num_workers=args.workers)
        # create network
        if args.model_zoo is not None:
            model = get_model(args.model_zoo, pretrained=True)
        else:
            model = get_segmentation_model(model=args.model,
                                           dataset=args.dataset,
                                           backbone=args.backbone,
                                           norm_layer=args.norm_layer,
                                           norm_kwargs=args.norm_kwargs,
                                           aux=args.aux,
                                           crop_size=args.crop_size)
        model.cast(args.dtype)
        print(model)
        self.net = DataParallelModel(model, args.ctx, args.syncbn)
        self.evaluator = DataParallelModel(SegEvalModel(model), args.ctx)
        # resume checkpoint if needed
        if args.resume is not None:
            if os.path.isfile(args.resume):
                model.load_parameters(args.resume, ctx=args.ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
        # create criterion
        criterion = MixSoftmaxCrossEntropyLoss(args.aux,
                                               aux_weight=args.aux_weight)
        self.criterion = DataParallelCriterion(criterion, args.ctx,
                                               args.syncbn)
        # optimizer and lr scheduling
        self.lr_scheduler = LRScheduler(mode='poly',
                                        base_lr=args.lr,
                                        nepochs=args.epochs,
                                        iters_per_epoch=len(self.train_data),
                                        power=0.9)
        kv = mx.kv.create(args.kvstore)
        optimizer_params = {
            'lr_scheduler': self.lr_scheduler,
            'wd': args.weight_decay,
            'momentum': args.momentum
        }
        if args.dtype == 'float16':
            optimizer_params['multi_precision'] = True

        if args.no_wd:
            for k, v in self.net.module.collect_params(
                    '.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        self.optimizer = gluon.Trainer(self.net.module.collect_params(),
                                       'sgd',
                                       optimizer_params,
                                       kvstore=kv)
        # evaluation metrics
        self.metric = gluoncv.utils.metrics.SegmentationMetric(
            trainset.num_class)
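
A sketch of how these pieces are typically wired together in the training loop; the method below is illustrative and not part of the original snippet (mx is assumed to be imported as mxnet, as in the kvstore call above):

    def training(self, epoch):
        # Illustrative training step using the objects created in __init__.
        for i, (data, target) in enumerate(self.train_data):
            with mx.autograd.record(True):
                outputs = self.net(data.astype(self.args.dtype, copy=False))
                losses = self.criterion(outputs, target)
                mx.autograd.backward(losses)
            self.optimizer.step(self.args.batch_size)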