    def __init__(self, flag, batch_size,
                 use_global_stats=True,
                 checkpoint_interval=5,
                 epochs=50,
                 learning_rate=1.e-4,
                 momentum=0.9,
                 weight_decay=4.e-5,
                 train_OS=16,
                 train_split='train_aug',
                 val_split='val',
                 resume=None,
                 test_batch_size=None,
                 data_root=os.path.expanduser('~/.mxnet/datasets/voc'),
                 ctx=[mx.gpu()],
                 norm_layer=gluon.nn.BatchNorm,
                 num_workers=4):

        if test_batch_size is None:
            test_batch_size = batch_size

        self.running_flag = flag
        self.checkpoint_interval = checkpoint_interval
        self.batch_size = batch_size

        # dataset and dataloader
        train_dataset = VOCAugSegmentation(root=data_root, split=train_split)
        val_dataset = VOCAugSegmentation(root=data_root, split=val_split)
        self.train_data = gluon.data.DataLoader(train_dataset, batch_size, shuffle=True, last_batch='rollover',
                                                num_workers=num_workers)
        self.eval_data = gluon.data.DataLoader(val_dataset, test_batch_size,
                                               last_batch='keep', num_workers=num_workers)

        # create network
        model = DeepLabv3p(OS=train_OS, classes=21, use_global_stats=use_global_stats, norm_layer=norm_layer)
        print(model)

        # resume checkpoint if needed
        if resume is not None:
            if os.path.isfile(resume):
                model.load_parameters(resume, ctx=ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'".format(resume))
        else:
            model.initialize(ctx=ctx)

        self.net = DataParallelModel(model, ctx, sync=True)
        self.evaluator = DataParallelModel(SegEvalModel(model), ctx)

        # create criterion
        self.criterion = DataParallelCriterion(SoftmaxCrossEntropyLoss(), ctx, sync=True)

        # optimizer and lr scheduling
        self.lr_scheduler = LRScheduler(mode='poly', baselr=learning_rate, niters=len(self.train_data),
                                        nepochs=epochs)
        self.optimizer = gluon.Trainer(self.net.module.collect_params(), 'sgd',
                                       {'lr_scheduler': self.lr_scheduler,
                                        'wd': weight_decay,
                                        'momentum': momentum,
                                        'multi_precision': True})
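The 'poly' mode passed to LRScheduler above anneals the learning rate from baselr toward zero over nepochs * niters updates. Below is a minimal plain-Python sketch of that policy, not GluonCV's implementation; poly_lr is a hypothetical helper and the power of 0.9 is the commonly used default.

def poly_lr(base_lr, cur_iter, max_iter, power=0.9):
    # polynomial decay: base_lr at iteration 0, 0 at max_iter
    return base_lr * (1.0 - cur_iter / max_iter) ** power

max_iter = 50 * 1000                           # epochs * iterations per epoch
print(poly_lr(1e-4, 0, max_iter))              # 1e-4 at the start
print(poly_lr(1e-4, max_iter // 2, max_iter))  # ~5.4e-05 halfway through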
Example #2
 def init_trainer(self, trainer):
     if trainer.config['loss_function'] == 'default':
         trainer.loss_function = MixSoftmaxCrossEntropyLoss(aux=True)
     else:
         trainer.loss_function = getattr(
             gluoncv.loss, trainer.config['loss_function'])(
                 **trainer.config['loss_function_parameters'])
     trainer.lr_scheduler = gluoncv.utils.LRScheduler(
         mode='poly',
         baselr=trainer.config['learn_rate'],
         niters=len(trainer.dataloader),
         nepochs=50)
     trainer.model.model = DataParallelModel(trainer.model.model,
                                             self.ctx_list)
     trainer.loss_function = DataParallelCriterion(trainer.loss_function,
                                                   self.ctx_list)
     kv = mxnet.kv.create('local')
     optimizer = trainer.config['optimizer']
     if optimizer not in ['sgd']:
         optimizer = 'sgd'
     trainer.optimizer = gluon.Trainer(
         trainer.model.model.module.collect_params(),
         optimizer, {
             'lr_scheduler': trainer.lr_scheduler,
             'wd': 0.0001,
             'momentum': 0.9,
             'multi_precision': True
         },
         kvstore=kv)
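init_trainer above reads a handful of keys off trainer.config. The dictionary below is a hypothetical config illustrating those keys; the names simply mirror the dictionary accesses in the code and are not a documented schema.

config = {
    'loss_function': 'default',        # or the name of a gluoncv.loss class
    'loss_function_parameters': {},    # kwargs for a non-default loss
    'learn_rate': 1e-3,                # base lr for the poly schedule
    'optimizer': 'sgd',                # anything else falls back to 'sgd'
}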
Example #3
 def __init__(self, args):
     self.args = args
     # image transform
     input_transform = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
     ])
     # dataset and dataloader
     trainset = get_segmentation_dataset(
         args.dataset, split='train', transform=input_transform)
     valset = get_segmentation_dataset(
         args.dataset, split='val', transform=input_transform)
     self.train_data = gluon.data.DataLoader(
         trainset, args.batch_size, shuffle=True, last_batch='rollover',
         num_workers=args.workers)
     self.eval_data = gluon.data.DataLoader(valset, args.test_batch_size,
         last_batch='keep', num_workers=args.workers)
     # create network
     model = get_segmentation_model(model=args.model, dataset=args.dataset,
                                    backbone=args.backbone, norm_layer=args.norm_layer,
                                    aux=args.aux, norm_kwargs=args.norm_kwargs)
     # model.hybridize(static_alloc=True, static_shape=True)
     print(model)
     self.net = DataParallelModel(model, args.ctx, args.syncbn)
     self.evaluator = DataParallelModel(SegEvalModel(model), args.ctx)
     # resume checkpoint if needed
     if args.resume is not None:
         if os.path.isfile(args.resume):
             model.load_parameters(args.resume, ctx=args.ctx)
         else:
             raise RuntimeError("=> no checkpoint found at '{}'" \
                 .format(args.resume))
     # create criterion
     criterion = SoftmaxCrossEntropyLossWithAux(args.aux)
     self.criterion = DataParallelCriterion(criterion, args.ctx, args.syncbn)
     # optimizer and lr scheduling
     self.lr_scheduler = LRScheduler(mode='poly', baselr=args.lr,
                                     niters=len(self.train_data), 
                                     nepochs=args.epochs)
     kv = mx.kv.create(args.kvstore)
     self.optimizer = gluon.Trainer(self.net.module.collect_params(), 'sgd',
                                    {'lr_scheduler': self.lr_scheduler,
                                     'wd': args.weight_decay,
                                     'momentum': args.momentum,
                                     'multi_precision': True},
                                    kvstore=kv)
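The attributes built here (train_data, net, criterion, lr_scheduler, optimizer) are typically consumed by a companion training loop. The sketch below is an assumption modeled on GluonCV-style segmentation trainers, not code shown in this example; it also assumes the older LRScheduler.update(i, epoch) API that matches the baselr/niters constructor used above, with mx, autograd, and tqdm imported as in the surrounding examples.

def training(self, epoch):
    tbar = tqdm(self.train_data)
    train_loss = 0.0
    for i, (data, target) in enumerate(tbar):
        self.lr_scheduler.update(i, epoch)
        with autograd.record(True):
            outputs = self.net(data)                 # per-device outputs
            losses = self.criterion(outputs, target)  # per-device losses
            mx.nd.waitall()
            autograd.backward(losses)
        self.optimizer.step(self.args.batch_size)
        train_loss += sum(l.mean().asscalar() for l in losses) / len(losses)
        tbar.set_description('training loss %.3f' % (train_loss / (i + 1)))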
Example #4
 def test_net_sync(net, criterion, sync, nDevices):
     ctx_list = [mx.cpu(0) for i in range(nDevices)]
     net = DataParallelModel(net, ctx_list, sync=sync)
     criterion = DataParallelCriterion(criterion, ctx_list, sync=sync)
     iters = 100
     # train mode
     for i in range(iters):
         x = mx.random.uniform(shape=(8, 1, 28, 28))
         t = nd.ones(shape=(8,))
         with autograd.record():
             y = net(x)
             loss = criterion(y, t)
             autograd.backward(loss)
     # evaluation mode
     for i in range(iters):
         x = mx.random.uniform(shape=(8, 1, 28, 28))
         y = net(x)
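DataParallelModel splits each input batch across ctx_list and runs the wrapped network on every shard; with sync=True, DataParallelCriterion likewise synchronizes the loss computation across devices. A small sketch of the scatter step, as a simplified illustration using gluon.utils.split_and_load rather than GluonCV's internal code:

import mxnet as mx
from mxnet import gluon, nd

ctx_list = [mx.cpu(0), mx.cpu(1)]            # stand-ins for multiple GPUs
x = nd.random.uniform(shape=(8, 1, 28, 28))
shards = gluon.utils.split_and_load(x, ctx_list)  # one shard per device
print([s.shape for s in shards])             # [(4, 1, 28, 28), (4, 1, 28, 28)]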
Example #5
def test(args):
    # output folder
    outdir = 'train_logs/outdir'
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    # dataset and dataloader
    testset = get_custom_segm_dataset("test", args)
    test_data = gluon.data.DataLoader(
        testset,
        args.test_batch_size,
        shuffle=False,
        last_batch='keep',
        batchify_fn=ms_batchify_fn if args.tta else None,
        num_workers=args.workers)
    # create network
    if args.model_zoo is not None:
        model = get_pretrained_segmentation_model(args)
        if args.resume is not None:
            resume_checkpoint(model, args)
            print("loading checkpoint from %s for testing" % args.resume)
    else:
        model = get_segmentation_model(model=args.model,
                                       dataset=args.dataset,
                                       ctx=args.ctx,
                                       backbone=args.backbone,
                                       norm_layer=args.norm_layer,
                                       norm_kwargs=args.norm_kwargs,
                                       aux=args.aux,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)
        # load pretrained weight
        assert args.resume is not None, '=> Please provide the checkpoint using --resume'
        resume_checkpoint(model, args)
    # print(model)
    if args.tta:
        evaluator = MultiEvalModel(model,
                                   testset.num_class,
                                   ctx_list=args.ctx,
                                   scales=[0.75, 1.0, 1.25, 1.5, 1.75])
    else:
        evaluator = DataParallelModel(SegEvalModel(model), args.ctx)
    metric = gluoncv.utils.metrics.SegmentationMetric(testset.num_class)

    tbar = tqdm(test_data)
    for i, (data, dsts) in enumerate(tbar):
        if args.eval:
            if args.tta:
                predicts = [
                    pred[0].expand_dims(0)
                    for pred in evaluator.parallel_forward(data)
                ]
                targets = [target.as_in_context(predicts[0].context).expand_dims(0) \
                        for target in dsts]
            else:
                data = data.astype(args.dtype, copy=False)
                predicts = evaluator(data)
                predicts = [x[0] for x in predicts]
                if args.test_flip:
                    assert (data.ndim == 4)
                    fdata = data.flip(3)
                    fpredicts = evaluator(fdata)
                    predicts = [(x + y[0].flip(3)) / 2
                                for x, y in zip(predicts, fpredicts)]
                targets = mx.gluon.utils.split_and_load(dsts,
                                                        args.ctx,
                                                        even_split=False)
            metric.update(targets, predicts)
            pixAcc, mIoU = metric.get()
            tbar.set_description('pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
            mx.nd.waitall()
        else:
            im_paths = dsts
            predicts = evaluator.parallel_forward(data)
            for predict, impath in zip(predicts, im_paths):
                predict = mx.nd.squeeze(mx.nd.argmax(predict[0], 1)).asnumpy() + \
                    testset.pred_offset
                mask = get_color_pallete(predict, args.dataset)
                outname = os.path.splitext(impath)[0] + '.png'
                mask.save(os.path.join(outdir, outname))
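The args.test_flip branch above averages predictions over an image and its horizontal mirror. Below is a minimal NumPy sketch of that flip-averaging, detached from MXNet; flip_tta is a hypothetical helper and predict_fn a placeholder for any model mapping NCHW arrays to logits.

import numpy as np

def flip_tta(predict_fn, batch):
    # batch: (N, C, H, W); the last axis is width, matching data.flip(3)
    preds = predict_fn(batch)
    flipped = predict_fn(batch[..., ::-1])     # predict on the mirrored batch
    return (preds + flipped[..., ::-1]) / 2.0  # un-flip, then average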
Example #6
    def __init__(self, args):
        self.args = args
        # image transform
        input_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
        ])
        # dataset and dataloader
        data_kwargs = {
            'transform': input_transform,
            'base_size': args.base_size,
            'crop_size': args.crop_size,
            'root': args.dataset_dir
        }
        trainset = get_segmentation_dataset(args.dataset,
                                            split=args.train_split,
                                            mode='train',
                                            **data_kwargs)
        valset = get_segmentation_dataset(args.dataset,
                                          split='val',
                                          mode='val',
                                          **data_kwargs)
        self.train_data = gluon.data.DataLoader(trainset,
                                                args.batch_size,
                                                shuffle=True,
                                                last_batch='rollover',
                                                num_workers=args.workers)
        self.eval_data = gluon.data.DataLoader(valset,
                                               args.test_batch_size,
                                               last_batch='keep',
                                               num_workers=args.workers)
        # create network
        if args.model_zoo is not None:
            model = get_model(args.model_zoo, pretrained=True)
        else:
            model = get_segmentation_model(model=args.model,
                                           dataset=args.dataset,
                                           backbone=args.backbone,
                                           norm_layer=args.norm_layer,
                                           norm_kwargs=args.norm_kwargs,
                                           aux=args.aux,
                                           crop_size=args.crop_size)
        model.cast(args.dtype)
        print(model)
        self.net = DataParallelModel(model, args.ctx, args.syncbn)
        self.evaluator = DataParallelModel(SegEvalModel(model), args.ctx)
        # resume checkpoint if needed
        if args.resume is not None:
            if os.path.isfile(args.resume):
                model.load_parameters(args.resume, ctx=args.ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
        # create criterion
        criterion = MixSoftmaxCrossEntropyLoss(args.aux,
                                               aux_weight=args.aux_weight)
        self.criterion = DataParallelCriterion(criterion, args.ctx,
                                               args.syncbn)
        # optimizer and lr scheduling
        self.lr_scheduler = LRScheduler(mode='poly',
                                        base_lr=args.lr,
                                        nepochs=args.epochs,
                                        iters_per_epoch=len(self.train_data),
                                        power=0.9)
        kv = mx.kv.create(args.kvstore)
        optimizer_params = {
            'lr_scheduler': self.lr_scheduler,
            'wd': args.weight_decay,
            'momentum': args.momentum
        }
        if args.dtype == 'float16':
            optimizer_params['multi_precision'] = True

        if args.no_wd:
            for k, v in self.net.module.collect_params(
                    '.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        self.optimizer = gluon.Trainer(self.net.module.collect_params(),
                                       'sgd',
                                       optimizer_params,
                                       kvstore=kv)
        # evaluation metrics
        self.metric = gluoncv.utils.metrics.SegmentationMetric(
            trainset.num_class)
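The no_wd branch above zeroes weight decay for BatchNorm scales/shifts and biases by passing a regex to collect_params. A short sketch of what the '.*beta|.*gamma|.*bias' pattern selects; the parameter names here are illustrative, not taken from a real model.

import re

pattern = re.compile('.*beta|.*gamma|.*bias')
names = ['conv0_weight', 'batchnorm0_gamma', 'batchnorm0_beta', 'dense0_bias']
print([n for n in names if pattern.match(n)])
# ['batchnorm0_gamma', 'batchnorm0_beta', 'dense0_bias'] -> wd_mult = 0.0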