def convert_from_gluon(model_name, image_shape, classes=1000, logger=None):
    """Export a pretrained Gluon-CV model zoo network as a Module checkpoint.

    The network is obtained through ``get_model``, traced with a symbolic
    ``data`` input, topped with a ``SoftmaxOutput`` layer named ``softmax``
    and saved (epoch 0) into a ``model`` directory next to this file.

    Parameters
    ----------
    model_name : str
        Name passed to ``get_model``; also the checkpoint file prefix.
    image_shape : str
        Comma-separated input shape without the batch axis, e.g. ``'3,224,224'``.
        A leading batch dimension of 1 is prepended when binding.
    classes : int
        Number of classes passed to ``get_model``.
    logger : logging.Logger or None
        If given, receives one info message describing the conversion.

    Returns
    -------
    str
        The checkpoint prefix, ``<directory of this file>/model/<model_name>``.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    model_path = os.path.join(dir_path, 'model')
    if logger is not None:
        logger.info('Converting model from Gluon-CV ModelZoo %s... into path %s',
                    model_name, model_path)

    net = get_model(name=model_name, classes=classes, pretrained=True)
    net.hybridize()
    x = mx.sym.var('data')
    y = net(x)
    y = mx.sym.SoftmaxOutput(data=y, name='softmax')
    symnet = mx.symbol.load_json(y.tojson())

    # Split parameters into Module's two dictionaries: any parameter whose
    # name contains 'running' goes to the auxiliary states, the rest to args.
    args = {}
    auxs = {}
    for param in net.collect_params().values():
        target = auxs if 'running' in param.name else args
        target[param.name] = param._reduce()

    mod = mx.mod.Module(symbol=symnet, context=mx.cpu(),
                        label_names=['softmax_label'])
    data_shape = (1,) + tuple(int(dim) for dim in image_shape.split(','))
    mod.bind(for_training=False, data_shapes=[('data', data_shape)])
    mod.set_params(arg_params=args, aux_params=auxs)

    # exist_ok avoids the check-then-create race of isdir() + mkdir()
    os.makedirs(model_path, exist_ok=True)
    prefix = os.path.join(model_path, model_name)
    mod.save_checkpoint(prefix, 0)
    return prefix
Exemple #2
0
    def __init__(self, args):
        """Set up everything needed for segmentation training from ``args``.

        Populates ``self.train_data`` / ``self.eval_data`` (data loaders),
        ``self.net`` / ``self.evaluator`` (data-parallel wrappers around the
        same model), ``self.criterion``, ``self.lr_scheduler``,
        ``self.optimizer`` and ``self.metric``.

        Raises
        ------
        RuntimeError
            If ``args.resume`` is set but does not point to an existing file.
        """
        self.args = args

        # ---- input pipeline ------------------------------------------------
        preprocess = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
        ])
        dataset_options = {
            'transform': preprocess,
            'base_size': args.base_size,
            'crop_size': args.crop_size,
        }
        train_set = get_segmentation_dataset(
            args.dataset, split=args.train_split, mode='train', **dataset_options)
        val_set = get_segmentation_dataset(
            args.dataset, split='val', mode='val', **dataset_options)
        self.train_data = gluon.data.DataLoader(
            train_set, args.batch_size, shuffle=True, last_batch='rollover',
            num_workers=args.workers)
        self.eval_data = gluon.data.DataLoader(
            val_set, args.test_batch_size, last_batch='rollover',
            num_workers=args.workers)

        # ---- network -------------------------------------------------------
        # a model-zoo name takes precedence over the model/backbone options
        if args.model_zoo is None:
            network = get_segmentation_model(
                model=args.model, dataset=args.dataset,
                backbone=args.backbone, norm_layer=args.norm_layer,
                norm_kwargs=args.norm_kwargs, aux=args.aux,
                crop_size=args.crop_size)
        else:
            network = get_model(args.model_zoo, pretrained=True)
        network.cast(args.dtype)
        print(network)
        self.net = DataParallelModel(network, args.ctx, args.syncbn)
        self.evaluator = DataParallelModel(SegEvalModel(network), args.ctx)

        # ---- optional checkpoint restore -----------------------------------
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError(
                    "=> no checkpoint found at '{}'".format(args.resume))
            network.load_parameters(args.resume, ctx=args.ctx)

        # ---- loss ----------------------------------------------------------
        loss_fn = MixSoftmaxCrossEntropyLoss(args.aux, aux_weight=args.aux_weight)
        self.criterion = DataParallelCriterion(loss_fn, args.ctx, args.syncbn)

        # ---- optimizer and learning-rate schedule --------------------------
        self.lr_scheduler = LRScheduler(mode='poly', baselr=args.lr,
                                        niters=len(self.train_data),
                                        nepochs=args.epochs)
        kv = mx.kv.create(args.kvstore)
        trainer_options = {
            'lr_scheduler': self.lr_scheduler,
            'wd': args.weight_decay,
            'momentum': args.momentum,
        }
        if args.dtype == 'float16':
            trainer_options['multi_precision'] = True

        if args.no_wd:
            # switch weight decay off for parameters matching beta/gamma/bias
            no_decay = self.net.module.collect_params('.*beta|.*gamma|.*bias')
            for param in no_decay.values():
                param.wd_mult = 0.0

        self.optimizer = gluon.Trainer(self.net.module.collect_params(), 'sgd',
                                       trainer_options, kvstore=kv)

        # ---- evaluation metric ---------------------------------------------
        self.metric = gluoncv.utils.metrics.SegmentationMetric(train_set.num_class)
    def __init__(self, num_clothes, num_colors, ctx):
        """Create the backbone and the layers of the clothes and color branches.

        Parameters
        ----------
        num_clothes : int
            Output width of the final clothes dense layer.
        num_colors : int
            Output width of the final color dense layer.
        ctx : context or list of contexts
            Passed to the pretrained backbone and to every ``initialize`` call.
        """
        super(fashion_net_2_branches, self).__init__()

        # Pretrained MobileNetV2 feature extractor; gradients are disabled for
        # all of its parameters.
        backbone = model_zoo.get_model('mobilenetv2_1.0', pretrained=True, ctx=ctx)
        self._features = backbone.features
        for weight in self._features.collect_params().values():
            weight.grad_req = 'null'

        self._flatten = nn.Flatten()
        self._relu = nn.Activation(activation='relu')
        self._swish = nn.Swish()

        # clothes branch
        self._clothes_fc_1 = nn.Dense(100)
        self._clothes_bn = nn.BatchNorm(center=False, scale=True)
        self._clothes_out = nn.Dense(num_clothes)

        clothes_layers = (
            (self._clothes_fc_1, init.Xavier),
            (self._clothes_bn, init.Zero),
            (self._clothes_out, init.Xavier),
        )
        for layer, make_init in clothes_layers:
            layer.initialize(init=make_init(), ctx=ctx)

        # color branch
        self._color_fc_1 = nn.Dense(100)
        self._color_bn_1 = nn.BatchNorm(center=False, scale=True)
        self._color_fc_2 = nn.Dense(50)
        self._color_bn_2 = nn.BatchNorm(center=False, scale=True)
        self._color_out = nn.Dense(num_colors)

        color_layers = (
            (self._color_fc_1, init.Xavier),
            (self._color_bn_1, init.Zero),
            (self._color_fc_2, init.Xavier),
            (self._color_bn_2, init.Zero),
            (self._color_out, init.Xavier),
        )
        for layer, make_init in color_layers:
            layer.initialize(init=make_init(), ctx=ctx)
Exemple #4
0
# tutorial. In addition, we use the idea of temporal segments (TSN) [Wang16]_
# to wrap the backbone VGG16 network for adaptation to video domain.
#
# `TSN <https://arxiv.org/abs/1608.00859>`_ is a widely adopted video
# classification method. It is proposed to incorporate temporal information from an entire video.
# The idea is straightforward: we can evenly divide the video into several segments,
# process each segment individually, obtain segmental consensus from each segment, and perform
# final prediction. TSN is more like a general algorithm, rather than a specific network architecture.
# It can work with both 2D and 3D neural networks.

# number of GPUs to use
num_gpus = 1
ctx = [mx.gpu(device_id) for device_id in range(num_gpus)]

# Get the model vgg16_ucf101 with temporal segment network, with 101 output classes, without pre-trained weights
net = get_model(name='vgg16_ucf101', nclass=101, num_segments=3)
# place every parameter of the network on the selected devices
net_params = net.collect_params()
net_params.reset_ctx(ctx)
print(net)

################################################################
# Data Augmentation and Data Loader
# ---------------------------------
#
# Data augmentation for video is different from image. For example, if you
# want to randomly crop a video sequence, you need to make sure all the video
# frames in this sequence undergo the same cropping process. We provide a
# new set of transformation functions, working with multiple images.
# Please check out `video.py <../../../gluoncv/data/transforms/video.py>`_ for more details.
# Most video data augmentation strategies used here are introduced in [Wang15]_.

transform_train = transforms.Compose([
# transpose it to `num_channels*height*width`,
# and normalize with mean and standard deviation calculated across all CIFAR10 images.
#
# What does the transformed image look like?

img = transform_fn(img)
# move the first axis to the end before handing the array to matplotlib
displayable = nd.transpose(img, (1,2,0)).asnumpy()
plt.imshow(displayable)
plt.show()

################################################################
# Can't recognize anything? *Don't panic!* Neither do I.
# The transformation makes it more "model-friendly", instead of "human-friendly".
#
# Next, we load a pre-trained model.

net = get_model('cifar_resnet110_v1', classes=10, pretrained=True)

################################################################
#
# Finally, we prepare the image and feed it to the model

# add a leading axis so the single image forms a batch of one
batch = img.expand_dims(axis=0)
pred = net(batch)

class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
ind = nd.argmax(pred, axis=1).astype('int')
predicted_class = class_names[ind.asscalar()]
predicted_prob = nd.softmax(pred)[0][ind].asscalar()
print('The input picture is classified as [%s], with probability %.3f.'%
      (predicted_class, predicted_prob))

################################################################
# Play with the scripts
Exemple #6
0
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

        axes = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2, ax=axes)
        plt.draw()
        plt.pause(0.001)
    else:
        axes = plot_image(frame, ax=axes)
        plt.draw()
        plt.pause(0.001)

    return axes

if __name__ == '__main__':
    # Webcam demo: run a person detector on each captured frame and hand the
    # frame to keypoint_detection together with the pose network.
    ctx = mx.cpu()
    detector_name = "ssd_512_mobilenet1.0_coco"
    detector = get_model(detector_name, pretrained=True, ctx=ctx)
    # restrict the detector to the 'person' class, reusing its weights
    detector.reset_class(classes=['person'], reuse_weights={'person':'person'})
    net = get_model('simple_pose_resnet18_v1b', pretrained='ccd24037', ctx=ctx)

    cap = cv2.VideoCapture(0)
    time.sleep(1)  ### letting the camera autofocus
    axes = None

    try:
        for i in range(opt.num_frames):
            ret, frame = cap.read()
            if not ret:
                # read() signals failure through its first return value; fail
                # with a clear message instead of passing an invalid frame to
                # cvtColor.
                raise RuntimeError('Failed to read frame %d from the camera' % i)
            frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
            axes = keypoint_detection(frame, detector, net, ctx, axes=axes)
    finally:
        # always give the capture device back, even when a frame fails
        cap.release()
def main():
    """Train an ImageNet classifier, optionally distilling from a teacher.

    All settings come from ``parse_args()``. The function configures logging,
    builds the warm-up + main learning-rate schedule, creates the network (and
    a pretrained teacher when distillation is enabled), selects the data
    pipeline (record files or raw images) and runs the train/validate loop,
    saving checkpoints as configured.
    """
    opt = parse_args()

    filehandler = logging.FileHandler(opt.logging_file)
    streamhandler = logging.StreamHandler()

    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)

    logger.info(opt)

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167

    # opt.batch_size is per device; scale it to the global batch size
    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    # the main scheduler starts counting after warm-up, so shift the decay epochs
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=opt.lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=opt.lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])

    model_name = opt.model

    kwargs = {'ctx': context, 'pretrained': opt.use_pretrained, 'classes': classes}
    if opt.use_gn:
        from gluoncv.nn import GroupNorm
        kwargs['norm_layer'] = GroupNorm
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se

    if opt.last_gamma:
        kwargs['last_gamma'] = True

    optimizer = 'nag'
    optimizer_params = {'wd': opt.wd, 'momentum': opt.momentum, 'lr_scheduler': lr_scheduler}
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    net = get_model(model_name, **kwargs)
    net.cast(opt.dtype)
    # compare by value: `is not ''` tests object identity, not string content
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx = context)

    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        teacher = get_model(teacher_name, pretrained=True, classes=classes, ctx=context)
        teacher.cast(opt.dtype)
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_train_idx, rec_val, rec_val_idx, batch_size, num_workers):
        """Build ``ImageRecordIter`` pipelines from record/index files.

        Returns ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch, ctx)``
        splits a batch's data and label across the given contexts.
        """
        rec_train = os.path.expanduser(rec_train)
        rec_train_idx = os.path.expanduser(rec_train_idx)
        rec_val = os.path.expanduser(rec_val)
        rec_val_idx = os.path.expanduser(rec_val_idx)
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        # a non-positive crop ratio falls back to 0.875
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))
        mean_rgb = [123.68, 116.779, 103.939]
        std_rgb = [58.393, 57.12, 57.375]

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            return data, label

        train_data = mx.io.ImageRecordIter(
            path_imgrec         = rec_train,
            path_imgidx         = rec_train_idx,
            preprocess_threads  = num_workers,
            shuffle             = True,
            batch_size          = batch_size,

            data_shape          = (3, input_size, input_size),
            mean_r              = mean_rgb[0],
            mean_g              = mean_rgb[1],
            mean_b              = mean_rgb[2],
            std_r               = std_rgb[0],
            std_g               = std_rgb[1],
            std_b               = std_rgb[2],
            rand_mirror         = True,
            random_resized_crop = True,
            max_aspect_ratio    = 4. / 3.,
            min_aspect_ratio    = 3. / 4.,
            max_random_area     = 1,
            min_random_area     = 0.08,
            brightness          = jitter_param,
            saturation          = jitter_param,
            contrast            = jitter_param,
            pca_noise           = lighting_param,
        )
        val_data = mx.io.ImageRecordIter(
            path_imgrec         = rec_val,
            path_imgidx         = rec_val_idx,
            preprocess_threads  = num_workers,
            shuffle             = False,
            batch_size          = batch_size,

            resize              = resize,
            data_shape          = (3, input_size, input_size),
            mean_r              = mean_rgb[0],
            mean_g              = mean_rgb[1],
            mean_b              = mean_rgb[2],
            std_r               = std_rgb[0],
            std_g               = std_rgb[1],
            std_b               = std_rgb[2],
        )
        return train_data, val_data, batch_fn

    def get_data_loader(data_dir, batch_size, num_workers):
        """Build Gluon ``DataLoader`` pipelines from an ImageNet image folder.

        Returns ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch, ctx)``
        splits a ``(data, label)`` batch across the given contexts.
        """
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        # a non-positive crop ratio falls back to 0.875
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            return data, label

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                        saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            normalize
        ])
        transform_test = transforms.Compose([
            transforms.Resize(resize, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize
        ])

        train_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        return train_data, val_data, batch_fn

    if opt.use_rec:
        train_data, val_data, batch_fn = get_data_rec(opt.rec_train, opt.rec_train_idx,
                                                    opt.rec_val, opt.rec_val_idx,
                                                    batch_size, num_workers)
    else:
        train_data, val_data, batch_fn = get_data_loader(opt.data_dir, batch_size, num_workers)

    # with mixup the targets are soft, so RMSE replaces accuracy as train metric
    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    # periodic checkpointing needs both a directory and a non-zero frequency
    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        """Return mixed (optionally smoothed) one-hot targets.

        Each label array is blended with its own reversed copy (``l[::-1]``)
        using weights ``lam`` and ``1 - lam``; ``eta`` is the smoothing amount.
        """
        if isinstance(label, nd.NDArray):
            label = [label]
        res = []
        for l in label:
            y1 = l.one_hot(classes, on_value = 1 - eta + eta/classes, off_value = eta/classes)
            y2 = l[::-1].one_hot(classes, on_value = 1 - eta + eta/classes, off_value = eta/classes)
            res.append(lam*y1 + (1-lam)*y2)
        return res

    def smooth(label, classes, eta=0.1):
        """Return label-smoothed one-hot targets for each label array."""
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            res = l.one_hot(classes, on_value = 1 - eta + eta/classes, off_value = eta/classes)
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data):
        """Evaluate ``net`` on ``val_data``; return ``(top1_error, top5_error)``."""
        if opt.use_rec:
            val_data.reset()
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (1-top1, 1-top5)

    def train(ctx):
        """Run the full training loop on the given context(s)."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        # only initialize from scratch when no parameter file was loaded above
        if opt.resume_params == '':
            net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        if opt.no_wd:
            # switch weight decay off for parameters matching beta/gamma/bias
            for v in net.collect_params('.*beta|.*gamma|.*bias').values():
                v.wd_mult = 0.0

        trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        # smoothed or mixed targets are dense vectors, not class indices
        sparse_label_loss = not (opt.label_smoothing or opt.mixup)
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(temperature=opt.temperature,
                                                                 hard_weight=opt.hard_weight,
                                                                 sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)

        best_val_score = 1

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            train_metric.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    # mixing is turned off for the last mixup_off_epoch epochs
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam*X + (1-lam)*X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    # keep the original labels for the accuracy metric
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    # teacher predictions are computed outside autograd recording
                    teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature) \
                                    for X in data]

                with ag.record():
                    outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    if distillation:
                        loss = [L(yhat.astype('float32', copy=False),
                                  y.astype('float32', copy=False),
                                  p.astype('float32', copy=False)) for yhat, y, p in zip(outputs, label, teacher_prob)]
                    else:
                        loss = [L(yhat, y.astype(opt.dtype, copy=False)) for yhat, y in zip(outputs, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)

                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                                    for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                if opt.log_interval and not (i+1)%opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f'%(
                                epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                                train_metric_name, train_metric_score, trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i /(time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data)

            logger.info('[Epoch %d] training: %s=%f'%(epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f'%(epoch, throughput, time.time()-tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f'%(epoch, err_top1_val, err_top5_val))

            # NOTE(review): this branch is not guarded by save_dir, so with an
            # empty save_dir the path starts with '/' — confirm that is intended.
            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                net.save_parameters('%s/%.4f-imagenet-%s-%d-best.params'%(save_dir, best_val_score, model_name, epoch))
                trainer.save_states('%s/%.4f-imagenet-%s-%d-best.states'%(save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states'%(save_dir, model_name, epoch))

        # always leave a checkpoint for the final epoch when saving is enabled
        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, opt.num_epochs-1))
            trainer.save_states('%s/imagenet-%s-%d.states'%(save_dir, model_name, opt.num_epochs-1))


    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
        if distillation:
            teacher.hybridize(static_alloc=True, static_shape=True)
    train(context)
    # One GPU context per non-blank entry of the comma-separated args.gpus;
    # fall back to a single CPU context when none is given.
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]
    args.batch_size = len(ctx)  # 1 batch per device

    # network
    # The model-zoo name is assembled as
    # faster_rcnn[_fpn][_<norm_layer>]_<network>_<dataset>.
    kwargs = {}
    module_list = []
    if args.use_fpn:
        module_list.append('fpn')
    if args.norm_layer is not None:
        module_list.append(args.norm_layer)
        if args.norm_layer == 'bn':
            # NOTE(review): this counts every comma-separated entry, including
            # blank ones, so it can differ from len(ctx) — confirm intended.
            kwargs['num_devices'] = len(args.gpus.split(','))
    net_name = '_'.join(('faster_rcnn', *module_list, args.network, args.dataset))
    args.save_prefix += net_name
    net = get_model(net_name, pretrained_base=True, **kwargs)
    if args.resume.strip():
        net.load_parameters(args.resume.strip())
    else:
        # initialize only the parameters that do not hold data yet
        for param in net.collect_params().values():
            if param._data is not None:
                continue
            param.initialize()
    net.collect_params().reset_ctx(ctx)

    # training data
    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
    train_data, val_data = get_dataloader(
        net, train_dataset, val_dataset, FasterRCNNDefaultTrainTransform,
        FasterRCNNDefaultValTransform, args.batch_size, args.num_workers, args.use_fpn)
Exemple #9
0
    # Debug output: print the training directory and whether the sample
    # record file exists inside it.
    print('hello')
    import os.path
    print(args.train)
    print(
        os.path.isfile(os.path.join(args.train, 'birds_ssd_sample_train.rec')))

    # One GPU context per non-blank entry of the comma-separated args.gpus;
    # fall back to a single CPU context when none is given.
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]

    # network
    net_name = '_'.join(('yolo3', args.network, args.dataset))
    args.save_prefix += net_name
    # use sync bn if specified
    if args.syncbn and len(ctx) > 1:
        net = get_model(net_name,
                        pretrained_base=True,
                        norm_layer=gluon.contrib.nn.SyncBatchNorm,
                        norm_kwargs={'num_devices': len(ctx)})
        async_net = get_model(net_name,
                              pretrained_base=False)  # used by cpu worker
    else:
        # without sync BN both names refer to the same network object
        net = get_model(net_name, pretrained_base=True)
        async_net = net
    if args.resume.strip():
        net.load_parameters(args.resume.strip())
        async_net.load_parameters(args.resume.strip())
    else:
        # Warnings raised during initialization are captured into `w` rather
        # than displayed; `w` is not inspected in these lines.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            net.initialize()
            async_net.initialize()
Exemple #10
0
                                [pos[j, 1], pos[j_parent, 1]],
                                [pos[j, 2], pos[j_parent, 2]],
                                zdir='z',
                                c=col)

    if save:
        plt.savefig(save_path)


# Checkpoint location and filter widths handed to get_pose3d_predictor.
ckpt_dir = '../../checkpoint/detectron_pt_coco'
ckpt_name = 'arc_1_ch_1024_epoch_40.bin'
filter_widths = [1, 1, 1]
pose3d_predictor = get_pose3d_predictor(ckpt_dir, ckpt_name, filter_widths)

# Candidate detector names; the first entry is the one actually loaded.
detector_name = ['yolo3_mobilenet1.0_coco', 'yolo3_darknet53_coco']
detector = model_zoo.get_model(detector_name[0], pretrained=True)
pose_net = model_zoo.get_model('alpha_pose_resnet101_v1b_coco',
                               pretrained=True)

# reset the detector to only detect human
# noinspection PyUnresolvedReferences
detector.reset_class(['person'], reuse_weights=['person'])


def predict(img_path):
    # 1. Preprocess the input image and detect people
    x, img = data.transforms.presets.yolo.load_test(img_path, short=256)

    # wall-clock start time — presumably used to time the steps that follow;
    # TODO confirm
    start = time.time()

    # detect persons and bbox
if __name__ == '__main__':
    args = parse_args()
    # Seed mxnet, numpy and Python's builtin random generator.
    gutils.random.seed(args.seed)

    # Training contexts: one GPU per non-blank entry of args.gpus, or a single
    # CPU context when the list comes out empty.
    ctx = [mx.gpu(int(gpu_id)) for gpu_id in args.gpus.split(',') if gpu_id.strip()]
    if not ctx:
        ctx = [mx.cpu()]

    # Network name doubles as a suffix for the save prefix.
    net_name = '_'.join(['yolo3', args.network, args.dataset])
    args.save_prefix = args.save_prefix + net_name

    # Synchronized BN is only used when requested and more than one device
    # is available.
    if args.syncbn:
        num_sync_bn_devices = len(ctx)
    else:
        num_sync_bn_devices = -1
    if num_sync_bn_devices <= 1:
        net = get_model(net_name, pretrained_base=True)
        async_net = net
    else:
        net = get_model(net_name, pretrained_base=True, num_sync_bn_devices=num_sync_bn_devices)
        async_net = get_model(net_name, pretrained_base=False)  # used by cpu worker

    # Either restore both networks from the checkpoint or initialize them,
    # capturing any warnings raised during initialization.
    if not args.resume.strip():
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            net.initialize()
            async_net.initialize()
    else:
        net.load_parameters(args.resume.strip())
        async_net.load_parameters(args.resume.strip())

    # training data
    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
def main():
    """Train an ImageNet classifier that has a main and an auxiliary output.

    Everything is driven by the options returned by ``parse_args()``: logging
    destination, batch size and devices, learning-rate schedule, model name,
    data source (RecordIO files or an image folder) and checkpointing.

    The network is expected to return a pair of outputs per input. Both are
    scored with top-1/top-5 accuracy and both are passed to
    ``MixSoftmaxCrossEntropyLoss`` (auxiliary weight 0.4).
    """
    opt = parse_args()

    # Log both to the configured file and to the console.
    filehandler = logging.FileHandler(opt.logging_file)
    streamhandler = logging.StreamHandler()

    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)

    logger.info(opt)

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167

    # ``opt.batch_size`` is per device; scale it to the global batch size.
    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    # Decay epochs come either from a fixed period or from an explicit
    # comma-separated list; they are then shifted by the warm-up length because
    # the second scheduler below starts counting after the warm-up.
    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    # Linear warm-up from 0 to ``opt.lr`` followed by the configured schedule.
    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=opt.lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=opt.lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])

    model_name = opt.model

    # Model-family specific constructor options.
    kwargs = {'ctx': context, 'pretrained': opt.use_pretrained, 'classes': classes}
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se

    optimizer = 'nag'
    optimizer_params = {'wd': opt.wd, 'momentum': opt.momentum, 'lr_scheduler': lr_scheduler}
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    net = get_model(model_name, **kwargs)
    net.cast(opt.dtype)

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_train_idx, rec_val, rec_val_idx, batch_size, num_workers):
        """Build train/val ``ImageRecordIter`` objects from RecordIO files.

        Returns ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch, ctx)``
        splits ``batch.data[0]`` and ``batch.label[0]`` across the contexts.
        """
        rec_train = os.path.expanduser(rec_train)
        rec_train_idx = os.path.expanduser(rec_train_idx)
        rec_val = os.path.expanduser(rec_val)
        rec_val_idx = os.path.expanduser(rec_val_idx)
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        mean_rgb = [123.68, 116.779, 103.939]
        std_rgb = [58.393, 57.12, 57.375]

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            return data, label

        # Training iterator: shuffled, with random crop/flip/colour augmentation.
        train_data = mx.io.ImageRecordIter(
            path_imgrec=rec_train,
            path_imgidx=rec_train_idx,
            preprocess_threads=num_workers,
            shuffle=True,
            batch_size=batch_size,

            data_shape=(3, input_size, input_size),
            mean_r=mean_rgb[0],
            mean_g=mean_rgb[1],
            mean_b=mean_rgb[2],
            std_r=std_rgb[0],
            std_g=std_rgb[1],
            std_b=std_rgb[2],
            rand_mirror=True,
            random_resized_crop=True,
            max_aspect_ratio=4. / 3.,
            min_aspect_ratio=3. / 4.,
            max_random_area=1,
            min_random_area=0.08,
            brightness=jitter_param,
            saturation=jitter_param,
            contrast=jitter_param,
            pca_noise=lighting_param,
        )
        # Validation iterator: deterministic order, resize then fixed-size crop.
        val_data = mx.io.ImageRecordIter(
            path_imgrec=rec_val,
            path_imgidx=rec_val_idx,
            preprocess_threads=num_workers,
            shuffle=False,
            batch_size=batch_size,

            resize=256,
            data_shape=(3, input_size, input_size),
            mean_r=mean_rgb[0],
            mean_g=mean_rgb[1],
            mean_b=mean_rgb[2],
            std_r=std_rgb[0],
            std_g=std_rgb[1],
            std_b=std_rgb[2],
        )
        return train_data, val_data, batch_fn

    def get_data_loader(data_dir, batch_size, num_workers):
        """Build train/val ``DataLoader`` objects from an ImageNet image folder.

        Returns ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch, ctx)``
        splits ``batch[0]`` and ``batch[1]`` across the contexts.
        """
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            return data, label

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            normalize
        ])
        transform_test = transforms.Compose([
            transforms.Resize(256, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize
        ])

        train_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        return train_data, val_data, batch_fn

    if opt.use_rec:
        train_data, val_data, batch_fn = get_data_rec(opt.rec_train, opt.rec_train_idx,
                                                      opt.rec_val, opt.rec_val_idx,
                                                      batch_size, num_workers)
    else:
        train_data, val_data, batch_fn = get_data_loader(opt.data_dir, batch_size, num_workers)

    # Separate metrics for the main output and for the auxiliary output.
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    acc_top1_aux = mx.metric.Accuracy()
    acc_top5_aux = mx.metric.TopKAccuracy(5)

    # Periodic checkpointing is enabled only when both a directory and a
    # non-zero frequency are configured.
    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def smooth(label, classes, eta=0.1):
        """Turn sparse labels into label-smoothed dense targets.

        ``label`` is a single NDArray or a list of them. For every array a
        ``(len(label), classes)`` array is returned in which each entry is
        ``eta / classes`` except the true class, which is
        ``1 - eta + eta / classes``.
        """
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            ind = l.astype('int')
            res = nd.zeros((ind.shape[0], classes), ctx=l.context)
            res += eta/classes
            res[nd.arange(ind.shape[0], ctx=l.context), ind] = 1 - eta + eta/classes
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data):
        """Evaluate ``net`` on ``val_data``.

        Returns the error rates ``(top1, top5, top1_aux, top5_aux)``, i.e. one
        minus the corresponding accuracy.
        """
        if opt.use_rec:
            val_data.reset()
        acc_top1.reset()
        acc_top5.reset()
        acc_top1_aux.reset()
        acc_top5_aux.reset()
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            # o[0] is the main output, o[1] the auxiliary one.
            acc_top1.update(label, [o[0] for o in outputs])
            acc_top5.update(label, [o[0] for o in outputs])
            acc_top1_aux.update(label, [o[1] for o in outputs])
            acc_top5_aux.update(label, [o[1] for o in outputs])

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        _, top1_aux = acc_top1_aux.get()
        _, top5_aux = acc_top5_aux.get()
        return (1-top1, 1-top5, 1-top1_aux, 1-top5_aux)

    def train(ctx):
        """Run the full training loop on the given context(s).

        Initializes the network, trains for ``opt.num_epochs`` epochs,
        validates after every epoch and writes checkpoints.
        """
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
        # Smoothed labels are dense, so the loss must not expect sparse labels.
        if opt.label_smoothing:
            L = MixSoftmaxCrossEntropyLoss(sparse_label=False, aux_weight=0.4)
        else:
            L = MixSoftmaxCrossEntropyLoss(aux_weight=0.4)

        # Lowest validation top-1 error seen so far (1 == worst possible).
        best_val_score = 1

        for epoch in range(opt.num_epochs):
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            acc_top1.reset()
            acc_top5.reset()
            acc_top1_aux.reset()
            acc_top5_aux.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)
                if opt.label_smoothing:
                    label_smooth = smooth(label, classes)
                else:
                    label_smooth = label
                with ag.record():
                    outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    loss = [L(yhat[0], yhat[1], y) for yhat, y in zip(outputs, label_smooth)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)

                # Metrics always use the original (unsmoothed) labels.
                acc_top1.update(label, [o[0] for o in outputs])
                acc_top5.update(label, [o[0] for o in outputs])
                acc_top1_aux.update(label, [o[1] for o in outputs])
                acc_top5_aux.update(label, [o[1] for o in outputs])
                if opt.log_interval and not (i+1)%opt.log_interval:
                    _, top1 = acc_top1.get()
                    _, top5 = acc_top5.get()
                    _, top1_aux = acc_top1_aux.get()
                    _, top5_aux = acc_top5_aux.get()
                    err_top1, err_top5, err_top1_aux, err_top5_aux = (1-top1, 1-top5, 1-top1_aux, 1-top5_aux)
                    logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t'
                                'top1-err=%f\ttop5-err=%f\ttop1-err-aux=%f\ttop5-err-aux=%f'%(
                                epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                                err_top1, err_top5, err_top1_aux, err_top5_aux))
                    btic = time.time()

            _, top1 = acc_top1.get()
            _, top5 = acc_top5.get()
            _, top1_aux = acc_top1_aux.get()
            _, top5_aux = acc_top5_aux.get()
            err_top1, err_top5, err_top1_aux, err_top5_aux = (1-top1, 1-top5, 1-top1_aux, 1-top5_aux)

            err_top1_val, err_top5_val, err_top1_val_aux, err_top5_val_aux = test(ctx, val_data)

            logger.info('[Epoch %d] training: err-top1=%f err-top5=%f err-top1_aux=%f err-top5_aux=%f'%
                (epoch, err_top1, err_top5, err_top1_aux, err_top5_aux))
            logger.info('[Epoch %d] time cost: %f'%(epoch, time.time()-tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f err-top1_aux=%f err-top5_aux=%f'%
                (epoch, err_top1_val, err_top5_val, err_top1_val_aux, err_top5_val_aux))

            # Best-model checkpoints are only written after epoch 50.
            # NOTE(review): when checkpointing is disabled ``save_dir`` is '' and
            # this path starts with '/'; confirm that is intended.
            if err_top1_val < best_val_score and epoch > 50:
                best_val_score = err_top1_val
                net.save_parameters('%s/%.4f-imagenet-%s-%d-best.params'%(save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, epoch))

        # Always keep the parameters of the final epoch when checkpointing is on.
        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, opt.num_epochs-1))


    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    train(context)
Exemple #13
0
First let's import some necessary libraries:
"""

from matplotlib import pyplot as plt
import gluoncv
from gluoncv import model_zoo, data, utils

######################################################################
# Load a pretrained model
# -------------------------
#
# Let's get a Faster R-CNN model trained on the COCO
# dataset with a ResNet-50 backbone.

net = model_zoo.get_model('faster_rcnn_resnet50_v1b_coco', pretrained=True)

######################################################################
# Pre-process an image
# --------------------
# Similar to the Faster R-CNN inference tutorial, we grab and preprocess a demo image.

im_fname = utils.download('https://github.com/dmlc/web-data/blob/master/' +
                          'gluoncv/detection/biking.jpg?raw=true',
                          path='biking.jpg')
x, orig_img = data.transforms.presets.rcnn.load_test(im_fname)

######################################################################
# Reset classes to exactly what we want
# -------------------------------------
# The original COCO model has 80 classes
Exemple #14
0
"""

from gluoncv import model_zoo, data, utils
from matplotlib import pyplot as plt

######################################################################
# Load a pretrained model
# -------------------------
#
# Let's get a YOLOv3 model trained on the Pascal VOC
# dataset with Darknet53 as the base model. By specifying
# ``pretrained=True``, it will automatically download the model from the model
# zoo if necessary. For more pretrained models, please refer to
# :doc:`../../model_zoo/index`.

net = model_zoo.get_model('yolo3_darknet53_voc', pretrained=True)

######################################################################
# Pre-process an image
# --------------------
#
# Next we download an image, and pre-process it with preset data transforms. Here we
# specify that we resize the short edge of the image to 512 px. You can
# feed an arbitrarily sized image.
# One constraint for YOLO is that the input height and width must be divisible by 32.
#
# You can provide a list of image file names, such as ``[im_fname1, im_fname2,
# ...]`` to :py:func:`gluoncv.data.transforms.presets.yolo.load_test` if you
# want to load multiple images together.
#
# This function returns two results. The first is an NDArray with shape
Exemple #15
0
from gluoncv.utils.viz import plot_keypoints


# Command-line interface of the demo. The description previously advertised
# ImageNet classification, which is not what this script does: it detects
# people and plots their pose keypoints.
parser = argparse.ArgumentParser(
    description='Detect people in a given image and estimate their pose keypoints')
parser.add_argument('--detector', type=str, default='yolo3_mobilenet1.0_coco',
                    help='name of the detection model to use')
parser.add_argument('--pose-model', type=str, default='simple_pose_resnet50_v1b',
                    help='name of the pose estimation model to use')
parser.add_argument('--input-pic', type=str, required=True,
                    help='path to the input picture')
# Parsed at import time; the rest of the script reads its settings from ``opt``.
opt = parser.parse_args()

def keypoint_detection(img_path, detector, pose_net):
    """Detect objects in an image, estimate keypoints and display the result.

    The image at ``img_path`` is loaded with the YOLO test preset (short edge
    512), passed through ``detector``, converted to pose-network input with
    ``detector_to_simple_pose``, passed through ``pose_net`` and finally drawn
    with ``plot_keypoints`` (box threshold 0.5, keypoint threshold 0.2).
    Nothing is returned; the figure is shown via ``plt.show()``.
    """
    tensor, image = data.transforms.presets.yolo.load_test(img_path, short=512)
    ids, det_scores, boxes = detector(tensor)

    # Crop the detections into the input expected by the pose network.
    crops, scaled_boxes = detector_to_simple_pose(image, ids, det_scores, boxes)
    heatmaps = pose_net(crops)
    coords, conf = heatmap_to_coord(heatmaps, scaled_boxes)

    plot_keypoints(image, coords, conf, ids, boxes, det_scores,
                   box_thresh=0.5, keypoint_thresh=0.2)
    plt.show()

# Script entry point: load both models named on the command line and run the
# demo on the requested picture.
if __name__ == '__main__':
    detector = get_model(opt.detector, pretrained=True)
    # Restrict the detector to the 'person' class, reusing its existing weights.
    detector.reset_class(["person"], reuse_weights=['person'])
    net = get_model(opt.pose_model, pretrained=True)

    keypoint_detection(opt.input_pic, detector, net)

#############################################################################
# Model Definition
# -----------------
#
# A Simple Pose model consists of a main body of a resnet, and several deconvolution layers.
# Its final layer is a convolution layer predicting one heatmap for each keypoint.
#
# Let's take a look at the smallest one from the GluonCV Model Zoo, using ``ResNet18`` as its base model.
#
# We load the pre-trained parameters for the ``ResNet18`` layers,
# and initialize the deconvolution layer and the final convolution layer.

# Everything below is placed on GPU 0.
context = mx.gpu(0)
net = get_model('simple_pose_resnet18_v1b', num_joints=17, pretrained_base=True,
                ctx=context, pretrained_ctx=context)
# Only the base network is pretrained, so the deconvolution layers and the
# final layer are initialized explicitly.
net.deconv_layers.initialize(ctx=context)
net.final_layer.initialize(ctx=context)

#############################################################################
# We can take a look at the summary of the model

# Dummy all-ones input of shape (1, 3, 256, 192) used to trace the network
# (presumably batch, channels, height, width -- TODO confirm).
x = mx.nd.ones((1, 3, 256, 192), ctx=context)
net.summary(x)

#############################################################################
# 
# .. note::
# 
#     The Batch Normalization implementation from cuDNN has a negative impact on the model training, 
#     as reported in these issues [2]_, [3]_ .
Exemple #17
0
def main():
    opt = parse_args()
    makedirs(opt.log_dir)
    filehandler = logging.FileHandler(opt.log_dir + '/' + opt.logging_file)
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)
    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167
    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(
            range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    lr_scheduler = LRSequential([
        LRScheduler('linear',
                    base_lr=0,
                    target_lr=opt.lr,
                    nepochs=opt.warmup_epochs,
                    iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode,
                    base_lr=opt.lr,
                    target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay,
                    power=2)
    ])

    sw = SummaryWriter(logdir=opt.log_dir, flush_secs=5, verbose=False)
    optimizer = 'sgd'
    optimizer_params = {
        'wd': opt.wd,
        'momentum': opt.momentum,
        'lr_scheduler': lr_scheduler
    }
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True
    #net = ghostnet(num_classes=classes, width=opt.width, dropout=opt.dropout)
    net = ghostnet()

    net.cast(opt.dtype)
    #net.hybridize()

    # Compare by value: an identity check against a string literal depends on
    # interpreter string interning and triggers a SyntaxWarning on Python 3.8+.
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)

    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        teacher = get_model(teacher_name,
                            pretrained=True,
                            classes=classes,
                            ctx=context)
        teacher.cast(opt.dtype)
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_train_idx, rec_val, rec_val_idx,
                     batch_size, num_workers, seed):
        """Create RecordIO-backed training and validation iterators.

        The four paths are user-expanded first. The training iterator shuffles
        and applies random crop, mirror, colour jitter and PCA noise, seeded
        with ``seed``; the validation iterator resizes to
        ``ceil(input_size / crop_ratio)`` and does not shuffle.

        Returns ``(train_data, val_data, batch_fn)``; ``batch_fn(batch, ctx)``
        splits ``batch.data[0]`` and ``batch.label[0]`` across ``ctx``.
        """
        rec_train, rec_train_idx, rec_val, rec_val_idx = [
            os.path.expanduser(path)
            for path in (rec_train, rec_train_idx, rec_val, rec_val_idx)
        ]
        color_jitter = 0.4
        pca_lighting = 0.1
        side = opt.input_size
        # A non-positive crop ratio falls back to 0.875.
        ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        short_edge = int(math.ceil(side / ratio))

        # Settings shared by the training and the validation iterator.
        shared = dict(
            preprocess_threads=num_workers,
            batch_size=batch_size,
            data_shape=(3, side, side),
            mean_r=123.68,
            mean_g=116.779,
            mean_b=103.939,
            std_r=58.393,
            std_g=57.12,
            std_b=57.375,
        )

        def batch_fn(batch, ctx):
            def scatter(array):
                return gluon.utils.split_and_load(array,
                                                  ctx_list=ctx,
                                                  batch_axis=0)

            return scatter(batch.data[0]), scatter(batch.label[0])

        train_data = mx.io.ImageRecordIter(
            path_imgrec=rec_train,
            path_imgidx=rec_train_idx,
            shuffle=True,
            rand_mirror=True,
            random_resized_crop=True,
            max_aspect_ratio=4. / 3.,
            min_aspect_ratio=3. / 4.,
            max_random_area=1,
            min_random_area=0.08,
            brightness=color_jitter,
            saturation=color_jitter,
            contrast=color_jitter,
            pca_noise=pca_lighting,
            seed=seed,
            seed_aug=seed,
            shuffle_chunk_seed=seed,
            **shared)
        val_data = mx.io.ImageRecordIter(
            path_imgrec=rec_val,
            path_imgidx=rec_val_idx,
            shuffle=False,
            resize=short_edge,
            **shared)
        return train_data, val_data, batch_fn

    def get_data_loader(data_dir, batch_size, num_workers):
        """Create ``DataLoader`` objects over the ImageNet folder ``data_dir``.

        Training images get a random resized crop, horizontal flip, colour
        jitter and lighting noise; validation images are resized to
        ``ceil(input_size / crop_ratio)`` and centre-cropped. Both pipelines end
        with ``ToTensor`` and the same normalization.

        Returns ``(train_data, val_data, batch_fn)``; ``batch_fn(batch, ctx)``
        splits ``batch[0]`` and ``batch[1]`` across ``ctx``.
        """
        channel_norm = transforms.Normalize([0.485, 0.456, 0.406],
                                            [0.229, 0.224, 0.225])
        color_jitter = 0.4
        pca_lighting = 0.1
        side = opt.input_size
        # A non-positive crop ratio falls back to 0.875.
        ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        short_edge = int(math.ceil(side / ratio))

        def batch_fn(batch, ctx):
            return tuple(
                gluon.utils.split_and_load(batch[idx],
                                           ctx_list=ctx,
                                           batch_axis=0)
                for idx in (0, 1))

        train_steps = [
            transforms.RandomResizedCrop(side),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=color_jitter,
                                         contrast=color_jitter,
                                         saturation=color_jitter),
            transforms.RandomLighting(pca_lighting),
            transforms.ToTensor(),
            channel_norm,
        ]
        transform_train = transforms.Compose(train_steps)
        eval_steps = [
            transforms.Resize(short_edge, keep_ratio=True),
            transforms.CenterCrop(side),
            transforms.ToTensor(),
            channel_norm,
        ]
        transform_test = transforms.Compose(eval_steps)

        train_set = imagenet.classification.ImageNet(data_dir, train=True)
        train_data = gluon.data.DataLoader(
            train_set.transform_first(transform_train),
            batch_size=batch_size,
            shuffle=True,
            last_batch='discard',
            num_workers=num_workers)

        val_set = imagenet.classification.ImageNet(data_dir, train=False)
        val_data = gluon.data.DataLoader(
            val_set.transform_first(transform_test),
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers)

        return train_data, val_data, batch_fn

    if opt.use_rec:
        if opt.use_dali:
            train_data = dali.get_data_rec((3, opt.input_size, opt.input_size),
                                           opt.crop_ratio,
                                           opt.rec_train,
                                           opt.rec_train_idx,
                                           opt.batch_size,
                                           num_workers=2,
                                           train=True,
                                           shuffle=True,
                                           backend='dali-gpu',
                                           gpu_ids=[0, 1],
                                           kv_store='nccl',
                                           dtype=opt.dtype,
                                           input_layout='NCHW')
            val_data = dali.get_data_rec((3, opt.input_size, opt.input_size),
                                         opt.crop_ratio,
                                         opt.rec_val,
                                         opt.rec_val_idx,
                                         opt.batch_size,
                                         num_workers=2,
                                         train=False,
                                         shuffle=False,
                                         backend='dali-gpu',
                                         gpu_ids=[0, 1],
                                         kv_store='nccl',
                                         dtype=opt.dtype,
                                         input_layout='NCHW')

            def batch_fn(batch, ctx):
                """Split ``batch[0]`` and ``batch[1]`` across the contexts in ``ctx``.

                Returns the ``(data, label)`` pair of per-context lists.
                """
                return tuple(
                    gluon.utils.split_and_load(batch[idx],
                                               ctx_list=ctx,
                                               batch_axis=0)
                    for idx in (0, 1))
        else:
            train_data, val_data, batch_fn = get_data_rec(
                opt.rec_train, opt.rec_train_idx, opt.rec_val, opt.rec_val_idx,
                batch_size, num_workers, opt.random_seed)
    else:
        train_data, val_data, batch_fn = get_data_loader(
            opt.data_dir, batch_size, num_workers)

    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        """Build mixup targets from sparse labels.

        ``label`` is one NDArray or a list of them. Each array is one-hot
        encoded (smoothed by ``eta``) and blended with the encoding of the same
        array in reversed order: ``lam * y + (1 - lam) * y_reversed``.
        Returns a list with one blended array per input array.
        """
        def encode(arr):
            return arr.one_hot(classes,
                               on_value=1 - eta + eta / classes,
                               off_value=eta / classes)

        parts = [label] if isinstance(label, nd.NDArray) else label
        return [
            lam * encode(part) + (1 - lam) * encode(part[::-1])
            for part in parts
        ]

    def smooth(label, classes, eta=0.1):
        """One-hot encode labels with label smoothing.

        ``label`` is one NDArray or a list of them. The true class receives
        ``1 - eta + eta / classes`` and every other class ``eta / classes``.
        Returns a list with one encoded array per input array.
        """
        parts = [label] if isinstance(label, nd.NDArray) else label
        return [
            part.one_hot(classes,
                         on_value=1 - eta + eta / classes,
                         off_value=eta / classes)
            for part in parts
        ]

    def test(net, batch_fn, ctx, val_data):
        """Evaluate ``net`` over ``val_data`` and return ``(top1, top5)`` accuracy.

        The iterator is reset first when RecordIO input is in use, and both
        accuracy metrics are cleared before the pass.
        """
        if opt.use_rec:
            val_data.reset()
        metrics = (acc_top1, acc_top5)
        for metric in metrics:
            metric.reset()

        for batch in val_data:
            inputs, targets = batch_fn(batch, ctx)
            preds = [net(x.astype(opt.dtype, copy=False)) for x in inputs]
            for metric in metrics:
                metric.update(targets, preds)

        # Each metric reports (name, value); only the value is returned.
        return tuple(metric.get()[1] for metric in metrics)

    def train(ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        # Initialize from scratch only when no parameter file was given.
        # Compare by value: ``is ''`` relies on string interning and triggers a
        # SyntaxWarning on Python 3.8+.
        if opt.resume_params == '':
            net.initialize(mx.init.MSRAPrelu(), ctx=ctx, force_reinit=True)
        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        trainer = gluon.Trainer(net.collect_params(), optimizer,
                                optimizer_params)
        # Restore trainer state when a states file was given. Compare by value:
        # ``is not ''`` relies on string interning and triggers a SyntaxWarning
        # on Python 3.8+.
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True

        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(
                temperature=opt.temperature,
                hard_weight=opt.hard_weight,
                sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(
                sparse_label=sparse_label_loss)

        best_val_score = 0
        iteration = 0

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            train_metric.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):

                data, label = batch_fn(batch, ctx)
                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature) \
                                    for X in data]

                with ag.record():
                    outputs = [
                        net(X.astype(opt.dtype, copy=False)) for X in data
                    ]
                    if distillation:
                        loss = [
                            L(yhat.astype('float32', copy=False),
                              y.astype('float32', copy=False),
                              p.astype('float32', copy=False))
                            for yhat, y, p in zip(outputs, label, teacher_prob)
                        ]
                    else:
                        loss = [
                            L(yhat, y.astype(opt.dtype, copy=False))
                            for yhat, y in zip(outputs, label)
                        ]
                for l in loss:
                    l.backward()
                sw.add_scalar(tag='train_loss',
                              value=sum([l.sum().asscalar()
                                         for l in loss]) / len(loss),
                              global_step=iteration)

                trainer.step(batch_size)

                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                                    for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)
                train_metric_name, train_metric_score = train_metric.get()
                sw.add_scalar(
                    tag='train_{}_curves'.format(train_metric_name),
                    value=('train_{}_value'.format(train_metric_name),
                           train_metric_score),
                    global_step=iteration)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f'
                        % (epoch, i, batch_size * opt.log_interval /
                           (time.time() - btic), train_metric_name,
                           train_metric_score, trainer.learning_rate))
                    btic = time.time()
                iteration += 1
            if epoch == 0:
                sw.add_graph(net)

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))

            top1_val_acc, top5_val_acc = test(net, batch_fn, ctx, val_data)
            sw.add_scalar(tag='val_acc_curves',
                          value=('valid_acc_value', top1_val_acc),
                          global_step=epoch)
            logger.info('Epoch [%d] training: %s=%f' %
                        (epoch, train_metric_name, train_metric_score))
            logger.info('Epoch [%d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info('Epoch [%d] validation: top1_acc=%f top5_acc=%f' %
                        (epoch, top1_val_acc, top5_val_acc))

            if top1_val_acc > best_val_score:
                best_val_score = top1_val_acc
                net.collect_params().save(
                    '%s/%.4f-ghostnet_imagenet-%d-best.params' %
                    (save_dir, best_val_score, epoch))
                trainer.save_states(
                    '%s/%.4f-ghostnet_imagenet-%d-best.states' %
                    (save_dir, best_val_score, epoch))

            if save_frequency and save_dir and (epoch +
                                                1) % save_frequency == 0:
                net.collect_params().save('%s/ghostnet_imagenet-%d.params' %
                                          (save_dir, epoch))
                trainer.save_states('%s/ghostnet_imagenet-%d.states' %
                                    (save_dir, epoch))

        sw.close()
        if save_frequency and save_dir:
            net.collect_params().save('%s/ghostnet_imagenet-%d.params' %
                                      (save_dir, opt.num_epochs - 1))
            trainer.save_states('%s/ghostnet_imagenet-%d.states' %
                                (save_dir, opt.num_epochs - 1))

    net.hybridize(static_alloc=True, static_shape=True)
    if distillation:
        teacher.hybridize(static_alloc=True, static_shape=True)
    train(context)
            # split ground truths
            gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
            gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4))

        # update metric        
        eval_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids)
    return eval_metric.get()

if __name__ == '__main__':
    # Script entry point: build a YOLOv3 detector, run it over the validation
    # set and print one "name=value" line per metric returned by validate().
    args = parse_args()

    # Contexts: one GPU per id listed in --gpus, falling back to the CPU
    # when the list is empty.
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]

    # network
    net_name = args.net_params

    # Test the parameter path by value. The previous `is not ""` compared
    # object identity, which only works by accident of string interning and
    # raises a SyntaxWarning on Python 3.8+. An empty (or missing) path
    # selects the pretrained COCO model.
    if net_name:
        # Read the class list once and share it between model and dataset.
        custom_classes = read_classes(args)
        net = get_model("yolo3_darknet53_custom", classes=custom_classes,
                        pretrained_base=True)
        net.load_parameters(net_name)
        val_dataset, val_metric = get_dataset(args, custom_classes)
    else:
        net = model_zoo.get_model('yolo3_darknet53_coco', pretrained=True)
        val_dataset, val_metric = get_dataset(args, net.classes)

    net.collect_params().reset_ctx(ctx)
    val_loader = get_dataloader(net, val_dataset, args.data_shape,
                                args.batch_size, args.num_workers, args)
    map_name, mean_ap = validate(net, val_loader, ctx, val_metric)
    val_msg = '\n'.join(['{}={}'.format(k, v) for k, v in zip(map_name, mean_ap)])
    print('Validation: \n{}'.format(val_msg))
Exemple #19
0
# network, Region Proposal Network(including its own anchor system, proposal generator),
# region-aware pooling layers, class predictors and bounding box offset predictors.
#
# We highly recommend you to read the original paper to learn more about the ideas
# behind Faster-RCNN [Ren15]_.
#
# `Gluon Model Zoo <../../model_zoo/index.html>`__ has a few built-in Faster-RCNN networks, more on the way.
# You can load your favorite one with one simple line of code:
#
# .. hint::
#
#    To avoid downloading models in this tutorial, we set ``pretrained_base=False``;
#    in practice we usually want to load pre-trained ImageNet models by setting
#    ``pretrained_base=True``.
from gluoncv import model_zoo
net = model_zoo.get_model('faster_rcnn_resnet50_v1b_voc',
                          pretrained_base=False)
print(net)

##############################################################################
# Faster-RCNN network is callable with image tensor
import mxnet as mx
x = mx.nd.zeros(shape=(1, 3, 600, 800))
net.initialize()
cids, scores, bboxes = net(x)

##############################################################################
# Faster-RCNN returns three values, where ``cids`` are the class labels,
# ``scores`` are confidence scores of each prediction,
# and ``bboxes`` are absolute coordinates of corresponding bounding boxes.

##############################################################################
Exemple #20
0
        val_dataset.transform(transform_val),
        batch_size=batch_size, shuffle=False, last_batch='keep',
        num_workers=num_workers)

    return val_dataset, val_data, val_batch_fn

# Parse the comma-separated input size option into a list of ints.
input_size = list(map(int, opt.input_size.split(',')))

# Validation dataset, its loader and the matching batch-splitting function.
val_dataset, val_data, val_batch_fn = get_data_loader(
    opt.data_dir, batch_size, num_workers, input_size)

# Keypoint metric evaluated against the validation dataset.
val_metric = COCOKeyPointsMetric(
    val_dataset,
    'coco_keypoints',
    data_shape=tuple(input_size),
    in_vis_thresh=opt.score_threshold)

# Use model-zoo weights only when no parameter file was supplied;
# otherwise load the supplied file into the freshly built network.
use_pretrained = not opt.params_file
model_name = opt.model
net = get_model(model_name,
                ctx=context,
                num_joints=num_joints,
                pretrained=use_pretrained)
if not use_pretrained:
    net.load_parameters(opt.params_file, ctx=context)
net.hybridize()

def validate(val_data, val_dataset, net, ctx):
    """Run ``net`` over every batch yielded by ``val_data``.

    Relies on the module-level ``val_metric`` (reset before iterating) and
    ``val_batch_fn`` (unpacks a batch into data, scale, center, score and
    image id for the given contexts).

    ``ctx`` may be a single ``mx.Context`` or a list of contexts; a single
    context is wrapped in a list. ``val_dataset`` is not referenced in the
    visible part of the function.

    NOTE(review): this excerpt ends right after the forward pass, so how the
    outputs feed the metric and what is returned cannot be seen here.
    """
    # Normalise a single context to a one-element list.
    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    # Start from a clean metric state.
    val_metric.reset()

    # Imported locally, so tqdm is only required when validation runs.
    from tqdm import tqdm
    for batch in tqdm(val_data):
        data, scale, center, score, imgid = val_batch_fn(batch, ctx)

        # One forward pass per element of ``data``.
        outputs = [net(X) for X in data]
Exemple #21
0
def main():
    """Classify every video listed in ``opt.data_list`` and log the result.

    Steps, all driven by the options returned by ``parse_args()``:

    1. Create ``opt.save_dir`` and log to a file inside it and to the console.
    2. Pick the compute context (CPU when ``opt.gpu_id == -1``, else that GPU).
    3. Build the test-time transform (ten-crop, three-crop or the default
       validation transform) and set ``opt.num_crop`` accordingly.
    4. Build the network, loading weights from ``opt.resume_params`` when it
       is given and model-zoo weights were not requested.
    5. Run each listed video through the network, optionally saving the raw
       output (``opt.save_logits``) and the arg-max label
       (``opt.save_preds``) as ``.npy`` files in ``opt.save_dir``.

    Blank lines in the list file are ignored.
    """
    opt = parse_args()

    makedirs(opt.save_dir)

    # Log to a file in the save directory and to the console.
    filehandler = logging.FileHandler(
        os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    gc.set_threshold(100, 5, 5)

    # set env
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        context = mx.gpu(opt.gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    # A non-empty hashtag replaces the boolean flag, so it is what gets
    # passed to get_model() as ``pretrained``.
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    num_classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=num_classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Test the path by value: the previous ``is not ''`` compared object
    # identity, which depends on string interning and raises a SyntaxWarning
    # on Python 3.8+.
    if opt.resume_params and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get classes list, if we are using a pretrained network from the model_zoo
    class_names = None
    if opt.use_pretrained:
        if "kinetics400" in model_name:
            class_names = Kinetics400Attr().classes
        elif "ucf101" in model_name:
            class_names = UCF101Attr().classes
        elif "hmdb51" in model_name:
            class_names = HMDB51Attr().classes
        elif "sthsth" in model_name:
            class_names = SomethingSomethingV2Attr().classes

    # get data
    # Close the list file deterministically and drop blank lines, which
    # would otherwise raise IndexError on ``vline.split()[0]`` below.
    with open(opt.data_list, 'r') as f:
        data_list = [line for line in f if line.strip()]
    num_videos = len(data_list)
    logger.info('Load %d video samples.' % num_videos)

    # build a pseudo dataset instance to use its children class methods
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 data_aug=opt.data_aug,
                                 lazy_init=True)

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        # The first whitespace-separated field of each line is the video path.
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        pred = net(video_input.astype(opt.dtype, copy=False))

        # Convert to NumPy once and reuse the array for saving and arg-max.
        pred_np = pred.asnumpy()
        if opt.save_logits:
            logits_file = '%s_%s_logits.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, logits_file), pred_np)
        pred_label = np.argmax(pred_np)
        if opt.save_preds:
            preds_file = '%s_%s_preds.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, preds_file), pred_label)

        # Try to report a text label instead of the number.
        if class_names:
            pred_label = class_names[pred_label]

        logger.info('%04d/%04d: %s is predicted to class %s' %
                    (vid, num_videos, video_name, pred_label))

    end_time = time.time()
    logger.info('Total inference time is %4.2f minutes' %
                ((end_time - start_time) / 60))
from gluoncv.model_zoo import get_model
from gluoncv.utils import makedirs, TrainingHistory

################################################################
#
# There are numerous structures for convolutional neural networks.
# Here we pick a simple yet well-performing structure, ``cifar_resnet20_v1``, for the
# tutorial.

# number of GPUs to use
num_gpus = 1
ctx = [mx.gpu(i) for i in range(num_gpus)]

# Get the model CIFAR_ResNet20_v1, with 10 output classes, without pre-trained weights
net = get_model('cifar_resnet20_v1', classes=10)
net.initialize(mx.init.Xavier(), ctx = ctx)

################################################################
# Data Augmentation and Data Loader
# ---------------------------------
#
# Data augmentation is a common technique used for training. It is
# based on the assumption that, for the same object, photos under different
# composition, lighting conditions, or color should all yield the same prediction.
#
# Here are photos of the Golden Gate Bridge, taken by many people,
# at different times and from different angles.
# We can easily tell that they are photos of the same thing.
#
# |image-golden-bridge|
##########################################################
# Mask RCNN Network
# -------------------
# In GluonCV, Mask RCNN network :py:class:`gluoncv.model_zoo.MaskRCNN`
# is inherited from Faster RCNN network :py:class:`gluoncv.model_zoo.FasterRCNN`.
#
# `Gluon Model Zoo <../../model_zoo/index.html>`__ has some Mask RCNN pretrained networks.
# You can load your favorite one with one simple line of code:
#
# .. hint::
#
#    To avoid downloading models in this tutorial, we set ``pretrained_base=False``,
#    in practice we usually want to load pre-trained imagenet models by setting
#    ``pretrained_base=True``.
from gluoncv import model_zoo
net = model_zoo.get_model('mask_rcnn_resnet50_v1b_coco', pretrained_base=False)
print(net)

##############################################################################
# Mask-RCNN has identical inputs but produces an additional output.
# ``cids`` are the class labels,
# ``scores`` are confidence scores of each prediction,
# ``bboxes`` are absolute coordinates of corresponding bounding boxes.
# ``masks`` are predicted segmentation masks corresponding to each bounding box
import mxnet as mx
x = mx.nd.zeros(shape=(1, 3, 600, 800))
net.initialize()
cids, scores, bboxes, masks = net(x)

##############################################################################
# During training, an additional output is returned:
Exemple #24
0
    logger.info('[Epoch {}] Validation: \n{}'.format(args.start_epoch,
                                                     val_msg))


if __name__ == '__main__':
    # Script entry point: build the tiny-darknet YOLOv3 network for the
    # selected dataset, load saved weights into it and prepare the
    # validation data.
    args = parse_args()

    # evaluating contexts: one GPU per id in --gpus, or the CPU when none is given
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]

    # network
    # The model name is derived from the dataset name and is also appended
    # to the checkpoint prefix.
    net_name = '_'.join(('yolo3', 'tiny_darknet', args.dataset))
    args.save_prefix += net_name

    net = get_model(net_name)
    # Without an explicit --resume path the checkpoint file name is rebuilt
    # from the save prefix and the start epoch, so one of the two is required.
    if not args.resume.strip():
        if args.start_epoch == -1:
            raise ValueError(
                "You have to either give the path of the saved model or specify the start epoch!"
            )
        # Predict the path of the saved weights from the `start_epoch` parameter
        args.resume = '{:s}_{:04d}.params'.format(args.save_prefix,
                                                  args.start_epoch)
    print(f'Loading weights from {args.resume}')
    net.load_parameters(args.resume.strip())

    # val data
    val_dataset, eval_metric = get_dataset(args.dataset, args)
    val_data = get_dataloader(val_dataset, args.data_shape, args.batch_size,
                              args.num_workers, args)
from gluoncv import model_zoo

model_zoo.get_model('yolo3_darknet53_coco', pretrained=True)
    batch_size=batch_size, shuffle=False, num_workers = num_workers)

################################################################################
#
# Note that only ``train_data`` uses ``transform_train``, while
# ``val_data`` and ``test_data`` use ``transform_test`` to produce deterministic
# results for evaluation.
#
# Model and Trainer
# -----------------
#
# We use a pre-trained ``ResNet50_v2`` model, which has balanced accuracy and
# computation cost.

model_name = 'ResNet50_v2'
finetune_net = get_model(model_name, pretrained=True)
with finetune_net.name_scope():
    finetune_net.output = nn.Dense(classes)
finetune_net.output.initialize(init.Xavier(), ctx = ctx)
finetune_net.collect_params().reset_ctx(ctx)
finetune_net.hybridize()

trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', {
                        'learning_rate': lr, 'momentum': momentum, 'wd': wd})
metric = mx.metric.Accuracy()
L = gluon.loss.SoftmaxCrossEntropyLoss()

################################################################################
# Here's an illustration of the pre-trained model
# and our newly defined model:
#
Exemple #27
0
        save_params(net, best_map, current_map, epoch, args.save_interval, args.save_prefix)

if __name__ == '__main__':
    # Script entry point: seed the RNGs, pick the devices, build the SSD
    # network (plus the copy handed to CPU workers) and load the datasets.
    args = parse_args()
    # fix seed for mxnet, numpy and python builtin random generator.
    gutils.random.seed(args.seed)

    # training contexts: one GPU per id in --gpus, or the CPU when none is given
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]

    # network
    # The model name encodes input size, backbone and dataset; it is also
    # appended to the checkpoint prefix.
    net_name = '_'.join(('ssd', str(args.data_shape), args.network, args.dataset))
    args.save_prefix += net_name
    if args.syncbn and len(ctx) > 1:
        # Multi-device synchronized batch norm: the training network uses
        # SyncBatchNorm, while a separate network built without that layer
        # serves the CPU workers.
        net = get_model(net_name, pretrained_base=True, norm_layer=gluon.contrib.nn.SyncBatchNorm,
                        norm_kwargs={'num_devices': len(ctx)})
        async_net = get_model(net_name, pretrained_base=False)  # used by cpu worker
    else:
        # Single network shared by training and the CPU workers.
        net = get_model(net_name, pretrained_base=True, norm_layer=gluon.nn.BatchNorm)
        async_net = net
    if args.resume.strip():
        # Resume both networks from the same checkpoint file.
        net.load_parameters(args.resume.strip())
        async_net.load_parameters(args.resume.strip())
    else:
        # record=True collects warnings raised during initialization into
        # ``w`` instead of printing them; ``w`` is not read in the visible code.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            net.initialize()
            async_net.initialize()

    # training data
    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
Exemple #28
0

'''Loading the model and webcam
----------------------------

In this tutorial we feed frames from the webcam into a detector, 
then we estimate the pose for each detected person in the frame.

For the detector we use ``ssd_512_mobilenet1.0_coco`` as it is fast and accurate enough.


.. code-block:: python'''

ctx = mx.cpu()
detector_name = "ssd_512_mobilenet1.0_coco"
detector = get_model(detector_name, pretrained=True, ctx=ctx)


'''The pre-trained model tries to detect all 80 classes of objects in an image,
however in pose estimation we are only interested in one object class: person.

To speed up the detector, we can reset the prediction head to only include the classes we need.

.. code-block:: python'''

detector.reset_class(classes=['person'], reuse_weights={'person':'person'})
detector.hybridize()

'''Next, for the estimator, we choose ``simple_pose_resnet18_v1b`` because it is lightweight.

The default ``simple_pose_resnet18_v1b`` model was trained with input size 256x192.
Exemple #29
0
            outputs = tracker.track(frame, ctx=mx.cpu())
            pred_bbox = outputs['bbox']
            pred_bboxes.append(pred_bbox)
            scores.append(outputs['best_score'])
        pred_bbox = list(map(int, pred_bbox))
        cv2.rectangle(
            frame, (pred_bbox[0], pred_bbox[1]),
            (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
            (0, 255, 255), 3)
        cv2.imwrite(os.path.join(opt.save_dir, '%04d.jpg' % (ind + 1)), frame)


if __name__ == '__main__':
    # Script entry point: load a pretrained tracking model, read the video
    # frames, show the first frame and run the tracker over the video.
    opt = parse_args()
    # ######################################################################
    # Load a pretrained model
    # -------------------------
    #
    # Let's get a pretrained SiamRPN model. The model name is taken from the
    # command-line options. By specifying ``pretrained=True``, it will
    # automatically download the model from the model zoo if necessary.
    # For more pretrained models, please refer to
    # :doc:`../../model_zoo/index`.
    #
    # NOTE(review): ``opt.netwrok`` looks like a misspelling of ``network``,
    # but it has to match the attribute produced by ``parse_args()``, which
    # is not visible here; confirm before renaming.
    net = model_zoo.get_model(opt.netwrok, ctx=mx.cpu(), pretrained=True)
    tracker = build_tracker(net)
    # Pre-process data
    video_frames = read_data(opt)
    ######################################################################
    # Display the first frame before tracking starts.
    plt.imshow(video_frames[0])
    plt.show()
    # Predict with a SiamRPN and make inference
    inference(video_frames, tracker, opt)
from gluoncv.data.transforms.pose import detector_to_simple_pose, heatmap_to_coord

######################################################################
# Load a pretrained model
# -------------------------
#
# Let's get a Simple Pose model trained with input images of size 256x192 on MS COCO
# dataset. We pick the one using ResNet-18 V1b as the base model. By specifying
# ``pretrained=True``, it will automatically download the model from the model
# zoo if necessary. For more pretrained models, please refer to
# :doc:`../../model_zoo/index`.
#
# Note that a Simple Pose model takes a top-down strategy to estimate
# human pose in detected bounding boxes from an object detection model.

detector = model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True)
pose_net = model_zoo.get_model('simple_pose_resnet18_v1b', pretrained=True)

# Note that we can reset the classes of the detector to only include
# human, so that the NMS process is faster.

detector.reset_class(["person"], reuse_weights=['person'])

######################################################################
# Pre-process an image for detector, and make inference
# --------------------
#
# Next we download an image, and pre-process with preset data transforms. Here we
# specify that we resize the short edge of the image to 512 px. But you can
# feed an arbitrarily sized image.
#
Exemple #31
0
    image_shape = (3, 224, 224)
    data_shape = (batch_size, ) + image_shape
    train_data = SyntheticDataIter(num_classes, data_shape, epoch_size,
                                   np.float32, context)
    val_data = None

# Get model from GluonCV model zoo
# https://gluon-cv.mxnet.io/model_zoo/index.html
kwargs = {
    'ctx': context,
    'pretrained': args.use_pretrained,
    'classes': num_classes
}
if args.last_gamma:
    kwargs['last_gamma'] = True
net = get_model(args.model, **kwargs)
net.cast(args.dtype)

# Create initializer
initializer = mx.init.Xavier(rnd_type='gaussian',
                             factor_type="in",
                             magnitude=2)


def train_gluon():
    # NOTE(review): only the beginning of this function is visible in this
    # excerpt; the training loop itself cannot be seen here.
    def evaluate(epoch):
        # Evaluation is skipped unless ``args.use_rec`` is set.
        # presumably because ``val_data`` is None on the synthetic-data path; confirm
        if not args.use_rec:
            return

        # Rewind the validation iterator and start a fresh accuracy metric.
        val_data.reset()
        acc_top1 = mx.metric.Accuracy()
Exemple #32
0
def main(logger):
    """Extract and save a feature array for every video in ``opt.data_list``.

    Options come from ``parse_args()``. The network is built with
    ``feat_ext=True``, and its output for each video is written to
    ``<model>_<video>_feat.npy`` inside ``opt.save_dir``.

    Parameters
    ----------
    logger : logging.Logger
        Receives the parsed options, progress and timing messages.

    Notes
    -----
    * ``opt.gpu_id == -1`` selects the CPU; any other value selects that GPU.
    * Blank lines in the list file are ignored.
    * Progress is logged every ``opt.log_interval`` videos; an interval of 0
      disables progress logging instead of raising ``ZeroDivisionError``.
    """
    opt = parse_args()
    logger.info(opt)
    gc.set_threshold(100, 5, 5)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(opt.save_dir, exist_ok=True)

    # set env
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        context = mx.gpu(opt.gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    # A non-empty hashtag replaces the boolean flag, so it is what gets
    # passed to get_model() as ``pretrained``.
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    feat_ext=True,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Test the path by value: the previous ``is not ''`` compared object
    # identity, which depends on string interning and raises a SyntaxWarning
    # on Python 3.8+.
    if opt.resume_params and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get data
    # Close the list file deterministically and drop blank lines, which
    # would otherwise raise IndexError on ``vline.split()[0]`` below.
    with open(opt.data_list, 'r') as f:
        data_list = [line for line in f if line.strip()]
    num_videos = len(data_list)
    logger.info('Load %d video samples.' % num_videos)

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        # The first whitespace-separated field of each line is the video path.
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test)
        video_input = video_data.as_in_context(context)
        video_feat = net(video_input.astype(opt.dtype, copy=False))

        feat_file = '%s_%s_feat.npy' % (model_name, video_name)
        np.save(os.path.join(opt.save_dir, feat_file), video_feat.asnumpy())

        if opt.log_interval and vid > 0 and vid % opt.log_interval == 0:
            logger.info('%04d/%04d is done' % (vid, num_videos))

    end_time = time.time()
    logger.info('Total feature extraction time is %4.2f minutes' %
                ((end_time - start_time) / 60))
Exemple #33
0
def test(args):
    """Evaluate a segmentation model or write its predicted masks to disk.

    With ``args.eval`` set, the 'val' split is scored and the running pixel
    accuracy and mean IoU are shown in the progress-bar description.
    Otherwise the 'test' split is predicted and one colour-palette PNG per
    image is saved under the ``outdir`` directory.

    The model comes from the model zoo when ``args.model_zoo`` is given;
    otherwise it is built from ``args.model`` / ``args.backbone`` and its
    weights are loaded from the checkpoint at ``args.resume``.

    Raises
    ------
    AssertionError
        If no model-zoo name is given and ``args.resume`` is ``None``.
    RuntimeError
        If ``args.resume`` does not point to an existing file.
    """
    # Destination folder for predicted masks.
    outdir = 'outdir'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Tensor conversion followed by per-channel normalization.
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])

    # Pick split/mode once; evaluation also needs running counters.
    if args.eval:
        split, mode = 'val', 'testval'
        total_inter = total_union = total_correct = total_label = np.int64(0)
    else:
        split, mode = 'test', 'test'
    testset = get_segmentation_dataset(args.dataset,
                                       split=split,
                                       mode=mode,
                                       transform=input_transform)
    test_data = gluon.data.DataLoader(testset,
                                      args.test_batch_size,
                                      last_batch='keep',
                                      batchify_fn=ms_batchify_fn,
                                      num_workers=args.workers)

    # Build the network.
    if args.model_zoo is None:
        model = get_segmentation_model(model=args.model,
                                       dataset=args.dataset,
                                       ctx=args.ctx,
                                       backbone=args.backbone,
                                       norm_layer=args.norm_layer)
        # A checkpoint is mandatory for a model that is not from the zoo.
        assert args.resume is not None, '=> Please provide the checkpoint using --resume'
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'" \
                .format(args.resume))
        model.load_params(args.resume, ctx=args.ctx)
    else:
        model = get_model(args.model_zoo, pretrained=True)
    print(model)
    evaluator = MultiEvalModel(model, testset.num_class, ctx_list=args.ctx)

    tbar = tqdm(test_data)
    for data, dsts in tbar:
        # The forward pass is the same in both modes; only the use of the
        # second batch element (targets vs. image paths) differs.
        predicts = evaluator.parallel_forward(data)

        if not args.eval:
            for predict, impath in zip(predicts, dsts):
                label_map = mx.nd.squeeze(mx.nd.argmax(predict[0], 1)).asnumpy()
                mask = get_color_pallete(label_map, args.dataset)
                outname = os.path.splitext(impath)[0] + '.png'
                mask.save(os.path.join(outdir, outname))
            continue

        for predict, target in zip(predicts, dsts):
            target = target.as_in_context(predict[0].context)
            correct, labeled = batch_pix_accuracy(predict[0], target)
            inter, union = batch_intersection_union(
                predict[0], target, testset.num_class)
            total_correct += correct.astype('int64')
            total_label += labeled.astype('int64')
            total_inter += inter.astype('int64')
            total_union += union.astype('int64')

        # The smallest float64 spacing keeps the divisions well-defined
        # while a denominator is still zero.
        eps = np.spacing(1, dtype=np.float64)
        pixAcc = np.float64(1.0) * total_correct / (eps + total_label)
        IoU = np.float64(1.0) * total_inter / (eps + total_union)
        mIoU = IoU.mean()
        tbar.set_description('pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
Exemple #34
0
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]
    args.batch_size = len(ctx)  # 1 batch per device

    # network
    kwargs = {}
    module_list = []
    if args.use_fpn:
        module_list.append('fpn')
    if args.norm_layer is not None:
        module_list.append(args.norm_layer)
        if args.norm_layer == 'bn':
            kwargs['num_devices'] = len(args.gpus.split(','))
    net_name = '_'.join(('mask_rcnn', *module_list, args.network, args.dataset))
    args.save_prefix += net_name
    net = get_model(net_name, pretrained_base=True, **kwargs)
    if args.resume.strip():
        net.load_parameters(args.resume.strip())
    else:
        for param in net.collect_params().values():
            if param._data is not None:
                continue
            param.initialize()
    net.collect_params().reset_ctx(ctx)

    # training data
    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
    train_data, val_data = get_dataloader(
        net, train_dataset, val_dataset, MaskRCNNDefaultTrainTransform, MaskRCNNDefaultValTransform,
        args.batch_size, args.num_workers, args.use_fpn)
Exemple #35
0
# (which means it can be exported
# to symbol to run in C++, Scala and other language bindings.
# We will cover this usage in future tutorials).
# In terms of structure, SSD networks are composed of base feature extraction
# network, anchor generators, class predictors and bounding box offset predictors.
#
# For more details on how SSD detector works, please refer to our introductory
# `tutorial <http://gluon.mxnet.io/chapter08_computer-vision/object-detection.html>`__.
# You can also refer to the original paper to learn more about the intuitions
# behind SSD.
#
# `Gluon Model Zoo <../../model_zoo/index.html>`__ has a lot of built-in SSD networks.
# You can load your favorite one with one simple line of code:
from gluoncv import model_zoo

net = model_zoo.get_model('ssd_300_vgg16_atrous_voc', pretrained_base=False)
print(net)

##############################################################################
# SSD network is a HybridBlock as mentioned before. You can call it with an input as:
import mxnet as mx

x = mx.nd.zeros(shape=(1, 3, 512, 512))
net.initialize()
cids, scores, bboxes = net(x)

##############################################################################
# SSD returns three values, where ``cids`` are the class labels,
# ``scores`` are confidence scores of each prediction,
# and ``bboxes`` are absolute coordinates of corresponding bounding boxes.
from matplotlib import pyplot as plt
import gluoncv
from gluoncv import model_zoo, data, utils

######################################################################
# Load a pretrained model
# -------------------------
#
# Let's get a Faster RCNN model trained on Pascal VOC
# dataset with ResNet-50 backbone. By specifying
# ``pretrained=True``, it will automatically download the model from the model
# zoo if necessary. For more pretrained models, please refer to
# :doc:`../../model_zoo/index`.

net = model_zoo.get_model('faster_rcnn_resnet50_v2a_voc', pretrained=True)

######################################################################
# Pre-process an image
# --------------------
#
# Next we download an image, and pre-process with preset data transforms. Here we
# specify that we resize the short edge of the image to 512 px. But you can
# feed an arbitrarily sized image.
#
# You can provide a list of image file names, such as ``[im_fname1, im_fname2,
# ...]`` to :py:func:`gluoncv.data.transforms.presets.ssd.load_test` if you
# want to load multiple images together.
#
# This function returns two results. The first is a NDArray with shape
# `(batch_size, RGB_channels, height, width)`. It can be fed into the
Exemple #37
0
First let's import some necessary libraries:
"""

from gluoncv import model_zoo, data, utils
from matplotlib import pyplot as plt

######################################################################
# Load a pretrained model
# -------------------------
#
# Let's get a YOLOv3 model trained on Pascal VOC
# dataset with Darknet-53 as the base model. By specifying
# ``pretrained=True``, it will automatically download the model from the model
# zoo if necessary. For more pretrained models, please refer to
# :doc:`../../model_zoo/index`.
net = model_zoo.get_model('yolo3_darknet53_voc', pretrained=True)
######################################################################
# Pre-process an image
# --------------------
#
# Next we download an image, and pre-process with preset data transforms. Here we
# specify that we resize the short edge of the image to 512 px. But you can
# feed an arbitrarily sized image.
#
# You can provide a list of image file names, such as ``[im_fname1, im_fname2,
# ...]`` to :py:func:`gluoncv.data.transforms.presets.ssd.load_test` if you
# want to load multiple images together.
#
# This function returns two results. The first is a NDArray with shape
# `(batch_size, RGB_channels, height, width)`. It can be fed into the
# model directly. The second one contains the images in numpy format to
Exemple #38
0
def main():
    """Evaluate a video-classification network on the UCF101 validation list.

    All settings come from ``parse_args()``. The function builds the model,
    optionally loads user-supplied parameters, builds a ten-crop validation
    loader, runs the evaluation loop and prints top-1 / top-5 accuracy
    together with the elapsed wall-clock time.
    """
    opt = parse_args()

    # set env
    num_gpus = opt.num_gpus
    # The configured batch size is per device; CPU-only runs count as one device.
    batch_size = opt.batch_size * max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=True,
                    tsn=opt.use_tsn)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Truthiness check instead of ``is not ''``: identity comparison with a
    # string literal is implementation-dependent and warns on Python 3.8+.
    if opt.resume_params:
        net.load_parameters(opt.resume_params, ctx=context)
    print('Pre-trained model %s is successfully loaded' % (opt.resume_params))

    # get data
    normalize = video.VideoNormalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    transform_test = transforms.Compose(
        [video.VideoTenCrop(opt.input_size),
         video.VideoToTensor(), normalize])

    val_dataset = ucf101.classification.UCF101(setting=opt.val_list,
                                               root=opt.data_dir,
                                               train=False,
                                               new_width=opt.new_width,
                                               new_height=opt.new_height,
                                               target_width=opt.input_size,
                                               target_height=opt.input_size,
                                               test_mode=True,
                                               num_segments=opt.num_segments,
                                               transform=transform_test)
    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers)
    print('Load %d test samples.' % len(val_dataset))

    # start evaluation
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    # Common practice during evaluation is to evenly sample 25 frames from a
    # single video, and then perform 10-crop data augmentation. This leads to
    # 250 samples per video (750 channels). If this is too large to fit into
    # one GPU, we can split it into multiple data batches.
    # `num_split_frames` has to be a multiple of 3.
    num_data_batches = 10
    num_split_frames = 750 // num_data_batches

    def test(ctx, val_data):
        """Run one pass over ``val_data`` and return ``(top1, top5)``."""
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            outputs = []
            for seg_id in range(num_data_batches):
                # Slice the channel axis into `num_data_batches` chunks.
                bs = seg_id * num_split_frames
                be = (seg_id + 1) * num_split_frames
                new_batch = [batch[0][:, bs:be, :, :], batch[1]]
                data, label = batch_fn(new_batch, ctx)
                for gpu_id, X in enumerate(data):
                    X_reshaped = X.reshape(
                        (-1, 3, opt.input_size, opt.input_size))
                    pred = net(X_reshaped.astype(opt.dtype, copy=False))
                    if seg_id == 0:
                        outputs.append(pred)
                    else:
                        # Accumulate this chunk's predictions per device.
                        outputs[gpu_id] = nd.concat(outputs[gpu_id],
                                                    pred,
                                                    dim=0)
            # Perform the mean operation on 250 samples of each video
            for gpu_id, out in enumerate(outputs):
                outputs[gpu_id] = nd.expand_dims(out.mean(axis=0), axis=0)

            # `label` is the one returned by the last chunk's batch_fn call;
            # every chunk is built from the same batch[1].
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

            if i > 0 and i % opt.log_interval == 0:
                print('%04d/%04d is done' % (i, len(val_data)))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
Exemple #39
0
        val_dataset.transform(transform_val),
        batch_size=batch_size, shuffle=False, last_batch='keep',
        num_workers=num_workers)

    return val_dataset, val_data, val_batch_fn

# Parse the comma-separated input size option into a list of ints.
input_size = list(map(int, opt.input_size.split(',')))
val_dataset, val_data, val_batch_fn = get_data_loader(
    opt.data_dir, batch_size, num_workers, input_size)
val_metric = COCOKeyPointsMetric(
    val_dataset,
    'coco_keypoints',
    data_shape=tuple(input_size),
    in_vis_thresh=opt.score_threshold,
)

# Use the pretrained weights only when no parameter file was supplied;
# otherwise the weights are loaded from that file right after construction.
use_pretrained = not opt.params_file
model_name = opt.model
net = get_model(model_name,
                ctx=context,
                num_joints=num_joints,
                pretrained=use_pretrained)
if not use_pretrained:
    net.load_parameters(opt.params_file, ctx=context)
net.hybridize()

def validate(val_data, val_dataset, net, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    val_metric.reset()

    from tqdm import tqdm
    for batch in tqdm(val_data):
        data, scale, center, score, imgid = val_batch_fn(batch, ctx)

        outputs = [net(X) for X in data]
Exemple #40
0
import gluoncv
from matplotlib import pyplot as plt
from gluoncv import model_zoo, data, utils
from PIL import Image
import numpy as np
import mxnet as mx
import os
import pickle
import time


net = model_zoo.get_model('faster_rcnn_fpn_resnet50_v1b_coco', pretrained=True)

class_list = net.classes

dir_list = ["love"]

for i in range(9):
    start = time.time()
    step = 0
    os.chdir('/hdd/user16')
    file_name = dir_list[i] + '.txt'
    f = open(file_name, 'w')
    dir_path = '/hdd/user16/' + dir_list[i]
    os.chdir(dir_path)
    file_names = os.listdir()
    print(file_names)
    for file_name_ in file_names:
        f.write(file_name_+' - ')
        step += 1
        img_path = '/hdd/user16/' + dir_list[i] + '/' + str(file_name_)

if __name__ == '__main__':
    args = parse_args()
    # fix seed for mxnet, numpy and python builtin random generator.
    gutils.random.seed(args.seed)

    # training contexts
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]
    args.batch_size = len(ctx)  # 1 batch per device

    # network
    net_name = '_'.join(('fpn', args.network, args.dataset))
    args.save_prefix += net_name
    net = get_model(net_name, pretrained_base=True)
    if args.resume.strip():
        net.load_parameters(args.resume.strip())
    else:
        for param in net.collect_params().values():
            if param._data is not None:
                continue
            param.initialize()
    net.collect_params().reset_ctx(ctx)

    # training data
    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
    train_data, val_data = get_dataloader(net, train_dataset, val_dataset,
                                          args.batch_size, args.num_workers)

    # training
Exemple #42
0
    def __init__(self, args):
        """Build everything the trainer needs from the parsed options.

        Creates the train/val data loaders, the segmentation network (from
        the model zoo or from the individual model options), the
        data-parallel wrappers, the loss, the LR schedule, the SGD trainer
        and the segmentation metric, storing them on ``self``.

        Raises:
            RuntimeError: if ``args.resume`` is set but is not an existing file.
        """
        self.args = args

        # ---- data -------------------------------------------------------
        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize([.485, .456, .406], [.229, .224, .225])
        input_transform = transforms.Compose([to_tensor, normalize])
        data_kwargs = dict(transform=input_transform,
                           base_size=args.base_size,
                           crop_size=args.crop_size)
        trainset = get_segmentation_dataset(
            args.dataset, split=args.train_split, mode='train', **data_kwargs)
        valset = get_segmentation_dataset(
            args.dataset, split='val', mode='val', **data_kwargs)
        self.train_data = gluon.data.DataLoader(
            trainset, args.batch_size, shuffle=True,
            last_batch='rollover', num_workers=args.workers)
        self.eval_data = gluon.data.DataLoader(
            valset, args.test_batch_size,
            last_batch='rollover', num_workers=args.workers)

        # ---- network ----------------------------------------------------
        if args.model_zoo is None:
            # No zoo entry requested: assemble the model from its parts.
            model = get_segmentation_model(
                model=args.model, dataset=args.dataset,
                backbone=args.backbone, norm_layer=args.norm_layer,
                norm_kwargs=args.norm_kwargs, aux=args.aux,
                crop_size=args.crop_size)
        else:
            model = get_model(args.model_zoo, pretrained=True)
        model.cast(args.dtype)
        print(model)
        self.net = DataParallelModel(model, args.ctx, args.syncbn)
        self.evaluator = DataParallelModel(SegEvalModel(model), args.ctx)

        # ---- optional checkpoint ----------------------------------------
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError(
                    "=> no checkpoint found at '{}'".format(args.resume))
            model.load_parameters(args.resume, ctx=args.ctx)

        # ---- loss -------------------------------------------------------
        loss = MixSoftmaxCrossEntropyLoss(args.aux, aux_weight=args.aux_weight)
        self.criterion = DataParallelCriterion(loss, args.ctx, args.syncbn)

        # ---- optimizer and LR schedule ----------------------------------
        self.lr_scheduler = LRScheduler(
            mode='poly', baselr=args.lr,
            niters=len(self.train_data), nepochs=args.epochs)
        kv = mx.kv.create(args.kvstore)
        optimizer_params = dict(lr_scheduler=self.lr_scheduler,
                                wd=args.weight_decay,
                                momentum=args.momentum)
        if args.dtype == 'float16':
            optimizer_params['multi_precision'] = True

        if args.no_wd:
            # Switch weight decay off for parameters matching beta/gamma/bias.
            no_decay = self.net.module.collect_params('.*beta|.*gamma|.*bias')
            for param in no_decay.values():
                param.wd_mult = 0.0

        self.optimizer = gluon.Trainer(
            self.net.module.collect_params(), 'sgd', optimizer_params,
            kvstore=kv)

        # ---- metric -----------------------------------------------------
        self.metric = gluoncv.utils.metrics.SegmentationMetric(
            trainset.num_class)
Exemple #43
0
def test(args):
    """Run segmentation evaluation or prediction export, as selected by ``args``.

    The function has two near-duplicate code paths chosen by the flag
    ``horse_change``, a name that is not defined in this function:

    * ``horse_change`` falsy: outputs go to ``args.outdir``, the loader uses
      ``args.test_batch_size``, and in non-eval mode the first three
      channels of each prediction are also pickled into ``args.scoredir``.
    * ``horse_change`` truthy: outputs go to the hard-coded ``'outdir'``
      folder, the loader uses ``args.batch_size``, and no score maps are
      written.

    In both paths, ``args.eval`` selects between updating and displaying
    pixAcc / mIoU on the ``val`` split, and saving colour-palette PNG masks
    for the ``test`` split.

    Raises:
        RuntimeError: if no model-zoo name is given and ``args.resume`` is
            not an existing file.
    """
    if not horse_change:
        # output folder
        # outdir = 'outdir'
        outdir = args.outdir
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        # image transform
        input_transform = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
            # transforms.Normalize([0, 0, 0], [1, 1, 1]),
            # transforms.Normalize([0], [100]), # this is for 1 channel: ([0], [1]) ([556.703], [482.175])
        ])
        # dataset and dataloader
        if args.eval:
            testset = get_segmentation_dataset(args.dataset, 
                                               split='val', 
                                               mode='testval', 
                                               transform=input_transform)
            # NOTE(review): these four accumulators are never read again in
            # this function.
            total_inter, total_union, total_correct, total_label = \
                np.int64(0), np.int64(0), np.int64(0), np.int64(0)
        else:
            testset = get_segmentation_dataset(args.dataset, 
                                               split='test', 
                                               mode='test', 
                                               transform=input_transform)
        test_data = gluon.data.DataLoader(testset, 
                                          args.test_batch_size, 
                                          shuffle=False, 
                                          last_batch='keep',
                                          batchify_fn=ms_batchify_fn, 
                                          num_workers=args.workers)
        # create network
        if args.model_zoo is not None:
            model = get_model(args.model_zoo, pretrained=True)
        else:
            model = get_segmentation_model(model=args.model, 
                                           dataset=args.dataset, 
                                           ctx=args.ctx,
                                           backbone=args.backbone, 
                                           norm_layer=args.norm_layer,
                                           norm_kwargs=args.norm_kwargs, 
                                           aux=args.aux,
                                           base_size=args.base_size, 
                                           crop_size=args.crop_size)
            # load pretrained weight
            # NOTE(review): assert is stripped under `python -O`; the isfile
            # check below would then receive None.
            assert args.resume is not None, '=> Please provide the checkpoint using --resume'
            if os.path.isfile(args.resume):
                model.load_parameters(args.resume, ctx=args.ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'" \
                    .format(args.resume))
        # print(model) # [horse]: do not print model
        evaluator = MultiEvalModel(model, testset.num_class, ctx_list=args.ctx)
        metric = gluoncv.utils.metrics.SegmentationMetric(testset.num_class)

        print('testset.pred_offset:', testset.pred_offset) # horse
        print('model.crop_size', model.crop_size) # horse

        tbar = tqdm(test_data)
        # In eval mode `dsts` holds the targets; otherwise it holds image paths.
        for i, (data, dsts) in enumerate(tbar):
            if args.eval:
                # print('data', data[0].shape) # horse
                predicts = [pred[0] for pred in evaluator.parallel_forward(data)]
                # print('predicts', predicts[0].shape)
                # Move targets to the context of the first prediction.
                targets = [target.as_in_context(predicts[0].context) \
                           for target in dsts]
                # horse begin 
                '''
                predict = mx.nd.squeeze(mx.nd.argmax(predicts[0], 0)).asnumpy() + \
                        testset.pred_offset
                '''
                # horse end
                print('targets', targets[0].shape)
                metric.update(targets, predicts)
                pixAcc, mIoU = metric.get()
                tbar.set_description( 'pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
            else:
                output_score_map = True # [horse added]
                if output_score_map:
                    # score_map_dir = 'scoredir'
                    score_map_dir = args.scoredir
                    if not os.path.exists(score_map_dir):
                        os.makedirs(score_map_dir)

                im_paths = dsts
                # print('data', data[0].shape) # horse
                predicts = evaluator.parallel_forward(data)
                # print(predicts[0].shape)
                for predict, impath in zip(predicts, im_paths):
                    # change from 1 to 0 [horse]
                    # print('predict:', predict[0].shape) # predict: (3, 127, 207)
                    if output_score_map:
                        # Pickle the first three channels of the raw
                        # prediction next to the mask, as <image stem>.pkl.
                        score_map_name = os.path.splitext(impath)[0] + '.pkl'
                        score_map_path = os.path.join(score_map_dir, score_map_name)
                        with open(score_map_path, 'wb') as fo:
                            pickle.dump(predict[0].asnumpy()[0:3,:,:], fo)
                    '''
                    if i == 50:
                        with open('have_a_look.pkl', 'wb') as fo:
                            pickle.dump(predict[0].asnumpy(),fo)
                    '''
                    # argmax over axis 0 picks the predicted class index;
                    # the dataset's pred_offset is then added.
                    predict = mx.nd.squeeze(mx.nd.argmax(predict[0], 0)).asnumpy() + \
                        testset.pred_offset
                    mask = get_color_pallete(predict, args.dataset)
                    outname = os.path.splitext(impath)[0] + '.png'
                    # print('predict:', predict.shape) # predict: (127, 207)
                    # print('mask:', mask) # it is a PIL.Image.Image
                    mask.save(os.path.join(outdir, outname))
                # break

    # Second path: same pipeline with a hard-coded output folder,
    # args.batch_size for the loader, and no score-map dumps.
    if horse_change: 
        # >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>>
        # output folder
        outdir = 'outdir'
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        # image transform
        input_transform = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
            # transforms.Normalize([0, 0, 0], [1, 1, 1]),
            # transforms.Normalize([0], [100]), # this is for 1 channel: ([0], [1]) ([556.703], [482.175])
        ])
        # dataset and dataloader
        if args.eval:
            testset = get_segmentation_dataset(args.dataset, 
                                               split='val', 
                                               mode='testval', 
                                               transform=input_transform)
            # NOTE(review): as in the first path, these accumulators are unused.
            total_inter, total_union, total_correct, total_label = \
                np.int64(0), np.int64(0), np.int64(0), np.int64(0)
        else:
            testset = get_segmentation_dataset(args.dataset, 
                                               split='test', 
                                               mode='test', 
                                               transform=input_transform)

        test_data = gluon.data.DataLoader(testset, 
                                          args.batch_size, # args.test_batch_size, [horse changed this]
                                          shuffle=False, 
                                          last_batch='keep',
                                          batchify_fn=ms_batchify_fn, 
                                          num_workers=args.workers)
        # create network
        if args.model_zoo is not None:
            model = get_model(args.model_zoo, pretrained=True)
        else:
            model = get_segmentation_model(model=args.model, 
                                           dataset=args.dataset, 
                                           ctx=args.ctx,
                                           backbone=args.backbone, 
                                           norm_layer=args.norm_layer,
                                           norm_kwargs=args.norm_kwargs, 
                                           aux=args.aux,
                                           base_size=args.base_size, 
                                           crop_size=args.crop_size)
            # load pretrained weight
            assert args.resume is not None, '=> Please provide the checkpoint using --resume'
            if os.path.isfile(args.resume):
                model.load_parameters(args.resume, ctx=args.ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'" \
                    .format(args.resume))
        # print(model) # [horse]: do not print model
        evaluator = MultiEvalModel(model, testset.num_class, ctx_list=args.ctx)
        metric = gluoncv.utils.metrics.SegmentationMetric(testset.num_class)

        print('testset.pred_offset:', testset.pred_offset) # horse
        print('model.crop_size', model.crop_size) # horse

        tbar = tqdm(test_data)
        for i, (data, dsts) in enumerate(tbar):
            if args.eval:
                # print('data', data[0].shape) # horse
                predicts = [pred[0] for pred in evaluator.parallel_forward(data)]
                # print('predicts', predicts[0].shape)
                targets = [target.as_in_context(predicts[0].context) \
                           for target in dsts]
                # horse begin 
                '''
                predict = mx.nd.squeeze(mx.nd.argmax(predicts[0], 0)).asnumpy() + \
                        testset.pred_offset
                '''
                # horse end
                print('targets', targets[0].shape)
                metric.update(targets, predicts)
                pixAcc, mIoU = metric.get()
                tbar.set_description( 'pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
            else:
                output_score_map = True # [horse added]
                if output_score_map:
                    # NOTE(review): score_map_dir is assigned but never used
                    # in this path.
                    score_map_dir = 'scoredir'

                im_paths = dsts
                print('data', data[0].shape) # horse
                predicts = evaluator.parallel_forward(data)
                print(predicts[0].shape)
                for predict, impath in zip(predicts, im_paths):

                    predict = mx.nd.squeeze(mx.nd.argmax(predict[0], 0)).asnumpy() + \
                        testset.pred_offset
                    mask = get_color_pallete(predict, args.dataset)
                    outname = os.path.splitext(impath)[0] + '.png'

                    mask.save(os.path.join(outdir, outname))
Exemple #44
0
# Scale the batch size by the number of GPUs; CPU-only runs keep it as is.
num_gpus = opt.num_gpus
if num_gpus > 0:
    batch_size *= num_gpus
ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
num_workers = opt.num_workers

input_size = opt.input_size
model_name = opt.model
# Use the model-zoo weights only when no parameter file was supplied.
pretrained = not opt.params_file

kwargs = {'ctx': ctx, 'pretrained': pretrained, 'classes': classes}
if model_name.startswith('resnext'):
    # Only the resnext models are given the `use_se` option here.
    kwargs['use_se'] = opt.use_se

net = get_model(model_name, **kwargs)
net.cast(opt.dtype)
if opt.params_file:
    # `load_params` is deprecated in Gluon; `load_parameters` is the
    # replacement and is what the other scripts in this file already call.
    net.load_parameters(opt.params_file, ctx=ctx)
net.hybridize()

acc_top1 = mx.metric.Accuracy()
acc_top5 = mx.metric.TopKAccuracy(5)

normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
"""
Aligning with TF implemenation, the default crop-input
ratio set as 0.875; Set the crop as ceil(input-size/ratio)
"""
crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
resize = int(math.ceil(input_size / crop_ratio))
num_gpus = opt.num_gpus
batch_size *= max(1, num_gpus)
context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
num_workers = opt.num_workers

lr_decay = opt.lr_decay
lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf]

model_name = opt.model
if model_name.startswith('cifar_wideresnet'):
    kwargs = {'classes': classes,
              'drop_rate': opt.drop_rate}
else:
    kwargs = {'classes': classes}
net = get_model(model_name, **kwargs)
if opt.resume_from:
    net.load_parameters(opt.resume_from, ctx = context)
optimizer = 'nag'

save_period = opt.save_period
if opt.save_dir and save_period:
    save_dir = opt.save_dir
    makedirs(save_dir)
else:
    save_dir = ''
    save_period = 0

plot_path = opt.save_plot_dir

logging.basicConfig(level=logging.INFO)
model_name = supported_model[0]
dshape = (1, 3, 512, 512)
target_list = ctx_list()

######################################################################
# Download and pre-process demo image

im_fname = download_testdata('https://github.com/dmlc/web-data/blob/master/' +
                             'gluoncv/detection/street_small.jpg?raw=true',
                             'street_small.jpg', module='data')
x, img = data.transforms.presets.ssd.load_test(im_fname, short=512)

######################################################################
# Convert and compile model for CPU.

block = model_zoo.get_model(model_name, pretrained=True)

def build(target):
    """Compile the module-level MXNet ``block`` for ``target`` with Relay.

    The network is converted with the input named ``"data"`` and shape
    ``dshape``, then built inside a pass context at optimization level 3.
    Returns the result of ``relay.build``.
    """
    shape_dict = {"data": dshape}
    relay_mod, relay_params = relay.frontend.from_mxnet(block, shape_dict)
    with tvm.transform.PassContext(opt_level=3):
        return relay.build(relay_mod, target, params=relay_params)

######################################################################
# Create TVM runtime and do inference

def run(lib, ctx):
    # Build TVM runtime
    m = graph_runtime.GraphModule(lib['default'](ctx))
    tvm_input = tvm.nd.array(x.asnumpy(), ctx=ctx)
    m.set_input('data', tvm_input)
Exemple #47
0
"""

from gluoncv import model_zoo, data, utils
from matplotlib import pyplot as plt

######################################################################
# Load a pretrained model
# -------------------------
#
# Let's get an SSD model trained with 512x512 images on Pascal VOC
# dataset with ResNet-50 V1 as the base model. By specifying
# ``pretrained=True``, it will automatically download the model from the model
# zoo if necessary. For more pretrained models, please refer to
# :doc:`../../model_zoo/index`.

net = model_zoo.get_model('ssd_512_resnet50_v1_voc', pretrained=True)

######################################################################
# Pre-process an image
# --------------------
#
# Next we download an image, and pre-process with preset data transforms. Here we
# specify that we resize the short edge of the image to 512 px. But you can
# feed an arbitrarily sized image.
#
# You can provide a list of image file names, such as ``[im_fname1, im_fname2,
# ...]`` to :py:func:`gluoncv.data.transforms.presets.ssd.load_test` if you
# want to load multiple images together.
#
# This function returns two results. The first is a NDArray with shape
# `(batch_size, RGB_channels, height, width)`. It can be fed into the
# In terms of structure, YOLOv3 networks are composed of base feature extraction
# network, convolutional transition layers, upsampling layers, and specially designed YOLOv3 output layers.
#
# We highly recommend you to read the original paper to learn more about the ideas
# behind YOLO [YOLOv3]_.
#
# `Gluon Model Zoo <../../model_zoo/index.html>`__ has a few built-in YOLO networks, more on the way.
# You can load your favorite one with one simple line of code:
#
# .. hint::
#
#    To avoid downloading model in this tutorial, we set `pretrained_base=False`,
#    in practice we usually want to load pre-trained imagenet models by setting
#    `pretrained_base=True`.
from gluoncv import model_zoo
net = model_zoo.get_model('yolo3_darknet53_voc', pretrained_base=False)
print(net)

##############################################################################
# YOLOv3 network is callable with image tensor
import mxnet as mx
x = mx.nd.zeros(shape=(1, 3, 416, 416))
net.initialize()
cids, scores, bboxes = net(x)

##############################################################################
# YOLOv3 returns three values, where ``cids`` are the class labels,
# ``scores`` are confidence scores of each prediction,
# and ``bboxes`` are absolute coordinates of corresponding bounding boxes.

    args = parse_args()
    # fix seed for mxnet, numpy and python builtin random generator.
    gutils.random.seed(args.seed)

    # training contexts
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]
    args.batch_size = len(ctx)  # 1 batch per device

    # network
    module_list = []
    if args.use_fpn:
        module_list.append('fpn')
    net_name = '_'.join(('mask_rcnn', *module_list, args.network, args.dataset))
    args.save_prefix += net_name
    net = get_model(net_name, pretrained_base=True)
    if args.resume.strip():
        net.load_parameters(args.resume.strip())
    else:
        for param in net.collect_params().values():
            if param._data is not None:
                continue
            param.initialize()
    net.collect_params().reset_ctx(ctx)

    # training data
    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
    train_data, val_data = get_dataloader(
        net, train_dataset, val_dataset, MaskRCNNDefaultTrainTransform, MaskRCNNDefaultValTransform,
        args.batch_size, args.num_workers, args.use_fpn)
Exemple #50
0
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
# noinspection PyUnresolvedReferences
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import torch
from common.camera import camera_to_world
from common.generators import UnchunkedGenerator
from common.model import TemporalModel

# Use CUDA for torch when it is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 1. Load the object detector and the 2D keypoint detector
# Each list holds two candidate model names; index 1 is the one used below.
detector_name = ['yolo3_mobilenet1.0_coco', 'yolo3_darknet53_coco']
posenet_name = ['simple_pose_resnet18_v1b', 'simple_pose_resnet101_v1b']
detector = model_zoo.get_model(detector_name[1], pretrained=True)
pose_net = model_zoo.get_model(posenet_name[1], pretrained=True)

# Restrict the detector to the single class 'person', asking it to reuse the
# weights it already has for that class.
# noinspection PyUnresolvedReferences
detector.reset_class(['person'], reuse_weights=['person'])


def detect_2d_joints(frame, short=360):
    """
    Args:
        short: size the shorter image side is resized to
        frame: RGB image of arbitrary size

    Returns: the processed image (ndarray), the joint coordinates (NDArray), and
        the confidence and other elements needed to display the 2D pose
    """
    # Resize the image and build the input tensor for the object detector
def main():
    """Train a CIFAR-10 classifier with mixup augmentation.

    Every setting is read from ``parse_args()``.  The function builds the
    requested model, optionally resumes from ``opt.resume_from``, sets up
    logging, and then runs the nested ``train`` loop, which

    * mixes each per-device batch with its own reversed copy using a weight
      drawn from ``Beta(1, 1)`` (mixing is switched off for the final 20
      epochs),
    * multiplies the learning rate by ``opt.lr_decay`` at the epochs listed in
      ``opt.lr_decay_epoch``,
    * evaluates on the CIFAR-10 test split once per epoch, updates the history
      plot, and writes the best and the periodic checkpoints.

    Checkpoints are only written when both ``opt.save_dir`` and
    ``opt.save_period`` are set.
    """
    opt = parse_args()

    batch_size = opt.batch_size
    classes = 10

    num_gpus = opt.num_gpus
    # Scale the batch size by the number of GPUs (by 1 when running on CPU).
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    # The trailing np.inf is a sentinel: after the last real decay epoch has
    # been consumed the equality test in the epoch loop can never match again,
    # so the index never runs past the end of the list.
    lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf]

    model_name = opt.model
    if model_name.startswith('cifar_wideresnet'):
        kwargs = {'classes': classes,
                  'drop_rate': opt.drop_rate}
    else:
        kwargs = {'classes': classes}
    net = get_model(model_name, **kwargs)
    # From here on the name only labels log files, plots and checkpoints.
    model_name += '_mixup'
    if opt.resume_from:
        net.load_parameters(opt.resume_from, ctx=context)
    optimizer = 'nag'

    # Saving is enabled only when both a directory and a period are given.
    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_period = 0

    plot_name = opt.save_plot_dir

    logging_handlers = [logging.StreamHandler()]
    if opt.logging_dir:
        logging_dir = opt.logging_dir
        makedirs(logging_dir)
        logging_handlers.append(
            logging.FileHandler('%s/train_cifar10_%s.log' % (logging_dir, model_name)))

    logging.basicConfig(level=logging.INFO, handlers=logging_handlers)
    logging.info(opt)

    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])

    def label_transform(label, classes):
        """Return a one-hot (len(label), classes) array on the label's context."""
        ind = label.astype('int')
        res = nd.zeros((ind.shape[0], classes), ctx=label.context)
        res[nd.arange(ind.shape[0], ctx=label.context), ind] = 1
        return res

    def test(ctx, val_data):
        """Run `net` over `val_data` and return ``(name, accuracy)``."""
        metric = mx.metric.Accuracy()
        for batch in val_data:
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            outputs = [net(X) for X in data]
            metric.update(label, outputs)
        return metric.get()

    def train(epochs, ctx):
        """Train `net` for `epochs` epochs on the given context(s)."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.initialize(mx.init.Xavier(), ctx=ctx)

        train_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)

        val_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        trainer = gluon.Trainer(net.collect_params(), optimizer,
                                {'learning_rate': opt.lr, 'wd': opt.wd, 'momentum': opt.momentum})
        # Mixup labels are soft, so training quality is tracked as the RMSE
        # between the mixed labels and the softmax outputs rather than accuracy.
        train_metric = mx.metric.RMSE()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False)
        train_history = TrainingHistory(['training-error', 'validation-error'])

        lr_decay_count = 0
        best_val_score = 0
        alpha = 1  # both shape parameters of the Beta distribution for lam
        num_batch = len(train_data)

        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            train_loss = 0

            if epoch == lr_decay_epoch[lr_decay_count]:
                trainer.set_learning_rate(trainer.learning_rate*lr_decay)
                lr_decay_count += 1

            for batch in train_data:
                lam = np.random.beta(alpha, alpha)
                # The final 20 epochs train on un-mixed samples.
                if epoch >= epochs - 20:
                    lam = 1

                data_1 = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
                label_1 = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)

                # Each sample is blended with the sample at the mirrored
                # position of the same per-device slice; labels are blended
                # with the same weight.
                data = [lam*X + (1-lam)*X[::-1] for X in data_1]
                label = []
                for Y in label_1:
                    y1 = label_transform(Y, classes)
                    y2 = label_transform(Y[::-1], classes)
                    label.append(lam*y1 + (1-lam)*y2)

                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])

                output_softmax = [nd.SoftmaxActivation(out) for out in output]
                train_metric.update(label, output_softmax)

            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            # Validate once per epoch; the result is reused for the history,
            # the best-checkpoint decision and the log line.
            name, val_acc = test(ctx, val_data)
            train_history.update([acc, 1-val_acc])
            train_history.plot(save_path='%s/%s_history.png'%(plot_name, model_name))

            if val_acc > best_val_score:
                best_val_score = val_acc
                # save_dir is '' when saving is disabled; skip the write then
                # instead of building a path at the filesystem root.
                if save_dir:
                    net.save_parameters('%s/%.4f-cifar-%s-%d-best.params'%(save_dir, best_val_score, model_name, epoch))

            logging.info('[Epoch %d] train=%f val=%f loss=%f time: %f' %
                (epoch, acc, val_acc, train_loss, time.time()-tic))

            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('%s/cifar10-%s-%d.params'%(save_dir, model_name, epoch))

        if save_period and save_dir:
            net.save_parameters('%s/cifar10-%s-%d.params'%(save_dir, model_name, epochs-1))

    if opt.mode == 'hybrid':
        net.hybridize()
    train(opt.num_epochs, context)
Exemple #52
0
    def __init__(self, options, logger):
        """Build the model, data loaders, optimizer and loss helpers.

        Args:
            options: parsed option namespace.  The attributes read here include
                ``model_zoo``, ``log_dir``, ``height``, ``width``, ``scales``,
                ``frame_ids``, ``use_stereo``, ``ctx``, ``resume``, ``dataset``,
                ``split``, ``batch_size`` and the learning-rate settings.
            logger: logger used to report the model and the resumed checkpoint.

        Raises:
            ValueError: if ``options.model_zoo`` is ``None``.
            AssertionError: if ``height``/``width`` is not a multiple of 32 or
                ``frame_ids`` does not start with 0.
            RuntimeError: if ``options.resume`` is set but is not an existing file.
        """
        # configuration setting
        self.opt = options
        self.logger = logger

        # A model name is mandatory.  Check it before it is used to build the
        # log path, so the user gets this message rather than a TypeError.
        if self.opt.model_zoo is None:
            raise ValueError("Must choose a model from model_zoo, "
                             "please provide the model_zoo using --model_zoo")
        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_zoo)

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        ################### model initialization ###################
        self.num_scales = len(self.opt.scales)
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

        assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

        # The pose network is unused only for stereo training on frame 0 alone.
        self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

        # Must come after the checks above, which look at frame_ids before "s"
        # is appended.  Note that this mutates the option object in place.
        if self.opt.use_stereo:
            self.opt.frame_ids.append("s")

        # create network
        self.model = get_model(self.opt.model_zoo, pretrained_base=self.opt.pretrained_base,
                               scales=self.opt.scales, ctx=self.opt.ctx)
        self.logger.info(self.model)

        # resume checkpoint if needed
        if self.opt.resume is not None:
            if os.path.isfile(self.opt.resume):
                self.logger.info('Resume model: %s' % self.opt.resume)
                self.model.load_parameters(self.opt.resume, ctx=self.opt.ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'".format(self.opt.resume))

        self.parameters_to_train = self.model.collect_params()

        if self.opt.hybridize:
            self.model.hybridize()

        ######################### dataloader #########################
        datasets_dict = {"kitti": KITTIRAWDataset,
                         "kitti_odom": KITTIOdomDataset}
        self.dataset = datasets_dict[self.opt.dataset]

        # "{}" is filled with "train" / "val" below to pick the split file.
        fpath = os.path.join(os.path.expanduser("~"), ".mxnet/datasets/kitti",
                             "splits", self.opt.split, "{}_files.txt")
        train_filenames = readlines(fpath.format("train"))
        val_filenames = readlines(fpath.format("val"))
        img_ext = '.png' if self.opt.png else '.jpg'

        num_train_samples = len(train_filenames)
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

        # NOTE(review): num_scales is hard-coded to 4 here although
        # self.num_scales is derived from the options above; confirm intended.
        train_dataset = self.dataset(
            self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
            self.opt.frame_ids, num_scales=4, is_train=True, img_ext=img_ext)
        self.train_loader = gluon.data.DataLoader(
            train_dataset, batch_size=self.opt.batch_size, shuffle=True,
            batchify_fn=dict_batchify_fn, num_workers=self.opt.num_workers,
            pin_memory=True, last_batch='discard')

        val_dataset = self.dataset(
            self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
            self.opt.frame_ids, num_scales=4, is_train=False, img_ext=img_ext)
        self.val_loader = gluon.data.DataLoader(
            val_dataset, batch_size=self.opt.batch_size, shuffle=False,
            batchify_fn=dict_batchify_fn, num_workers=self.opt.num_workers,
            pin_memory=True, last_batch='discard')

        ################### optimization setting ###################
        # NOTE(review): iters_per_epoch is the number of samples, not the
        # number of batches (len(self.train_loader)); verify against how the
        # scheduler is stepped before changing it.
        self.lr_scheduler = LRSequential([
            LRScheduler('step', base_lr=self.opt.learning_rate,
                        nepochs=self.opt.num_epochs - self.opt.warmup_epochs,
                        iters_per_epoch=len(train_dataset),
                        step_epoch=[self.opt.scheduler_step_size - self.opt.warmup_epochs])
        ])
        optimizer_params = {'lr_scheduler': self.lr_scheduler,
                            'learning_rate': self.opt.learning_rate}

        self.optimizer = gluon.Trainer(self.parameters_to_train, 'adam', optimizer_params)

        print("Training model named:\n  ", self.opt.model_zoo)
        print("Models are saved to:\n  ", self.opt.log_dir)
        # Compare the device type: `ctx is mx.cpu()` tests identity against a
        # freshly created object and therefore always reported "GPU".
        print("Training is using:\n  ",
              "CPU" if self.opt.ctx[0].device_type == 'cpu' else "GPU")

        ################### loss function ###################
        if not self.opt.no_ssim:
            self.ssim = SSIM()

        # One back-projection / projection helper per scale, each sized for
        # the image resolution halved `scale` times.
        self.backproject_depth = {}
        self.project_3d = {}
        for scale in self.opt.scales:
            h = self.opt.height // (2 ** scale)
            w = self.opt.width // (2 ** scale)

            self.backproject_depth[scale] = BackprojectDepth(
                self.opt.batch_size, h, w, ctx=self.opt.ctx[0])
            self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)

        ################### metrics ###################
        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"]

        print("Using split:\n  ", self.opt.split)
        print("There are {:d} training items and {:d} validation items\n".format(
            len(train_dataset), len(val_dataset)))

        self.save_opts()

        # for save best model
        self.best_delta1 = 0
        self.best_model = self.model
# -------------------
# GluonCV's Faster-RCNN implementation is a composite Gluon HybridBlock :py:class:`gluoncv.model_zoo.FasterRCNN`.
# In terms of structure, Faster-RCNN networks are composed of base feature extraction
# network, Region Proposal Network(including its own anchor system, proposal generator),
# region-aware pooling layers, class predictors and bounding box offset predictors.
#
# `Gluon Model Zoo <../../model_zoo/index.html>`__ has a few built-in Faster-RCNN networks, more on the way.
# You can load your favorite one with one simple line of code:
#
# .. hint::
#
#    To avoid downloading models in this tutorial, we set ``pretrained_base=False``,
#    in practice we usually want to load pre-trained imagenet models by setting
#    ``pretrained_base=True``.
from gluoncv import model_zoo
# Build the network registered under the name 'faster_rcnn_resnet50_v1b_voc';
# pretrained_base=False is passed so that no pre-trained base weights are requested.
net = model_zoo.get_model('faster_rcnn_resnet50_v1b_voc', pretrained_base=False)
print(net)

##############################################################################
# Faster-RCNN network is callable with image tensor
import mxnet as mx
x = mx.nd.zeros(shape=(1, 3, 600, 800))  # all-zeros dummy input
net.initialize()  # parameters are initialized before the forward pass below
cids, scores, bboxes = net(x)  # the forward pass result is unpacked into three values

##############################################################################
# Faster-RCNN returns three values, where ``cids`` are the class labels,
# ``scores`` are confidence scores of each prediction,
# and ``bboxes`` are absolute coordinates of corresponding bounding boxes.

##############################################################################
def main():
    """Train and validate a video action recognition model.

    Every setting is read from ``parse_args()``.  The function

    * sets up file/stream logging and a ``SummaryWriter`` in ``opt.save_dir``,
    * optionally creates a kvstore (``opt.kvstore``) and enables AMP
      (``opt.use_amp``),
    * builds the model, optionally loads ``opt.resume_params``,
    * builds a warm-up + main learning-rate schedule,
    * runs the nested ``train`` loop, which validates after every epoch and
      writes parameter/trainer-state checkpoints.

    With ``opt.accumulate > 1`` gradients are summed over that many batches
    and a single optimizer update is applied per group.
    """
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(
        os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    sw = SummaryWriter(logdir=opt.save_dir, flush_secs=5, verbose=False)

    # `kv` exists only when a kvstore type was requested; every later use is
    # guarded by the same `opt.kvstore is not None` test.
    if opt.kvstore is not None:
        kv = mx.kvstore.create(opt.kvstore)
        logger.info(
            'Distributed training with %d workers and current rank is %d' %
            (kv.num_workers, kv.rank))
    if opt.use_amp:
        amp.init()

    batch_size = opt.batch_size
    classes = opt.num_classes

    num_gpus = opt.num_gpus
    # Scale the batch size by the number of GPUs (by 1 when running on CPU).
    batch_size *= max(1, num_gpus)
    logger.info('Total batch size is set to %d on %d GPUs' %
                (batch_size, num_gpus))
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(
            range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    # The main scheduler starts after warm-up, so shift the decay epochs.
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]

    optimizer = 'nag' if opt.slowfast else 'sgd'

    optimizer_params = {
        'learning_rate': opt.lr,
        'wd': opt.wd,
        'momentum': opt.momentum,
    }
    if opt.clip_grad > 0:
        optimizer_params['clip_gradient'] = opt.clip_grad

    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    model_name = opt.model
    # A non-empty hashtag replaces the boolean flag as the `pretrained` value.
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    use_tsn=opt.use_tsn,
                    num_segments=opt.num_segments,
                    partial_bn=opt.partial_bn)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    logger.info(net)

    # Truthiness test instead of `is not ''`: identity comparison with a
    # string literal is unreliable and a SyntaxWarning on modern Python.
    if opt.resume_params:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Continue training from model %s.' % (opt.resume_params))

    if opt.kvstore is not None:
        train_data, val_data, batch_fn = get_data_loader(
            opt, batch_size, num_workers, logger, kv)
    else:
        train_data, val_data, batch_fn = get_data_loader(
            opt, batch_size, num_workers, logger)

    num_batches = len(train_data)
    lr_scheduler = LRSequential([
        LRScheduler('linear',
                    base_lr=opt.warmup_lr,
                    target_lr=opt.lr,
                    nepochs=opt.warmup_epochs,
                    iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode,
                    base_lr=opt.lr,
                    target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay,
                    power=2)
    ])
    optimizer_params['lr_scheduler'] = lr_scheduler

    train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    def test(ctx, val_data, kvstore=None):
        """Evaluate `net` on `val_data`.

        Returns ``(top1, top5, val_loss)``.  When `kvstore` is given, each
        value is pushed/pulled through it and divided by the number of
        workers, and is returned as a one-element numpy array.
        """
        acc_top1.reset()
        acc_top5.reset()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
        num_test_iter = len(val_data)
        val_loss_epoch = 0
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            outputs = []
            for X in data:
                # Merge the first two axes before the forward pass.
                X = X.reshape((-1, ) + X.shape[2:])
                pred = net(X.astype(opt.dtype, copy=False))
                outputs.append(pred)

            loss = [
                loss_fn(yhat, y.astype(opt.dtype, copy=False))
                for yhat, y in zip(outputs, label)
            ]

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

            val_loss_epoch += sum([l.mean().asscalar()
                                   for l in loss]) / len(loss)

            if opt.log_interval and not (i + 1) % opt.log_interval:
                _, top1 = acc_top1.get()
                _, top5 = acc_top5.get()
                logger.info('Batch [%04d]/[%04d]: acc-top1=%f acc-top5=%f' %
                            (i, num_test_iter, top1 * 100, top5 * 100))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        val_loss = val_loss_epoch / num_test_iter

        if kvstore is not None:
            # The integer keys are initialised by the caller (see `train`).
            top1_nd = nd.zeros(1)
            top5_nd = nd.zeros(1)
            val_loss_nd = nd.zeros(1)
            kvstore.push(111111, nd.array(np.array([top1])))
            kvstore.pull(111111, out=top1_nd)
            kvstore.push(555555, nd.array(np.array([top5])))
            kvstore.pull(555555, out=top5_nd)
            kvstore.push(999999, nd.array(np.array([val_loss])))
            kvstore.pull(999999, out=val_loss_nd)
            top1 = top1_nd.asnumpy() / kvstore.num_workers
            top5 = top5_nd.asnumpy() / kvstore.num_workers
            val_loss = val_loss_nd.asnumpy() / kvstore.num_workers

        return (top1, top5, val_loss)

    def train(ctx):
        """Run the full training loop on the given context(s)."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        if opt.no_wd:
            # Disable weight decay for parameters matching beta/gamma/bias.
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        # None selects every parameter; partial BN narrows the selection for
        # the models that support it.
        train_patterns = None
        if opt.partial_bn:
            if 'inceptionv3' in opt.model:
                train_patterns = '.*weight|.*bias|inception30_batchnorm0_gamma|inception30_batchnorm0_beta|inception30_batchnorm0_running_mean|inception30_batchnorm0_running_var'
            else:
                logger.info(
                    'Current model does not support partial batch normalization.'
                )

        # Only pass `kvstore` when one was created, so the Trainer's own
        # default applies otherwise.
        trainer_kwargs = {'update_on_kvstore': False}
        if opt.kvstore is not None:
            trainer_kwargs['kvstore'] = kv
        trainer = gluon.Trainer(net.collect_params(train_patterns),
                                optimizer,
                                optimizer_params,
                                **trainer_kwargs)

        if opt.accumulate > 1:
            # Sum gradients across backward passes instead of overwriting them.
            params = [
                p for p in net.collect_params().values()
                if p.grad_req != 'null'
            ]
            for p in params:
                p.grad_req = 'add'

        if opt.resume_states:
            trainer.load_states(opt.resume_states)

        if opt.use_amp:
            amp.init_trainer(trainer)

        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

        best_val_score = 0

        # Normalisation size for one optimizer update of a single batch.
        if opt.kvstore is not None:
            step_batch_size = batch_size * kv.num_workers
        else:
            step_batch_size = batch_size

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            train_metric.reset()
            btic = time.time()
            num_train_iter = len(train_data)
            train_loss_epoch = 0
            train_loss_iter = 0

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                with ag.record():
                    outputs = []
                    for X in data:
                        # Merge the first two axes before the forward pass.
                        X = X.reshape((-1, ) + X.shape[2:])
                        pred = net(X.astype(opt.dtype, copy=False))
                        outputs.append(pred)
                    loss = [
                        loss_fn(yhat, y.astype(opt.dtype, copy=False))
                        for yhat, y in zip(outputs, label)
                    ]

                    if opt.use_amp:
                        with amp.scale_loss(loss, trainer) as scaled_loss:
                            ag.backward(scaled_loss)
                    else:
                        ag.backward(loss)

                if opt.accumulate > 1:
                    # Gradients are summed (grad_req='add'), so update only
                    # once per group of `opt.accumulate` batches and clear the
                    # sums afterwards.  Stepping on the in-between batches, or
                    # never zeroing, would re-apply old gradients.
                    if (i + 1) % opt.accumulate == 0:
                        trainer.step(step_batch_size * opt.accumulate)
                        net.collect_params().zero_grad()
                else:
                    trainer.step(step_batch_size)

                train_metric.update(label, outputs)
                train_loss_iter = sum([l.mean().asscalar()
                                       for l in loss]) / len(loss)
                train_loss_epoch += train_loss_iter

                train_metric_name, train_metric_score = train_metric.get()
                global_step = epoch * num_train_iter + i
                sw.add_scalar(tag='train_acc_top1_iter',
                              value=train_metric_score * 100,
                              global_step=global_step)
                sw.add_scalar(tag='train_loss_iter',
                              value=train_loss_iter,
                              global_step=global_step)
                sw.add_scalar(tag='learning_rate_iter',
                              value=trainer.learning_rate,
                              global_step=global_step)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    logger.info(
                        'Epoch[%03d] Batch [%04d]/[%04d]\tSpeed: %f samples/sec\t %s=%f\t loss=%f\t lr=%f'
                        % (epoch, i, num_train_iter,
                           batch_size * opt.log_interval /
                           (time.time() - btic), train_metric_name,
                           train_metric_score * 100, train_loss_epoch /
                           (i + 1), trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))
            mx.ndarray.waitall()

            # The kvstore keys used by `test` are created once, before the
            # first validation pass.
            if opt.kvstore is not None and epoch == opt.resume_epoch:
                kv.init(111111, nd.zeros(1))
                kv.init(555555, nd.zeros(1))
                kv.init(999999, nd.zeros(1))

            if opt.kvstore is not None:
                acc_top1_val, acc_top5_val, loss_val = test(ctx, val_data, kv)
            else:
                acc_top1_val, acc_top5_val, loss_val = test(ctx, val_data)

            logger.info('[Epoch %03d] training: %s=%f\t loss=%f' %
                        (epoch, train_metric_name, train_metric_score * 100,
                         train_loss_epoch / num_train_iter))
            logger.info('[Epoch %03d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info(
                '[Epoch %03d] validation: acc-top1=%f acc-top5=%f loss=%f' %
                (epoch, acc_top1_val * 100, acc_top5_val * 100, loss_val))

            sw.add_scalar(tag='train_loss_epoch',
                          value=train_loss_epoch / num_train_iter,
                          global_step=epoch)
            sw.add_scalar(tag='val_loss_epoch',
                          value=loss_val,
                          global_step=epoch)
            sw.add_scalar(tag='val_acc_top1_epoch',
                          value=acc_top1_val * 100,
                          global_step=epoch)

            if acc_top1_val > best_val_score:
                best_val_score = acc_top1_val
                net.save_parameters('%s/%.4f-%s-%s-%03d-best.params' %
                                    (opt.save_dir, best_val_score, opt.dataset,
                                     model_name, epoch))
                trainer.save_states('%s/%.4f-%s-%s-%03d-best.states' %
                                    (opt.save_dir, best_val_score, opt.dataset,
                                     model_name, epoch))
            else:
                # Periodic checkpoint, written only on epochs that did not
                # already produce a "best" checkpoint.
                if opt.save_frequency and opt.save_dir and (
                        epoch + 1) % opt.save_frequency == 0:
                    net.save_parameters(
                        '%s/%s-%s-%03d.params' %
                        (opt.save_dir, opt.dataset, model_name, epoch))
                    trainer.save_states(
                        '%s/%s-%s-%03d.states' %
                        (opt.save_dir, opt.dataset, model_name, epoch))

        # save the last model
        net.save_parameters(
            '%s/%s-%s-%03d.params' %
            (opt.save_dir, opt.dataset, model_name, opt.num_epochs - 1))
        trainer.save_states(
            '%s/%s-%s-%03d.states' %
            (opt.save_dir, opt.dataset, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    train(context)
    sw.close()
# (2, 4) array filled with -1; row 0 receives id1 in its first slot and
# row 1 receives id2 in all four slots.
gt_ids = mx.nd.ones(shape=(2, 4)) * -1
gt_ids[0, :1] = id1
gt_ids[1, :4] = id2
print('class_ids:', gt_ids)

############################################################################
gt_boxes = mx.nd.ones(shape=(2, 4, 4)) * -1  # same -1 fill, one 4-vector per slot
gt_boxes[0, :1, :] = bbox1
gt_boxes[1, :, :] = bbox2
print('bounding boxes:', gt_boxes)

############################################################################
# We use a vgg16 atrous 300x300 SSD model in this example. For demo purposes, we
# don't use any pretrained weights here
from gluoncv import model_zoo
net = model_zoo.get_model('ssd_300_vgg16_atrous_voc', pretrained_base=False, pretrained=False)

############################################################################
# Some preparation before training
from mxnet import gluon
net.initialize()
conf_loss = gluon.loss.SoftmaxCrossEntropyLoss()
loc_loss = gluon.loss.HuberLoss()

############################################################################
# Simulate the training steps by manually computing the losses:
# You can always use ``gluoncv.loss.SSDMultiBoxLoss`` which fulfills this function.
from mxnet import autograd
from gluoncv.model_zoo.ssd.target import SSDTargetGenerator
target_generator = SSDTargetGenerator()
with autograd.record():
Exemple #56
0
def main():
    """Run BytePS-distributed ImageNet classification training.

    Every setting is read from ``parse_args()``. The function initialises
    BytePS, configures logging to a file and the console, builds the
    learning-rate schedule, the model (plus an optional distillation teacher),
    the data iterators and the metrics, and finally invokes the nested
    ``train`` closure on this worker's device context.

    Periodic checkpoints are written under ``opt.save_dir`` when both it and
    ``opt.save_frequency`` are set. Nothing is returned.
    """
    opt = parse_args()

    bps.init()
    # Build a per-run log file name from the worker count and the GPU name.
    # [-2] takes the second-to-last line of the nvidia-smi output, i.e. the
    # last line before the trailing newline.
    gpu_name = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=gpu_name', '--format=csv'])
    gpu_name = gpu_name.decode('utf8').split('\n')[-2]
    gpu_name = '-'.join(gpu_name.split())
    filename = "imagenet-%d-%s-%s.log" % (bps.size(), gpu_name,
                                          opt.logging_file)
    filehandler = logging.FileHandler(filename)
    streamhandler = logging.StreamHandler()

    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)

    logger.info(opt)

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167

    num_gpus = opt.num_gpus
    # batch_size *= max(1, num_gpus)
    # Each worker process drives exactly one device, selected by local rank.
    context = mx.gpu(bps.local_rank()) if num_gpus > 0 else mx.cpu(
        bps.local_rank())
    num_workers = opt.num_workers
    nworker = bps.size()
    rank = bps.rank()

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(
            range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    # The main schedule starts after warmup, so shift the decay epochs.
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // (batch_size * nworker)

    # Linear warmup followed by the user-selected schedule. The peak learning
    # rate is scaled by nworker / local_size.
    lr_scheduler = LRSequential([
        LRScheduler('linear',
                    base_lr=opt.warmup_lr,
                    target_lr=opt.lr * nworker / bps.local_size(),
                    nepochs=opt.warmup_epochs,
                    iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode,
                    base_lr=opt.lr * nworker / bps.local_size(),
                    target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay,
                    power=2)
    ])

    model_name = opt.model

    kwargs = {
        'ctx': context,
        'pretrained': opt.use_pretrained,
        'classes': classes
    }
    if opt.use_gn:
        from gluoncv.nn import GroupNorm
        kwargs['norm_layer'] = GroupNorm
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se

    if opt.last_gamma:
        kwargs['last_gamma'] = True

    # Plain SGD is used whenever a gradient compressor is configured,
    # Nesterov accelerated gradient otherwise.
    optimizer = 'sgd' if opt.compressor else 'nag'

    optimizer_params = {
        'wd': opt.wd,
        'momentum': opt.momentum,
        'lr_scheduler': lr_scheduler
    }

    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    net = get_model(model_name, **kwargs)
    net.cast(opt.dtype)
    # Compare by value: ``is not ''`` tests object identity, which depends on
    # string interning and raises a SyntaxWarning on Python 3.8+.
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)

    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        teacher = get_model(teacher_name,
                            pretrained=True,
                            classes=classes,
                            ctx=context)
        teacher.cast(opt.dtype)
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_train_idx, rec_val, rec_val_idx,
                     batch_size, num_workers):
        """Build train/val ``mx.io.ImageRecordIter`` pipelines from RecordIO files.

        Both iterators are sharded across workers via ``num_parts=nworker`` and
        ``part_index=rank``.

        Returns:
            ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch, ctx)``
            splits a batch's data and label across the contexts in ``ctx``.
        """
        rec_train = os.path.expanduser(rec_train)
        rec_train_idx = os.path.expanduser(rec_train_idx)
        rec_val = os.path.expanduser(rec_val)
        rec_val_idx = os.path.expanduser(rec_val_idx)
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        # Fall back to a 0.875 crop ratio when none (or a non-positive one)
        # is given; validation images are resized before the crop.
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))
        mean_rgb = [123.68, 116.779, 103.939]
        std_rgb = [58.393, 57.12, 57.375]

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch.data[0],
                                              ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0],
                                               ctx_list=ctx,
                                               batch_axis=0)
            return data, label

        train_data = mx.io.ImageRecordIter(path_imgrec=rec_train,
                                           path_imgidx=rec_train_idx,
                                           preprocess_threads=num_workers,
                                           shuffle=True,
                                           batch_size=batch_size,
                                           data_shape=(3, input_size,
                                                       input_size),
                                           mean_r=mean_rgb[0],
                                           mean_g=mean_rgb[1],
                                           mean_b=mean_rgb[2],
                                           std_r=std_rgb[0],
                                           std_g=std_rgb[1],
                                           std_b=std_rgb[2],
                                           rand_mirror=True,
                                           random_resized_crop=True,
                                           max_aspect_ratio=4. / 3.,
                                           min_aspect_ratio=3. / 4.,
                                           max_random_area=1,
                                           min_random_area=0.08,
                                           brightness=jitter_param,
                                           saturation=jitter_param,
                                           contrast=jitter_param,
                                           pca_noise=lighting_param,
                                           num_parts=nworker,
                                           part_index=rank)
        val_data = mx.io.ImageRecordIter(path_imgrec=rec_val,
                                         path_imgidx=rec_val_idx,
                                         preprocess_threads=num_workers,
                                         shuffle=False,
                                         batch_size=batch_size,
                                         resize=resize,
                                         data_shape=(3, input_size,
                                                     input_size),
                                         mean_r=mean_rgb[0],
                                         mean_g=mean_rgb[1],
                                         mean_b=mean_rgb[2],
                                         std_r=std_rgb[0],
                                         std_g=std_rgb[1],
                                         std_b=std_rgb[2],
                                         num_parts=nworker,
                                         part_index=rank)
        return train_data, val_data, batch_fn

    def get_data_loader(data_dir, batch_size, num_workers):
        """Build train/val ``gluon.data.DataLoader`` objects from raw images.

        NOTE(review): unlike ``get_data_rec``, no per-worker sharding
        arguments are passed here -- confirm that is intended.

        Returns:
            ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch, ctx)``
            splits ``batch[0]`` (data) and ``batch[1]`` (label) across ``ctx``.
        """
        normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0)
            return data, label

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(), normalize
        ])
        transform_test = transforms.Compose([
            transforms.Resize(resize, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(), normalize
        ])

        train_data = gluon.data.DataLoader(imagenet.classification.ImageNet(
            data_dir, train=True).transform_first(transform_train),
                                           batch_size=batch_size,
                                           shuffle=True,
                                           last_batch='discard',
                                           num_workers=num_workers)
        val_data = gluon.data.DataLoader(imagenet.classification.ImageNet(
            data_dir, train=False).transform_first(transform_test),
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=num_workers)

        return train_data, val_data, batch_fn

    if opt.use_rec:
        train_data, val_data, batch_fn = get_data_rec(opt.rec_train,
                                                      opt.rec_train_idx,
                                                      opt.rec_val,
                                                      opt.rec_val_idx,
                                                      batch_size, num_workers)
    else:
        train_data, val_data, batch_fn = get_data_loader(
            opt.data_dir, batch_size, num_workers)

    # With mixup the targets are soft distributions, so the training metric
    # is RMSE against the softmax output instead of accuracy.
    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    # Periodic checkpointing needs both a directory and a frequency;
    # otherwise it is disabled by clearing both.
    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        """Return mixup targets: ``lam * onehot(l) + (1 - lam) * onehot(l[::-1])``.

        ``label`` may be a single NDArray or a list of them; a list is always
        returned. ``eta`` applies label smoothing to both one-hot encodings.
        """
        if isinstance(label, nd.NDArray):
            label = [label]
        res = []
        for l in label:
            y1 = l.one_hot(classes,
                           on_value=1 - eta + eta / classes,
                           off_value=eta / classes)
            # The reversed labels pair with the reversed batch that the
            # training loop mixes into the data.
            y2 = l[::-1].one_hot(classes,
                                 on_value=1 - eta + eta / classes,
                                 off_value=eta / classes)
            res.append(lam * y1 + (1 - lam) * y2)
        return res

    def smooth(label, classes, eta=0.1):
        """Return label-smoothed one-hot targets for one NDArray or a list of them."""
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            res = l.one_hot(classes,
                            on_value=1 - eta + eta / classes,
                            off_value=eta / classes)
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data):
        """Evaluate ``net`` on ``val_data`` and return ``(top1_error, top5_error)``."""
        if opt.use_rec:
            val_data.reset()
        acc_top1.reset()
        acc_top5.reset()
        for batch in val_data:
            data, label = batch_fn(batch, ctx)
            outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (1 - top1, 1 - top5)

    def train(ctx):
        """Run the training loop on ``ctx`` (one context or a list of contexts).

        Each epoch trains over ``train_data``, validates with ``test``, logs
        the metrics, and writes checkpoints.
        """
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        # Only initialise from scratch when no parameter file was loaded.
        if opt.resume_params == '':
            net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        if opt.no_wd:
            # Disable weight decay on parameters whose names match
            # beta/gamma/bias.
            for v in net.collect_params('.*beta|.*gamma|.*bias').values():
                v.wd_mult = 0.0

        compression_params = {
            "compressor": opt.compressor,
            "ef": opt.ef,
            "momentum": opt.compress_momentum,
            "scaling": opt.onebit_scaling,
            "k": opt.k
        }

        trainer = bps.DistributedTrainer(net.collect_params(),
                                         optimizer,
                                         optimizer_params,
                                         compression_params=compression_params)

        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        # Smoothed or mixed-up labels are dense distributions, so the loss
        # must not treat them as sparse class indices.
        sparse_label_loss = not (opt.label_smoothing or opt.mixup)
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(
                temperature=opt.temperature,
                hard_weight=opt.hard_weight,
                sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(
                sparse_label=sparse_label_loss)

        best_val_score = 1

        # bps.byteps_declare_tensor("acc")
        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            train_metric.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    # Turn mixup off for the final mixup_off_epoch epochs.
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    # Keep the hard labels for the accuracy metric below.
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    # Teacher predictions are computed outside the recorded
                    # scope, so no gradient flows into the teacher.
                    teacher_prob = [
                        nd.softmax(
                            teacher(X.astype(opt.dtype, copy=False)) /
                            opt.temperature) for X in data
                    ]

                with ag.record():
                    outputs = [
                        net(X.astype(opt.dtype, copy=False)) for X in data
                    ]
                    if distillation:
                        loss = [
                            L(yhat.astype('float32', copy=False),
                              y.astype('float32', copy=False),
                              p.astype('float32', copy=False))
                            for yhat, y, p in zip(outputs, label, teacher_prob)
                        ]
                    else:
                        loss = [
                            L(yhat, y.astype(opt.dtype, copy=False))
                            for yhat, y in zip(outputs, label)
                        ]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)

                if opt.mixup:
                    output_softmax = [
                        nd.SoftmaxActivation(out.astype('float32', copy=False))
                        for out in outputs
                    ]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f\ttime=%f'
                        % (epoch, i, batch_size * nworker * opt.log_interval /
                           (time.time() - btic), train_metric_name,
                           train_metric_score, trainer.learning_rate,
                           time.time() - btic))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            # NOTE(review): ``i`` is the index of the last batch, so this
            # counts one batch fewer than was processed (and ``i`` is unbound
            # if the iterator yielded nothing) -- confirm before changing.
            throughput = int(batch_size * nworker * i / (time.time() - tic))

            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data)

            # acc = mx.nd.array([train_metric_score, err_top1_val, err_top5_val],
            #                   ctx=ctx[0])
            # bps.byteps_push_pull(acc, name="acc", is_average=False)
            # acc /= bps.size()
            # train_metric_score, err_top1_val, err_top5_val = acc[0].asscalar(
            # ), acc[1].asscalar(), acc[2].asscalar()

            # if bps.rank() == 0:
            logger.info('[Epoch %d] training: %s=%f' %
                        (epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f' %
                        (epoch, err_top1_val, err_top5_val))

            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                # NOTE(review): this runs even when checkpointing is
                # disabled; with save_dir == '' the path begins with '/',
                # i.e. the filesystem root -- confirm this is intended.
                net.save_parameters(
                    '%s/%.4f-imagenet-%s-%d-best.params' %
                    (save_dir, best_val_score, model_name, epoch))
                trainer.save_states(
                    '%s/%.4f-imagenet-%s-%d-best.states' %
                    (save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch +
                                                1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params' %
                                    (save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states' %
                                    (save_dir, model_name, epoch))

        # Always keep the final epoch's weights when checkpointing is enabled.
        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params' %
                                (save_dir, model_name, opt.num_epochs - 1))
            trainer.save_states('%s/imagenet-%s-%d.states' %
                                (save_dir, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
        if distillation:
            teacher.hybridize(static_alloc=True, static_shape=True)
    train(context)
Exemple #57
0
                                 num_workers=num_workers)

################################################################################
#
# Note that only ``train_data`` uses ``transform_train``, while
# ``val_data`` and ``test_data`` use ``transform_test`` to produce deterministic
# results for evaluation.
#
# Model and Trainer
# -----------------
#
# We use a pre-trained ``ResNet50_v2`` model, which has balanced accuracy and
# computation cost.

model_name = 'ResNet50_v2'
finetune_net = get_model(model_name, pretrained=True)
# Replace the network's ``output`` layer with a fresh Dense layer sized for
# ``num_classes``; it is created inside the network's name scope.
with finetune_net.name_scope():
    finetune_net.output = nn.Dense(num_classes)
# Only the new output layer is initialized here (Xavier); the rest of the
# network was loaded with ``pretrained=True``.
finetune_net.output.initialize(init.Xavier(), ctx=ctx)
# Move every parameter to the target context(s), then hybridize.
finetune_net.collect_params().reset_ctx(ctx)
finetune_net.hybridize()

# SGD trainer over all parameters of the network, including the new layer.
trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', {
    'learning_rate': lr,
    'momentum': momentum,
    'wd': wd
})
metric = mx.metric.Accuracy()
L = gluon.loss.SoftmaxCrossEntropyLoss()

################################################################################
Exemple #58
0
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]
    args.batch_size = len(ctx)  # 1 batch per device

    # network
    net_name = '_'.join(('faster_rcnn', cfg.BACKBONE.NAME, cfg.DATASET.TYPE))
    time_str = time.strftime("%m%d_%H%M")
    args.logdir = os.path.join(args.logdir, "{}_{}".format(net_name, time_str))

    # set up logger
    logger.set_logger_dir(args.logdir, 'd')
    logger.info("Config: ------------------------------------------\n" + \
            pprint.pformat(cfg.to_dict(), indent=1, width=100, compact=True))

    net = get_model(net_name,
                    pretrained_base=True,
                    dtype='float16' if cfg.GENERAL.FP16 else 'float32')
    if cfg.GENERAL.FP16:
        net.cast('float16')
    if args.load.strip():
        net.load_parameters(args.load.strip())
    else:
        for param in net.collect_params().values():
            if param._data is not None:
                continue
            param.initialize()
    net.collect_params().reset_ctx(ctx)

    # training data
    train_dataset, val_dataset, eval_metric = get_dataset(
        cfg.DATASET.TYPE, args)
from gluoncv import model_zoo, data, utils

######################################################################
# Load a pretrained model
# -------------------------
#
# Let's get a Faster RCNN model trained on Pascal VOC
# dataset with ResNet-50 backbone. By specifying
# ``pretrained=True``, it will automatically download the model from the model
# zoo if necessary. For more pretrained models, please refer to
# :doc:`../../model_zoo/index`.
#
# The returned model is a HybridBlock :py:class:`gluoncv.model_zoo.FasterRCNN`
# with a default context of `cpu(0)`.

net = model_zoo.get_model('faster_rcnn_resnet50_v1b_voc', pretrained=True)

######################################################################
# Pre-process an image
# --------------------
#
# Next we download an image, and pre-process with preset data transforms.
# The default behavior is to resize the short edge of the image to 600px.
# But you can feed an arbitrarily sized image.
#
# You can provide a list of image file names, such as ``[im_fname1, im_fname2,
# ...]`` to :py:func:`gluoncv.data.transforms.presets.rcnn.load_test` if you
# want to load multiple images together.
#
# This function returns two results. The first is a NDArray with shape
# `(batch_size, RGB_channels, height, width)`. It can be fed into the
def train(train_path, val_path, test_path):
    """Fine-tune a pretrained model on image-folder datasets and log accuracy.

    The model, hyper-parameters, transforms and device list are taken from
    module-level names (``model_name``, ``classes``, ``ctx``, ``batch_size``,
    ``num_workers``, ``lr``, ``momentum``, ``wd``, ``epochs``, ``lr_steps``,
    ``lr_factor``, ``transform_train``, ``transform_test``).

    Args:
        train_path: Root folder of the training images, read with
            ``ImageFolderDataset``.
        val_path: Root folder of the validation images.
        test_path: Root folder of the test images.

    Per-epoch training/validation metrics and the final test accuracy are
    reported through ``logging.info``; nothing is returned.
    """
    # Initialize the net with pretrained model
    finetune_net = get_model(model_name, pretrained=True)
    # Swap the output layer for a new Dense layer sized for ``classes``;
    # only that layer needs fresh initialization.
    with finetune_net.name_scope():
        finetune_net.output = nn.Dense(classes)
    finetune_net.output.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Define DataLoader
    train_data = gluon.data.DataLoader(
        gluon.data.vision.ImageFolderDataset(train_path).transform_first(transform_train),
        batch_size=batch_size, shuffle=True, num_workers=num_workers)

    val_data = gluon.data.DataLoader(
        gluon.data.vision.ImageFolderDataset(val_path).transform_first(transform_test),
        batch_size=batch_size, shuffle=False, num_workers=num_workers)

    test_data = gluon.data.DataLoader(
        gluon.data.vision.ImageFolderDataset(test_path).transform_first(transform_test),
        batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Define Trainer
    trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', {
        'learning_rate': lr, 'momentum': momentum, 'wd': wd})
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    lr_counter = 0
    num_batch = len(train_data)

    # Start Training
    for epoch in range(epochs):
        # Decay the learning rate at each epoch listed in ``lr_steps``. The
        # bounds check prevents an IndexError once every step has been used,
        # so ``lr_steps`` no longer needs a trailing sentinel entry.
        if lr_counter < len(lr_steps) and epoch == lr_steps[lr_counter]:
            trainer.set_learning_rate(trainer.learning_rate*lr_factor)
            lr_counter += 1

        tic = time.time()
        train_loss = 0
        metric.reset()

        for i, batch in enumerate(train_data):
            # even_split=False tolerates batches that do not divide evenly
            # across the contexts.
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for l in loss:
                l.backward()

            trainer.step(batch_size)
            # Average the per-context mean losses for this batch.
            train_loss += sum(l.mean().asscalar() for l in loss) / len(loss)

            metric.update(label, outputs)

        _, train_acc = metric.get()
        train_loss /= num_batch

        _, val_acc = test(finetune_net, val_data, ctx)

        logging.info('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f | time: %.1f' %
                 (epoch, train_acc, train_loss, val_acc, time.time() - tic))

    _, test_acc = test(finetune_net, test_data, ctx)
    logging.info('[Finished] Test-acc: %.3f' % (test_acc))