def convert_from_gluon(model_name, image_shape, classes=1000, logger=None):
    """Export a pretrained Gluon-CV model-zoo network as an MXNet Module checkpoint.

    The network is traced symbolically, a ``SoftmaxOutput`` head named
    ``softmax`` is appended, and the result is saved under a ``model``
    directory located next to this file.

    Parameters
    ----------
    model_name : str
        Name passed to ``get_model``; also used as the checkpoint file prefix.
    image_shape : str
        Comma-separated input shape without the batch axis (e.g. ``'3,224,224'``).
    classes : int
        Number of output classes requested from ``get_model``.
    logger : optional
        Object with an ``info`` method; when given, the conversion is logged.

    Returns
    -------
    str
        Checkpoint prefix (``<this dir>/model/<model_name>``).
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    model_path = os.path.join(dir_path, 'model')
    if logger is not None:
        logger.info('Converting model from Gluon-CV ModelZoo %s... into path %s' % (model_name, model_path))

    net = get_model(name=model_name, classes=classes, pretrained=True)
    net.hybridize()

    # Trace the network on a symbolic input and attach the softmax head.
    x = mx.sym.var('data')
    y = net(x)
    y = mx.sym.SoftmaxOutput(data=y, name='softmax')
    symnet = mx.symbol.load_json(y.tojson())

    # Parameters whose name contains 'running' go to the auxiliary states,
    # everything else to the regular arguments.
    params = net.collect_params()
    args = {}
    auxs = {}
    for param in params.values():
        v = param._reduce()
        k = param.name
        if 'running' in k:
            auxs[k] = v
        else:
            args[k] = v

    data_shape = (1,) + tuple(int(i) for i in image_shape.split(','))
    mod = mx.mod.Module(symbol=symnet, context=mx.cpu(), label_names=['softmax_label'])
    mod.bind(for_training=False, data_shapes=[('data', data_shape)])
    mod.set_params(arg_params=args, aux_params=auxs)

    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(model_path, exist_ok=True)
    prefix = os.path.join(model_path, model_name)
    mod.save_checkpoint(prefix, 0)
    return prefix
def __init__(self, args):
    """Set up data loaders, network, loss, optimizer and metric from ``args``.

    Reads dataset, model, context, dtype, checkpoint and optimizer settings
    from ``args`` and stores the resulting objects on ``self``.

    Raises
    ------
    RuntimeError
        If ``args.resume`` is set but does not point to an existing file.
    """
    self.args = args

    # image transform
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize([.485, .456, .406], [.229, .224, .225])
    input_transform = transforms.Compose([to_tensor, normalize])

    # dataset and dataloader
    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size,
    }
    trainset = get_segmentation_dataset(
        args.dataset, split=args.train_split, mode='train', **data_kwargs)
    valset = get_segmentation_dataset(
        args.dataset, split='val', mode='val', **data_kwargs)
    self.train_data = gluon.data.DataLoader(
        trainset, args.batch_size, shuffle=True, last_batch='rollover',
        num_workers=args.workers)
    self.eval_data = gluon.data.DataLoader(
        valset, args.test_batch_size, last_batch='rollover',
        num_workers=args.workers)

    # create network: a model-zoo name takes precedence over the
    # model/backbone description
    if args.model_zoo is None:
        model = get_segmentation_model(
            model=args.model, dataset=args.dataset, backbone=args.backbone,
            norm_layer=args.norm_layer, norm_kwargs=args.norm_kwargs,
            aux=args.aux, crop_size=args.crop_size)
    else:
        model = get_model(args.model_zoo, pretrained=True)
    model.cast(args.dtype)
    print(model)
    self.net = DataParallelModel(model, args.ctx, args.syncbn)
    self.evaluator = DataParallelModel(SegEvalModel(model), args.ctx)

    # resume checkpoint if needed
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        model.load_parameters(args.resume, ctx=args.ctx)

    # create criterion
    criterion = MixSoftmaxCrossEntropyLoss(args.aux, aux_weight=args.aux_weight)
    self.criterion = DataParallelCriterion(criterion, args.ctx, args.syncbn)

    # optimizer and lr scheduling
    self.lr_scheduler = LRScheduler(
        mode='poly', baselr=args.lr, niters=len(self.train_data),
        nepochs=args.epochs)
    kv = mx.kv.create(args.kvstore)
    optimizer_params = {
        'lr_scheduler': self.lr_scheduler,
        'wd': args.weight_decay,
        'momentum': args.momentum,
    }
    if args.dtype == 'float16':
        optimizer_params['multi_precision'] = True

    if args.no_wd:
        # zero the weight-decay multiplier of beta/gamma/bias parameters
        no_decay = self.net.module.collect_params('.*beta|.*gamma|.*bias')
        for param in no_decay.values():
            param.wd_mult = 0.0

    self.optimizer = gluon.Trainer(
        self.net.module.collect_params(), 'sgd', optimizer_params, kvstore=kv)

    # evaluation metrics
    self.metric = gluoncv.utils.metrics.SegmentationMetric(trainset.num_class)
def __init__(self, num_clothes, num_colors, ctx):
    """Create the shared feature extractor and the clothes / color heads.

    Parameters
    ----------
    num_clothes : int
        Output units of the clothes head.
    num_colors : int
        Output units of the color head.
    ctx
        Context passed to the pretrained backbone and to every ``initialize`` call.
    """
    super(fashion_net_2_branches, self).__init__()

    # Pretrained MobileNetV2 features; every parameter gets grad_req 'null'.
    backbone = model_zoo.get_model('mobilenetv2_1.0', pretrained=True, ctx=ctx)
    self._features = backbone.features
    for frozen in self._features.collect_params().values():
        frozen.grad_req = 'null'

    self._flatten = nn.Flatten()
    self._relu = nn.Activation(activation='relu')
    self._swish = nn.Swish()

    # Clothes head: layers are created first, then initialized in the same order.
    self._clothes_fc_1 = nn.Dense(100)
    self._clothes_bn = nn.BatchNorm(center=False, scale=True)
    self._clothes_out = nn.Dense(num_clothes)
    clothes_layers = (
        (self._clothes_fc_1, init.Xavier()),
        (self._clothes_bn, init.Zero()),
        (self._clothes_out, init.Xavier()),
    )
    for layer, initializer in clothes_layers:
        layer.initialize(init=initializer, ctx=ctx)

    # Color head.
    self._color_fc_1 = nn.Dense(100)
    self._color_bn_1 = nn.BatchNorm(center=False, scale=True)
    self._color_fc_2 = nn.Dense(50)
    self._color_bn_2 = nn.BatchNorm(center=False, scale=True)
    self._color_out = nn.Dense(num_colors)
    color_layers = (
        (self._color_fc_1, init.Xavier()),
        (self._color_bn_1, init.Zero()),
        (self._color_fc_2, init.Xavier()),
        (self._color_bn_2, init.Zero()),
        (self._color_out, init.Xavier()),
    )
    for layer, initializer in color_layers:
        layer.initialize(init=initializer, ctx=ctx)
# tutorial. In addition, we use the idea of temporal segments (TSN) [Wang16]_ # to wrap the backbone VGG16 network for adaptation to video domain. # # `TSN <https://arxiv.org/abs/1608.00859>`_ is a widely adopted video # classification method. It is proposed to incorporate temporal information from an entire video. # The idea is straightforward: we can evenly divide the video into several segments, # process each segment individually, obtain segmental consensus from each segment, and perform # final prediction. TSN is more like a general algorithm, rather than a specific network architecture. # It can work with both 2D and 3D neural networks. # number of GPUs to use num_gpus = 1 ctx = [mx.gpu(i) for i in range(num_gpus)] # Get the model vgg16_ucf101 with temporal segment network, with 101 output classes, without pre-trained weights net = get_model(name='vgg16_ucf101', nclass=101, num_segments=3) net.collect_params().reset_ctx(ctx) print(net) ################################################################ # Data Augmentation and Data Loader # --------------------------------- # # Data augmentation for video is different from image. For example, if you # want to randomly crop a video sequence, you need to make sure all the video # frames in this sequence undergo the same cropping process. We provide a # new set of transformation functions, working with multiple images. # Please check out the `video.py <../../../gluoncv/data/transforms/video.py>`_ for more details. # Most video data augmentation strategies used here are introduced in [Wang15]_. transform_train = transforms.Compose([
# transpose it to `num_channels*height*width`,
# and normalize with mean and standard deviation calculated across all CIFAR10 images.
#
# What does the transformed image look like?

img = transform_fn(img)
# imshow expects channels last, so move the channel axis to the end for display.
displayable = nd.transpose(img, (1, 2, 0)).asnumpy()
plt.imshow(displayable)
plt.show()

################################################################
# Can't recognize anything? *Don't panic!* Neither do I.
# The transformation makes it more "model-friendly", instead of "human-friendly".
#
# Next, we load a pre-trained model.

net = get_model('cifar_resnet110_v1', classes=10, pretrained=True)

################################################################
#
# Finally, we prepare the image and feed it to the model

# The network is fed a batch, so add a leading batch axis of size one.
pred = net(img.expand_dims(axis=0))

class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
ind = nd.argmax(pred, axis=1).astype('int')
predicted_class = class_names[ind.asscalar()]
probability = nd.softmax(pred)[0][ind].asscalar()
print('The input picture is classified as [%s], with probability %.3f.' %
      (predicted_class, probability))

################################################################
# Play with the scripts
if len(upscale_bbox) > 0: predicted_heatmap = pose_net(pose_input) pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox) axes = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores, box_thresh=0.5, keypoint_thresh=0.2, ax=axes) plt.draw() plt.pause(0.001) else: axes = plot_image(frame, ax=axes) plt.draw() plt.pause(0.001) return axes if __name__ == '__main__': ctx = mx.cpu() detector_name = "ssd_512_mobilenet1.0_coco" detector = get_model(detector_name, pretrained=True, ctx=ctx) detector.reset_class(classes=['person'], reuse_weights={'person':'person'}) net = get_model('simple_pose_resnet18_v1b', pretrained='ccd24037', ctx=ctx) cap = cv2.VideoCapture(0) time.sleep(1) ### letting the camera autofocus axes = None for i in range(opt.num_frames): ret, frame = cap.read() frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8') axes = keypoint_detection(frame, detector, net, ctx, axes=axes)
def main():
    """Train an ImageNet classifier, optionally with mixup, label smoothing and distillation.

    All settings come from ``parse_args()``. The function configures logging,
    builds the learning-rate schedule, the student network (and a teacher
    network when distillation is enabled), the data pipeline (record files or
    raw images), then runs the training loop with per-epoch validation and
    checkpointing.
    """
    opt = parse_args()

    filehandler = logging.FileHandler(opt.logging_file)
    streamhandler = logging.StreamHandler()

    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)

    logger.info(opt)

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167

    num_gpus = opt.num_gpus
    # opt.batch_size is per device; scale it to the global batch size.
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    # The main schedule starts after warmup, so shift the decay epochs accordingly.
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=opt.lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=opt.lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])

    model_name = opt.model

    kwargs = {'ctx': context, 'pretrained': opt.use_pretrained, 'classes': classes}
    if opt.use_gn:
        from gluoncv.nn import GroupNorm
        kwargs['norm_layer'] = GroupNorm
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se

    if opt.last_gamma:
        kwargs['last_gamma'] = True

    optimizer = 'nag'
    optimizer_params = {'wd': opt.wd, 'momentum': opt.momentum, 'lr_scheduler': lr_scheduler}
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    net = get_model(model_name, **kwargs)
    net.cast(opt.dtype)
    # Compare by value: `is not ''` tests object identity and is unreliable for strings.
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)

    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        teacher = get_model(teacher_name, pretrained=True, classes=classes, ctx=context)
        teacher.cast(opt.dtype)
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_train_idx, rec_val, rec_val_idx, batch_size, num_workers):
        """Build train/val ``ImageRecordIter`` objects and the matching batch splitter."""
        rec_train = os.path.expanduser(rec_train)
        rec_train_idx = os.path.expanduser(rec_train_idx)
        rec_val = os.path.expanduser(rec_val)
        rec_val_idx = os.path.expanduser(rec_val_idx)
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))
        mean_rgb = [123.68, 116.779, 103.939]
        std_rgb = [58.393, 57.12, 57.375]

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            return data, label

        train_data = mx.io.ImageRecordIter(
            path_imgrec         = rec_train,
            path_imgidx         = rec_train_idx,
            preprocess_threads  = num_workers,
            shuffle             = True,
            batch_size          = batch_size,

            data_shape          = (3, input_size, input_size),
            mean_r              = mean_rgb[0],
            mean_g              = mean_rgb[1],
            mean_b              = mean_rgb[2],
            std_r               = std_rgb[0],
            std_g               = std_rgb[1],
            std_b               = std_rgb[2],
            rand_mirror         = True,
            random_resized_crop = True,
            max_aspect_ratio    = 4. / 3.,
            min_aspect_ratio    = 3. / 4.,
            max_random_area     = 1,
            min_random_area     = 0.08,
            brightness          = jitter_param,
            saturation          = jitter_param,
            contrast            = jitter_param,
            pca_noise           = lighting_param,
        )
        val_data = mx.io.ImageRecordIter(
            path_imgrec         = rec_val,
            path_imgidx         = rec_val_idx,
            preprocess_threads  = num_workers,
            shuffle             = False,
            batch_size          = batch_size,

            resize              = resize,
            data_shape          = (3, input_size, input_size),
            mean_r              = mean_rgb[0],
            mean_g              = mean_rgb[1],
            mean_b              = mean_rgb[2],
            std_r               = std_rgb[0],
            std_g               = std_rgb[1],
            std_b               = std_rgb[2],
        )
        return train_data, val_data, batch_fn

    def get_data_loader(data_dir, batch_size, num_workers):
        """Build train/val Gluon ``DataLoader`` objects and the matching batch splitter."""
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            return data, label

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            normalize
        ])
        transform_test = transforms.Compose([
            transforms.Resize(resize, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize
        ])

        train_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        return train_data, val_data, batch_fn

    if opt.use_rec:
        train_data, val_data, batch_fn = get_data_rec(opt.rec_train, opt.rec_train_idx,
                                                      opt.rec_val, opt.rec_val_idx,
                                                      batch_size, num_workers)
    else:
        train_data, val_data, batch_fn = get_data_loader(opt.data_dir, batch_size, num_workers)

    # With mixup the labels are soft, so training progress is tracked with RMSE.
    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        """Return soft labels mixing each label batch with its reversed copy."""
        if isinstance(label, nd.NDArray):
            label = [label]
        res = []
        for l in label:
            y1 = l.one_hot(classes, on_value=1 - eta + eta/classes, off_value=eta/classes)
            y2 = l[::-1].one_hot(classes, on_value=1 - eta + eta/classes, off_value=eta/classes)
            res.append(lam*y1 + (1-lam)*y2)
        return res

    def smooth(label, classes, eta=0.1):
        """Return label-smoothed one-hot encodings for each label batch."""
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            res = l.one_hot(classes, on_value=1 - eta + eta/classes, off_value=eta/classes)
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data):
        """Evaluate ``net`` on ``val_data`` and return (top-1 error, top-5 error)."""
        if opt.use_rec:
            val_data.reset()
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (1-top1, 1-top5)

    def train(ctx):
        """Run the training loop on the given context(s)."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        # Only initialize from scratch when no parameters were loaded above.
        if opt.resume_params == '':
            net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        if opt.no_wd:
            for v in net.collect_params('.*beta|.*gamma|.*bias').values():
                v.wd_mult = 0.0

        trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        # Smoothed or mixed-up labels are dense distributions, not class indices.
        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(temperature=opt.temperature,
                                                             hard_weight=opt.hard_weight,
                                                             sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)

        best_val_score = 1

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            train_metric.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    # Mixup is switched off for the last `mixup_off_epoch` epochs.
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam*X + (1-lam)*X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    # Keep the hard labels for the accuracy metric.
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature)
                                    for X in data]

                with ag.record():
                    outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    if distillation:
                        loss = [L(yhat.astype('float32', copy=False),
                                  y.astype('float32', copy=False),
                                  p.astype('float32', copy=False))
                                for yhat, y, p in zip(outputs, label, teacher_prob)]
                    else:
                        loss = [L(yhat, y.astype(opt.dtype, copy=False))
                                for yhat, y in zip(outputs, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)

                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False))
                                      for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                if opt.log_interval and not (i+1)%opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f'%(
                                epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                                train_metric_name, train_metric_score, trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i /(time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data)

            logger.info('[Epoch %d] training: %s=%f'%(epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f'%(epoch, throughput, time.time()-tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f'%(epoch, err_top1_val, err_top5_val))

            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                net.save_parameters('%s/%.4f-imagenet-%s-%d-best.params'%(save_dir, best_val_score, model_name, epoch))
                trainer.save_states('%s/%.4f-imagenet-%s-%d-best.states'%(save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states'%(save_dir, model_name, epoch))

        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, opt.num_epochs-1))
            trainer.save_states('%s/imagenet-%s-%d.states'%(save_dir, model_name, opt.num_epochs-1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
        if distillation:
            teacher.hybridize(static_alloc=True, static_shape=True)
    train(context)
# training contexts: one GPU per non-empty id in --gpus, falling back to CPU
ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
ctx = ctx if ctx else [mx.cpu()]
args.batch_size = len(ctx)  # 1 batch per device

# network: the model-zoo name is assembled from the enabled modules
kwargs = {}
module_list = []
if args.use_fpn:
    module_list.append('fpn')
if args.norm_layer is not None:
    module_list.append(args.norm_layer)
    if args.norm_layer == 'bn':
        # Count the contexts actually created rather than the raw --gpus tokens,
        # so empty tokens (e.g. a trailing comma) cannot inflate the device count.
        # NOTE(review): presumably `num_devices` must match the devices in `ctx` - confirm.
        kwargs['num_devices'] = len(ctx)
net_name = '_'.join(('faster_rcnn', *module_list, args.network, args.dataset))
args.save_prefix += net_name
net = get_model(net_name, pretrained_base=True, **kwargs)
if args.resume.strip():
    net.load_parameters(args.resume.strip())
else:
    # Initialize only the parameters that do not hold data yet.
    for param in net.collect_params().values():
        if param._data is not None:
            continue
        param.initialize()
net.collect_params().reset_ctx(ctx)

# training data
train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
train_data, val_data = get_dataloader(
    net, train_dataset, val_dataset, FasterRCNNDefaultTrainTransform,
    FasterRCNNDefaultValTransform, args.batch_size, args.num_workers, args.use_fpn)
print('hello')
import os.path
print(args.train)
sample_rec = os.path.join(args.train, 'birds_ssd_sample_train.rec')
print(os.path.isfile(sample_rec))

# training contexts: one GPU per non-empty id in --gpus, CPU otherwise
gpu_ids = [int(i) for i in args.gpus.split(',') if i.strip()]
ctx = [mx.gpu(gpu_id) for gpu_id in gpu_ids]
if not ctx:
    ctx = [mx.cpu()]

# network
net_name = '_'.join(('yolo3', args.network, args.dataset))
args.save_prefix += net_name

# use sync bn if specified
model_kwargs = {'pretrained_base': True}
if args.syncbn and len(ctx) > 1:
    model_kwargs['norm_layer'] = gluon.contrib.nn.SyncBatchNorm
    model_kwargs['norm_kwargs'] = {'num_devices': len(ctx)}
    net = get_model(net_name, **model_kwargs)
    async_net = get_model(net_name, pretrained_base=False)  # used by cpu worker
else:
    net = get_model(net_name, **model_kwargs)
    async_net = net

resume_path = args.resume.strip()
if resume_path:
    net.load_parameters(resume_path)
    async_net.load_parameters(resume_path)
else:
    # Warnings raised during initialization are recorded instead of printed.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        net.initialize()
        async_net.initialize()
[pos[j, 1], pos[j_parent, 1]], [pos[j, 2], pos[j_parent, 2]], zdir='z', c=col) if save: plt.savefig(save_path) ckpt_dir = '../../checkpoint/detectron_pt_coco' ckpt_name = 'arc_1_ch_1024_epoch_40.bin' filter_widths = [1, 1, 1] pose3d_predictor = get_pose3d_predictor(ckpt_dir, ckpt_name, filter_widths) detector_name = ['yolo3_mobilenet1.0_coco', 'yolo3_darknet53_coco'] detector = model_zoo.get_model(detector_name[0], pretrained=True) pose_net = model_zoo.get_model('alpha_pose_resnet101_v1b_coco', pretrained=True) # reset the detector to only detect humans # noinspection PyUnresolvedReferences detector.reset_class(['person'], reuse_weights=['person']) def predict(img_path): # 1. Preprocess the input image and detect persons x, img = data.transforms.presets.yolo.load_test(img_path, short=256) start = time.time() # detect persons and bbox
if __name__ == '__main__':
    args = parse_args()
    # Seed mxnet, numpy and the python builtin random generator.
    gutils.random.seed(args.seed)

    # training contexts: one GPU per non-empty id in --gpus, CPU otherwise
    gpu_ids = [int(i) for i in args.gpus.split(',') if i.strip()]
    ctx = [mx.gpu(gpu_id) for gpu_id in gpu_ids]
    if not ctx:
        ctx = [mx.cpu()]

    # network
    net_name = '_'.join(('yolo3', args.network, args.dataset))
    args.save_prefix += net_name

    # use sync bn if specified
    num_sync_bn_devices = len(ctx) if args.syncbn else -1
    model_kwargs = {'pretrained_base': True}
    if num_sync_bn_devices > 1:
        model_kwargs['num_sync_bn_devices'] = num_sync_bn_devices
        net = get_model(net_name, **model_kwargs)
        async_net = get_model(net_name, pretrained_base=False)  # used by cpu worker
    else:
        net = get_model(net_name, **model_kwargs)
        async_net = net

    resume_path = args.resume.strip()
    if resume_path:
        net.load_parameters(resume_path)
        async_net.load_parameters(resume_path)
    else:
        # Warnings raised during initialization are recorded instead of printed.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            net.initialize()
            async_net.initialize()

    # training data
    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
def main():
    """Train an ImageNet classifier whose network returns a main and an auxiliary output.

    All settings come from ``parse_args()``. The function configures logging,
    builds the learning-rate schedule, the network and the data pipeline
    (record files or raw images), then runs the training loop with per-epoch
    validation and checkpointing. Accuracy is tracked separately for the main
    output (index 0) and the auxiliary output (index 1).
    """
    opt = parse_args()

    filehandler = logging.FileHandler(opt.logging_file)
    streamhandler = logging.StreamHandler()

    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)

    logger.info(opt)

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167

    num_gpus = opt.num_gpus
    # opt.batch_size is per device; scale it to the global batch size.
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    # The main schedule starts after warmup, so shift the decay epochs accordingly.
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=opt.lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=opt.lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])

    model_name = opt.model

    kwargs = {'ctx': context, 'pretrained': opt.use_pretrained, 'classes': classes}
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se

    optimizer = 'nag'
    optimizer_params = {'wd': opt.wd, 'momentum': opt.momentum, 'lr_scheduler': lr_scheduler}
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    net = get_model(model_name, **kwargs)
    net.cast(opt.dtype)

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_train_idx, rec_val, rec_val_idx, batch_size, num_workers):
        """Build train/val ``ImageRecordIter`` objects and the matching batch splitter."""
        rec_train = os.path.expanduser(rec_train)
        rec_train_idx = os.path.expanduser(rec_train_idx)
        rec_val = os.path.expanduser(rec_val)
        rec_val_idx = os.path.expanduser(rec_val_idx)
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        mean_rgb = [123.68, 116.779, 103.939]
        std_rgb = [58.393, 57.12, 57.375]

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            return data, label

        train_data = mx.io.ImageRecordIter(
            path_imgrec         = rec_train,
            path_imgidx         = rec_train_idx,
            preprocess_threads  = num_workers,
            shuffle             = True,
            batch_size          = batch_size,

            data_shape          = (3, input_size, input_size),
            mean_r              = mean_rgb[0],
            mean_g              = mean_rgb[1],
            mean_b              = mean_rgb[2],
            std_r               = std_rgb[0],
            std_g               = std_rgb[1],
            std_b               = std_rgb[2],
            rand_mirror         = True,
            random_resized_crop = True,
            max_aspect_ratio    = 4. / 3.,
            min_aspect_ratio    = 3. / 4.,
            max_random_area     = 1,
            min_random_area     = 0.08,
            brightness          = jitter_param,
            saturation          = jitter_param,
            contrast            = jitter_param,
            pca_noise           = lighting_param,
        )
        val_data = mx.io.ImageRecordIter(
            path_imgrec         = rec_val,
            path_imgidx         = rec_val_idx,
            preprocess_threads  = num_workers,
            shuffle             = False,
            batch_size          = batch_size,

            resize              = 256,
            data_shape          = (3, input_size, input_size),
            mean_r              = mean_rgb[0],
            mean_g              = mean_rgb[1],
            mean_b              = mean_rgb[2],
            std_r               = std_rgb[0],
            std_g               = std_rgb[1],
            std_b               = std_rgb[2],
        )
        return train_data, val_data, batch_fn

    def get_data_loader(data_dir, batch_size, num_workers):
        """Build train/val Gluon ``DataLoader`` objects and the matching batch splitter."""
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            return data, label

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            normalize
        ])
        transform_test = transforms.Compose([
            transforms.Resize(256, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize
        ])

        train_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        return train_data, val_data, batch_fn

    if opt.use_rec:
        train_data, val_data, batch_fn = get_data_rec(opt.rec_train, opt.rec_train_idx,
                                                      opt.rec_val, opt.rec_val_idx,
                                                      batch_size, num_workers)
    else:
        train_data, val_data, batch_fn = get_data_loader(opt.data_dir, batch_size, num_workers)

    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    acc_top1_aux = mx.metric.Accuracy()
    acc_top5_aux = mx.metric.TopKAccuracy(5)

    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def smooth(label, classes, eta=0.1):
        """Return label-smoothed dense targets for each label batch."""
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            ind = l.astype('int')
            res = nd.zeros((ind.shape[0], classes), ctx=l.context)
            res += eta/classes
            res[nd.arange(ind.shape[0], ctx=l.context), ind] = 1 - eta + eta/classes
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data):
        """Evaluate ``net`` and return (top1, top5, top1_aux, top5_aux) error rates."""
        if opt.use_rec:
            val_data.reset()
        acc_top1.reset()
        acc_top5.reset()
        acc_top1_aux.reset()
        acc_top5_aux.reset()
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            acc_top1.update(label, [o[0] for o in outputs])
            acc_top5.update(label, [o[0] for o in outputs])
            acc_top1_aux.update(label, [o[1] for o in outputs])
            acc_top5_aux.update(label, [o[1] for o in outputs])

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        _, top1_aux = acc_top1_aux.get()
        _, top5_aux = acc_top5_aux.get()
        return (1-top1, 1-top5, 1-top1_aux, 1-top5_aux)

    def train(ctx):
        """Run the training loop on the given context(s)."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
        # Smoothed labels are dense distributions, so the loss must not expect class indices.
        if opt.label_smoothing:
            L = MixSoftmaxCrossEntropyLoss(sparse_label=False, aux_weight=0.4)
        else:
            L = MixSoftmaxCrossEntropyLoss(aux_weight=0.4)

        best_val_score = 1

        for epoch in range(opt.num_epochs):
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            acc_top1.reset()
            acc_top5.reset()
            acc_top1_aux.reset()
            acc_top5_aux.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)
                if opt.label_smoothing:
                    label_smooth = smooth(label, classes)
                else:
                    label_smooth = label
                with ag.record():
                    outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    loss = [L(yhat[0], yhat[1], y) for yhat, y in zip(outputs, label_smooth)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)

                # Metrics always use the hard labels, never the smoothed ones.
                acc_top1.update(label, [o[0] for o in outputs])
                acc_top5.update(label, [o[0] for o in outputs])
                acc_top1_aux.update(label, [o[1] for o in outputs])
                acc_top5_aux.update(label, [o[1] for o in outputs])
                if opt.log_interval and not (i+1)%opt.log_interval:
                    _, top1 = acc_top1.get()
                    _, top5 = acc_top5.get()
                    _, top1_aux = acc_top1_aux.get()
                    _, top5_aux = acc_top5_aux.get()
                    err_top1, err_top5, err_top1_aux, err_top5_aux = (1-top1, 1-top5, 1-top1_aux, 1-top5_aux)
                    logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t'
                                'top1-err=%f\ttop5-err=%f\ttop1-err-aux=%f\ttop5-err-aux=%f'%(
                                epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                                err_top1, err_top5, err_top1_aux, err_top5_aux))
                    btic = time.time()

            _, top1 = acc_top1.get()
            _, top5 = acc_top5.get()
            _, top1_aux = acc_top1_aux.get()
            _, top5_aux = acc_top5_aux.get()
            err_top1, err_top5, err_top1_aux, err_top5_aux = (1-top1, 1-top5, 1-top1_aux, 1-top5_aux)

            err_top1_val, err_top5_val, err_top1_val_aux, err_top5_val_aux = test(ctx, val_data)

            logger.info('[Epoch %d] training: err-top1=%f err-top5=%f err-top1_aux=%f err-top5_aux=%f'%
                        (epoch, err_top1, err_top5, err_top1_aux, err_top5_aux))
            logger.info('[Epoch %d] time cost: %f'%(epoch, time.time()-tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f err-top1_aux=%f err-top5_aux=%f'%
                        (epoch, err_top1_val, err_top5_val, err_top1_val_aux, err_top5_val_aux))

            # "Best" checkpoints are only written after epoch 50.
            if err_top1_val < best_val_score and epoch > 50:
                best_val_score = err_top1_val
                net.save_parameters('%s/%.4f-imagenet-%s-%d-best.params'%(save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, epoch))

        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params'%(save_dir, model_name, opt.num_epochs-1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    train(context)
First let's import some necessary libraries: """ from matplotlib import pyplot as plt import gluoncv from gluoncv import model_zoo, data, utils ###################################################################### # Load a pretrained model # ------------------------- # # Let's get a Faster RCNN model trained on COCO # dataset with ResNet-50 backbone. net = model_zoo.get_model('faster_rcnn_resnet50_v1b_coco', pretrained=True) ###################################################################### # Pre-process an image # -------------------- # Similar to faster rcnn inference tutorial, we grab and preprocess a demo image im_fname = utils.download('https://github.com/dmlc/web-data/blob/master/' + 'gluoncv/detection/biking.jpg?raw=true', path='biking.jpg') x, orig_img = data.transforms.presets.rcnn.load_test(im_fname) ###################################################################### # Reset classes to exactly what we want # ------------------------------------- # Original COCO model has 80 classes
""" from gluoncv import model_zoo, data, utils from matplotlib import pyplot as plt ###################################################################### # Load a pretrained model # ------------------------- # # Let's get an YOLOv3 model trained with on Pascal VOC # dataset with Darknet53 as the base model. By specifying # ``pretrained=True``, it will automatically download the model from the model # zoo if necessary. For more pretrained models, please refer to # :doc:`../../model_zoo/index`. net = model_zoo.get_model('yolo3_darknet53_voc', pretrained=True) ###################################################################### # Pre-process an image # -------------------- # # Next we download an image, and pre-process with preset data transforms. Here we # specify that we resize the short edge of the image to 512 px. You can # feed an arbitrarily sized image. # Once constraint for YOLO is that input height and width can be divided by 32. # # You can provide a list of image file names, such as ``[im_fname1, im_fname2, # ...]`` to :py:func:`gluoncv.data.transforms.presets.yolo.load_test` if you # want to load multiple image together. # # This function returns two results. The first is a NDArray with shape
from gluoncv.utils.viz import plot_keypoints

# Command-line interface. The description states what the script actually does
# (person detection followed by pose estimation) so that ``--help`` is accurate.
parser = argparse.ArgumentParser(
    description='Detect people in a given image and estimate their pose keypoints')
parser.add_argument('--detector', type=str, default='yolo3_mobilenet1.0_coco',
                    help='name of the detection model to use')
parser.add_argument('--pose-model', type=str, default='simple_pose_resnet50_v1b',
                    help='name of the pose estimation model to use')
parser.add_argument('--input-pic', type=str, required=True,
                    help='path to the input picture')
opt = parser.parse_args()


def keypoint_detection(img_path, detector, pose_net):
    """Detect objects in an image, estimate keypoints for them and plot the result.

    Parameters
    ----------
    img_path : str
        Path of the image to load; it is read with the YOLO test preset,
        resizing the short edge to 512.
    detector : callable
        Detection network; called on the pre-processed image and expected to
        return ``(class_IDs, scores, bounding_boxes)``.
    pose_net : callable
        Pose network; called on the crops produced by
        ``detector_to_simple_pose`` and expected to return heatmaps.

    Returns
    -------
    None
        The annotated image is displayed through ``plt.show()``.
    """
    x, img = data.transforms.presets.yolo.load_test(img_path, short=512)
    class_IDs, scores, bounding_boxs = detector(x)

    # Turn the detections into pose-network inputs plus the boxes needed to
    # map heatmap peaks back to image coordinates.
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs)
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

    # The returned axes object is not needed; the figure is shown directly.
    plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                   box_thresh=0.5, keypoint_thresh=0.2)
    plt.show()


if __name__ == '__main__':
    detector = get_model(opt.detector, pretrained=True)
    # Restrict the detector to the "person" class, reusing its existing weights.
    detector.reset_class(["person"], reuse_weights=['person'])
    net = get_model(opt.pose_model, pretrained=True)

    keypoint_detection(opt.input_pic, detector, net)
############################################################################# # Model Definition # ----------------- # # A Simple Pose model consists of a main body of a resnet, and several deconvolution layers. # Its final layer is a convolution layer predicting one heatmap for each keypoint. # # Let's take a look at the smallest one from the GluonCV Model Zoo, using ``ResNet18`` as its base model. # # We load the pre-trained parameters for the ``ResNet18`` layers, # and initialize the deconvolution layer and the final convolution layer. context = mx.gpu(0) net = get_model('simple_pose_resnet18_v1b', num_joints=17, pretrained_base=True, ctx=context, pretrained_ctx=context) net.deconv_layers.initialize(ctx=context) net.final_layer.initialize(ctx=context) ############################################################################# # We can take a look at the summary of the model x = mx.nd.ones((1, 3, 256, 192), ctx=context) net.summary(x) ############################################################################# # # .. note:: # # The Batch Normalization implementation from cuDNN has a negative impact on the model training, # as reported in these issues [2]_, [3]_ .
def main():
    """Train GhostNet on ImageNet according to the parsed command-line options.

    The function sets up logging, the learning-rate schedule, the optimizer
    parameters, the network (optionally resumed from ``opt.resume_params``),
    an optional teacher network for distillation, and the data pipeline
    (RecordIO, DALI or a Gluon ``DataLoader``). It then hybridizes the
    network(s) and runs the training loop defined by the nested ``train``.

    All configuration comes from ``parse_args()``; nothing is returned.
    Checkpoints are written under ``opt.save_dir`` and scalars/graph are
    reported through a ``SummaryWriter`` rooted at ``opt.log_dir``.
    """
    opt = parse_args()

    makedirs(opt.log_dir)

    filehandler = logging.FileHandler(opt.log_dir + '/' + opt.logging_file)
    streamhandler = logging.StreamHandler()

    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)

    logger.info(opt)

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167

    num_gpus = opt.num_gpus
    # opt.batch_size is per device; scale it to the global batch size.
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    # The decay schedule starts after warm-up, so shift the decay epochs.
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    # Linear warm-up followed by the user-selected schedule.
    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=opt.lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=opt.lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])

    sw = SummaryWriter(logdir=opt.log_dir, flush_secs=5, verbose=False)
    optimizer = 'sgd'
    optimizer_params = {
        'wd': opt.wd,
        'momentum': opt.momentum,
        'lr_scheduler': lr_scheduler
    }
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    #net = ghostnet(num_classes=classes, width=opt.width, dropout=opt.dropout)
    net = ghostnet()
    net.cast(opt.dtype)
    #net.hybridize()
    # Compare by value: identity comparison (`is`) against a str literal is
    # implementation-dependent and emits a SyntaxWarning on Python 3.8+.
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)

    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        teacher = get_model(teacher_name, pretrained=True, classes=classes, ctx=context)
        teacher.cast(opt.dtype)
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_train_idx, rec_val, rec_val_idx, batch_size, num_workers, seed):
        """Build RecordIO train/val iterators and the matching batch splitter."""
        rec_train = os.path.expanduser(rec_train)
        rec_train_idx = os.path.expanduser(rec_train_idx)
        rec_val = os.path.expanduser(rec_val)
        rec_val_idx = os.path.expanduser(rec_val_idx)
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))
        mean_rgb = [123.68, 116.779, 103.939]
        std_rgb = [58.393, 57.12, 57.375]

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            return data, label

        train_data = mx.io.ImageRecordIter(
            path_imgrec=rec_train,
            path_imgidx=rec_train_idx,
            preprocess_threads=num_workers,
            shuffle=True,
            batch_size=batch_size,
            data_shape=(3, input_size, input_size),
            mean_r=mean_rgb[0],
            mean_g=mean_rgb[1],
            mean_b=mean_rgb[2],
            std_r=std_rgb[0],
            std_g=std_rgb[1],
            std_b=std_rgb[2],
            rand_mirror=True,
            random_resized_crop=True,
            max_aspect_ratio=4. / 3.,
            min_aspect_ratio=3. / 4.,
            max_random_area=1,
            min_random_area=0.08,
            brightness=jitter_param,
            saturation=jitter_param,
            contrast=jitter_param,
            pca_noise=lighting_param,
            seed=seed,
            seed_aug=seed,
            shuffle_chunk_seed=seed,
        )
        val_data = mx.io.ImageRecordIter(
            path_imgrec=rec_val,
            path_imgidx=rec_val_idx,
            preprocess_threads=num_workers,
            shuffle=False,
            batch_size=batch_size,
            resize=resize,
            data_shape=(3, input_size, input_size),
            mean_r=mean_rgb[0],
            mean_g=mean_rgb[1],
            mean_b=mean_rgb[2],
            std_r=std_rgb[0],
            std_g=std_rgb[1],
            std_b=std_rgb[2],
        )
        return train_data, val_data, batch_fn

    def get_data_loader(data_dir, batch_size, num_workers):
        """Build Gluon DataLoaders over raw ImageNet images and the batch splitter."""
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            return data, label

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            normalize
        ])
        transform_test = transforms.Compose([
            transforms.Resize(resize, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize
        ])

        train_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        return train_data, val_data, batch_fn

    if opt.use_rec:
        if opt.use_dali:
            train_data = dali.get_data_rec((3, opt.input_size, opt.input_size),
                                           opt.crop_ratio, opt.rec_train, opt.rec_train_idx,
                                           opt.batch_size, num_workers=2, train=True, shuffle=True,
                                           backend='dali-gpu', gpu_ids=[0, 1], kv_store='nccl',
                                           dtype=opt.dtype, input_layout='NCHW')
            val_data = dali.get_data_rec((3, opt.input_size, opt.input_size),
                                         opt.crop_ratio, opt.rec_val, opt.rec_val_idx,
                                         opt.batch_size, num_workers=2, train=False, shuffle=False,
                                         backend='dali-gpu', gpu_ids=[0, 1], kv_store='nccl',
                                         dtype=opt.dtype, input_layout='NCHW')

            def batch_fn(batch, ctx):
                data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
                label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
                return data, label
        else:
            train_data, val_data, batch_fn = get_data_rec(opt.rec_train, opt.rec_train_idx,
                                                          opt.rec_val, opt.rec_val_idx,
                                                          batch_size, num_workers, opt.random_seed)
    else:
        train_data, val_data, batch_fn = get_data_loader(opt.data_dir, batch_size, num_workers)

    # With mixup the labels are soft, so accuracy is replaced by RMSE.
    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        """Return mixup (and optionally smoothed) one-hot labels for each device."""
        if isinstance(label, nd.NDArray):
            label = [label]
        res = []
        for l in label:
            y1 = l.one_hot(classes, on_value=1 - eta + eta / classes, off_value=eta / classes)
            y2 = l[::-1].one_hot(classes, on_value=1 - eta + eta / classes, off_value=eta / classes)
            res.append(lam * y1 + (1 - lam) * y2)
        return res

    def smooth(label, classes, eta=0.1):
        """Return label-smoothed one-hot labels for each device."""
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            res = l.one_hot(classes, on_value=1 - eta + eta / classes, off_value=eta / classes)
            smoothed.append(res)
        return smoothed

    def test(net, batch_fn, ctx, val_data):
        """Evaluate ``net`` on ``val_data`` and return ``(top1, top5)`` accuracy."""
        if opt.use_rec:
            val_data.reset()
        acc_top1.reset()
        acc_top5.reset()
        for batch in val_data:
            data, label = batch_fn(batch, ctx)
            outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    def train(ctx):
        """Run the full training loop on the given context(s)."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        # Only initialize from scratch when no checkpoint was loaded above.
        if opt.resume_params == '':
            net.initialize(mx.init.MSRAPrelu(), ctx=ctx, force_reinit=True)

        if opt.no_wd:
            # Exclude batch-norm scale/shift and biases from weight decay.
            for v in net.collect_params('.*beta|.*gamma|.*bias').values():
                v.wd_mult = 0.0

        trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        # Dense (one-hot) labels are produced by label smoothing and mixup.
        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(temperature=opt.temperature,
                                                             hard_weight=opt.hard_weight,
                                                             sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)

        best_val_score = 0
        iteration = 0

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            train_metric.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    # Mixup is switched off for the last mixup_off_epoch epochs.
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    # Keep the hard labels for the accuracy metric.
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    teacher_prob = [
                        nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature)
                        for X in data
                    ]

                with ag.record():
                    outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    if distillation:
                        loss = [
                            L(yhat.astype('float32', copy=False),
                              y.astype('float32', copy=False),
                              p.astype('float32', copy=False))
                            for yhat, y, p in zip(outputs, label, teacher_prob)
                        ]
                    else:
                        loss = [
                            L(yhat, y.astype(opt.dtype, copy=False))
                            for yhat, y in zip(outputs, label)
                        ]
                for l in loss:
                    l.backward()
                sw.add_scalar(tag='train_loss',
                              value=sum([l.sum().asscalar() for l in loss]) / len(loss),
                              global_step=iteration)
                trainer.step(batch_size)

                if opt.mixup:
                    output_softmax = [
                        nd.SoftmaxActivation(out.astype('float32', copy=False))
                        for out in outputs
                    ]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                train_metric_name, train_metric_score = train_metric.get()
                sw.add_scalar(tag='train_{}_curves'.format(train_metric_name),
                              value=('train_{}_value'.format(train_metric_name),
                                     train_metric_score),
                              global_step=iteration)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f' %
                        (epoch, i, batch_size * opt.log_interval / (time.time() - btic),
                         train_metric_name, train_metric_score, trainer.learning_rate))
                    btic = time.time()

                iteration += 1

            if epoch == 0:
                sw.add_graph(net)

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))

            top1_val_acc, top5_val_acc = test(net, batch_fn, ctx, val_data)
            sw.add_scalar(tag='val_acc_curves',
                          value=('valid_acc_value', top1_val_acc),
                          global_step=epoch)

            logger.info('Epoch [%d] training: %s=%f' %
                        (epoch, train_metric_name, train_metric_score))
            logger.info('Epoch [%d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info('Epoch [%d] validation: top1_acc=%f top5_acc=%f' %
                        (epoch, top1_val_acc, top5_val_acc))

            if top1_val_acc > best_val_score:
                best_val_score = top1_val_acc
                net.collect_params().save('%s/%.4f-ghostnet_imagenet-%d-best.params' %
                                          (save_dir, best_val_score, epoch))
                trainer.save_states('%s/%.4f-ghostnet_imagenet-%d-best.states' %
                                    (save_dir, best_val_score, epoch))

            if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
                net.collect_params().save('%s/ghostnet_imagenet-%d.params' % (save_dir, epoch))
                trainer.save_states('%s/ghostnet_imagenet-%d.states' % (save_dir, epoch))

        sw.close()
        if save_frequency and save_dir:
            net.collect_params().save('%s/ghostnet_imagenet-%d.params' %
                                      (save_dir, opt.num_epochs - 1))
            trainer.save_states('%s/ghostnet_imagenet-%d.states' %
                                (save_dir, opt.num_epochs - 1))

    net.hybridize(static_alloc=True, static_shape=True)
    if distillation:
        teacher.hybridize(static_alloc=True, static_shape=True)
    train(context)
# split ground truths gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5)) gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4)) # update metric eval_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids) return eval_metric.get() if __name__ == '__main__': args = parse_args() # training contexts ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()] ctx = ctx if ctx else [mx.cpu()] # network net_name = args.net_params if net_name is not "": net = get_model("yolo3_darknet53_custom", classes = read_classes(args), pretrained_base=True) net.load_parameters(net_name) val_dataset, val_metric = get_dataset(args, read_classes(args)) else: net = model_zoo.get_model('yolo3_darknet53_coco', pretrained=True) val_dataset, val_metric = get_dataset(args,net.classes) net.collect_params().reset_ctx(ctx) val_loader = get_dataloader(net, val_dataset, args.data_shape, args.batch_size, args.num_workers, args) map_name, mean_ap = validate(net, val_loader, ctx, val_metric) val_msg = '\n'.join(['{}={}'.format(k, v) for k, v in zip(map_name, mean_ap)]) print('Validation: \n{}'.format(val_msg))
# network, Region Proposal Network(including its own anchor system, proposal generator), # region-aware pooling layers, class predictors and bounding box offset predictors. # # We highly recommend you to read the original paper to learn more about the ideas # behind Faster-RCNN [Ren15]_. # # `Gluon Model Zoo <../../model_zoo/index.html>`__ has a few built-in Faster-RCNN networks, more on the way. # You can load your favorate one with one simple line of code: # # .. hint:: # # To avoid downloading mdoel in this tutorial, we set `pretrained_base=False`, # in practice we usually want to load pre-trained imagenet models by setting # `pretrained_base=True`. from gluoncv import model_zoo net = model_zoo.get_model('faster_rcnn_resnet50_v1b_voc', pretrained_base=False) print(net) ############################################################################## # Faster-RCNN network is callable with image tensor import mxnet as mx x = mx.nd.zeros(shape=(1, 3, 600, 800)) net.initialize() cids, scores, bboxes = net(x) ############################################################################## # Faster-RCNN returns three values, where ``cids`` are the class labels, # ``scores`` are confidence scores of each prediction, # and ``bboxes`` are absolute coordinates of corresponding bounding boxes. ##############################################################################
val_dataset.transform(transform_val), batch_size=batch_size, shuffle=False, last_batch='keep', num_workers=num_workers) return val_dataset, val_data, val_batch_fn input_size = [int(i) for i in opt.input_size.split(',')] val_dataset, val_data, val_batch_fn = get_data_loader(opt.data_dir, batch_size, num_workers, input_size) val_metric = COCOKeyPointsMetric(val_dataset, 'coco_keypoints', data_shape=tuple(input_size), in_vis_thresh=opt.score_threshold) use_pretrained = True if not opt.params_file else False model_name = opt.model net = get_model(model_name, ctx=context, num_joints=num_joints, pretrained=use_pretrained) if not use_pretrained: net.load_parameters(opt.params_file, ctx=context) net.hybridize() def validate(val_data, val_dataset, net, ctx): if isinstance(ctx, mx.Context): ctx = [ctx] val_metric.reset() from tqdm import tqdm for batch in tqdm(val_data): data, scale, center, score, imgid = val_batch_fn(batch, ctx) outputs = [net(X) for X in data]
def main():
    """Run video classification inference over every entry of ``opt.data_list``.

    Sets up logging under ``opt.save_dir``, selects the device, builds the
    test-time transform and the network, then predicts a class for each video
    listed in the annotation file. Logits and/or predicted labels are saved as
    ``.npy`` files when ``opt.save_logits`` / ``opt.save_preds`` are set.

    All configuration comes from ``parse_args()``; nothing is returned.
    """
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    gc.set_threshold(100, 5, 5)

    # set env: a gpu_id of -1 selects the CPU
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        gpu_id = opt.gpu_id
        context = mx.gpu(gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    # A non-empty hashtag replaces the boolean flag and is passed as `pretrained`.
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments, num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Compare by value: identity comparison (`is not`) against a str literal is
    # implementation-dependent and emits a SyntaxWarning on Python 3.8+.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' % (opt.resume_params))
    else:
        logger.info('Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get classes list, if we are using a pretrained network from the model_zoo
    classes = None
    if opt.use_pretrained:
        if "kinetics400" in model_name:
            classes = Kinetics400Attr().classes
        elif "ucf101" in model_name:
            classes = UCF101Attr().classes
        elif "hmdb51" in model_name:
            classes = HMDB51Attr().classes
        elif "sthsth" in model_name:
            classes = SomethingSomethingV2Attr().classes

    # get data
    anno_file = opt.data_list
    # Use a context manager so the annotation file is closed after reading.
    with open(anno_file, 'r') as f:
        data_list = f.readlines()
    logger.info('Load %d video samples.' % len(data_list))

    # build a pseudo dataset instance to use its children class methods
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 data_aug=opt.data_aug,
                                 lazy_init=True)

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        # The first whitespace-separated field of each line is the video path.
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        pred = net(video_input.astype(opt.dtype, copy=False))
        if opt.save_logits:
            logits_file = '%s_%s_logits.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, logits_file), pred.asnumpy())
        pred_label = np.argmax(pred.asnumpy())
        if opt.save_preds:
            preds_file = '%s_%s_preds.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, preds_file), pred_label)

        # Try to report a text label instead of the number.
        if classes:
            pred_label = classes[pred_label]

        logger.info('%04d/%04d: %s is predicted to class %s' %
                    (vid, len(data_list), video_name, pred_label))

    end_time = time.time()
    logger.info('Total inference time is %4.2f minutes' % ((end_time - start_time) / 60))
from gluoncv.model_zoo import get_model
from gluoncv.utils import makedirs, TrainingHistory

################################################################
#
# There are numerous structures for convolutional neural networks.
# Here we pick a simple yet well-performing structure, ``cifar_resnet20_v1``, for the
# tutorial.

# number of GPUs to use
num_gpus = 1
ctx = [mx.gpu(i) for i in range(num_gpus)]

# Get the model CIFAR_ResNet20_v1, with 10 output classes, without pre-trained weights
net = get_model('cifar_resnet20_v1', classes=10)
net.initialize(mx.init.Xavier(), ctx = ctx)

################################################################
# Data Augmentation and Data Loader
# ---------------------------------
#
# Data augmentation is a common technique used for training. It is
# based on the assumption that, for the same object, photos under different
# composition, lighting conditions, or color should all yield the same prediction.
#
# Here are photos of the Golden Bridge, taken by many people,
# at different times and from different angles.
# We can easily tell that they are photos of the same thing.
#
# |image-golden-bridge|
########################################################## # Mask RCNN Network # ------------------- # In GluonCV, Mask RCNN network :py:class:`gluoncv.model_zoo.MaskRCNN` # is inherited from Faster RCNN network :py:class:`gluoncv.model_zoo.FasterRCNN`. # # `Gluon Model Zoo <../../model_zoo/index.html>`__ has some Mask RCNN pretrained networks. # You can load your favorite one with one simple line of code: # # .. hint:: # # To avoid downloading models in this tutorial, we set ``pretrained_base=False``, # in practice we usually want to load pre-trained imagenet models by setting # ``pretrained_base=True``. from gluoncv import model_zoo net = model_zoo.get_model('mask_rcnn_resnet50_v1b_coco', pretrained_base=False) print(net) ############################################################################## # Mask-RCNN has identical inputs but produces an additional output. # ``cids`` are the class labels, # ``scores`` are confidence scores of each prediction, # ``bboxes`` are absolute coordinates of corresponding bounding boxes. # ``masks`` are predicted segmentation masks corresponding to each bounding box import mxnet as mx x = mx.nd.zeros(shape=(1, 3, 600, 800)) net.initialize() cids, scores, bboxes, masks = net(x) ############################################################################## # During training, an additional output is returned:
logger.info('[Epoch {}] Validation: \n{}'.format(args.start_epoch, val_msg)) if __name__ == '__main__': args = parse_args() # evaluating contexts ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()] ctx = ctx if ctx else [mx.cpu()] # network net_name = '_'.join(('yolo3', 'tiny_darknet', args.dataset)) args.save_prefix += net_name net = get_model(net_name) if not args.resume.strip(): if args.start_epoch == -1: raise ValueError( "You have to either give the path of the saved model or specify the start epoch!" ) # Predict the path of the saved weights from the `start_epoch` parameter args.resume = '{:s}_{:04d}.params'.format(args.save_prefix, args.start_epoch) print(f'Loading weights from {args.resume}') net.load_parameters(args.resume.strip()) # val data val_dataset, eval_metric = get_dataset(args.dataset, args) val_data = get_dataloader(val_dataset, args.data_shape, args.batch_size, args.num_workers, args)
from gluoncv import model_zoo

# Request the 'yolo3_darknet53_coco' network with pretrained=True. The return
# value is not kept, so this presumably serves only to fetch/cache the
# pretrained weights -- TODO confirm.
model_zoo.get_model('yolo3_darknet53_coco', pretrained=True)
batch_size=batch_size, shuffle=False, num_workers = num_workers) ################################################################################ # # Note that only ``train_data`` uses ``transform_train``, while # ``val_data`` and ``test_data`` use ``transform_test`` to produce deterministic # results for evaluation. # # Model and Trainer # ----------------- # # We use a pre-trained ``ResNet50_v2`` model, which has balanced accuracy and # computation cost. model_name = 'ResNet50_v2' finetune_net = get_model(model_name, pretrained=True) with finetune_net.name_scope(): finetune_net.output = nn.Dense(classes) finetune_net.output.initialize(init.Xavier(), ctx = ctx) finetune_net.collect_params().reset_ctx(ctx) finetune_net.hybridize() trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', { 'learning_rate': lr, 'momentum': momentum, 'wd': wd}) metric = mx.metric.Accuracy() L = gluon.loss.SoftmaxCrossEntropyLoss() ################################################################################ # Here's an illustration of the pre-trained model # and our newly defined model: #
save_params(net, best_map, current_map, epoch, args.save_interval, args.save_prefix) if __name__ == '__main__': args = parse_args() # fix seed for mxnet, numpy and python builtin random generator. gutils.random.seed(args.seed) # training contexts ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()] ctx = ctx if ctx else [mx.cpu()] # network net_name = '_'.join(('ssd', str(args.data_shape), args.network, args.dataset)) args.save_prefix += net_name if args.syncbn and len(ctx) > 1: net = get_model(net_name, pretrained_base=True, norm_layer=gluon.contrib.nn.SyncBatchNorm, norm_kwargs={'num_devices': len(ctx)}) async_net = get_model(net_name, pretrained_base=False) # used by cpu worker else: net = get_model(net_name, pretrained_base=True, norm_layer=gluon.nn.BatchNorm) async_net = net if args.resume.strip(): net.load_parameters(args.resume.strip()) async_net.load_parameters(args.resume.strip()) else: with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") net.initialize() async_net.initialize() # training data train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
'''Loading the model and webcam ---------------------------- In this tutorial we feed frames from the webcam into a detector, then we estimate the pose for each detected person in the frame. For the detector we use ``ssd_512_mobilenet1.0_coco`` as it is fast and accurate enough. .. code-block:: python''' ctx = mx.cpu() detector_name = "ssd_512_mobilenet1.0_coco" detector = get_model(detector_name, pretrained=True, ctx=ctx) '''The pre-trained model tries to detect all 80 classes of objects in an image, however in pose estimation we are only interested in one object class: person. To speed up the detector, we can reset the prediction head to only include the classes we need. .. code-block:: python''' detector.reset_class(classes=['person'], reuse_weights={'person':'person'}) detector.hybridize() '''Next for the estimator, we choose ``simple_pose_resnet18_v1b`` because it is lightweight. The default ``simple_pose_resnet18_v1b`` model was trained with input size 256x192.
outputs = tracker.track(frame, ctx=mx.cpu()) pred_bbox = outputs['bbox'] pred_bboxes.append(pred_bbox) scores.append(outputs['best_score']) pred_bbox = list(map(int, pred_bbox)) cv2.rectangle( frame, (pred_bbox[0], pred_bbox[1]), (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]), (0, 255, 255), 3) cv2.imwrite(os.path.join(opt.save_dir, '%04d.jpg' % (ind + 1)), frame) if __name__ == '__main__': opt = parse_args() # ###################################################################### # Load a pretrained model # ------------------------- # # Let's get a pretrained SiamRPN model. We pick the one using AlexNet as the base model. # By specifying ``pretrained=True``, it will automatically download the model from the model # zoo if necessary. For more pretrained models, please refer to # :doc:`../../model_zoo/index`. net = model_zoo.get_model(opt.netwrok, ctx=mx.cpu(), pretrained=True) tracker = build_tracker(net) # Pre-process data video_frames = read_data(opt) ###################################################################### plt.imshow(video_frames[0]) plt.show() # Predict with a SiamRPN and make inference inference(video_frames, tracker, opt)
from gluoncv.data.transforms.pose import detector_to_simple_pose, heatmap_to_coord ###################################################################### # Load a pretrained model # ------------------------- # # Let's get a Simple Pose model trained with input images of size 256x192 on MS COCO # dataset. We pick the one using ResNet-18 V1b as the base model. By specifying # ``pretrained=True``, it will automatically download the model from the model # zoo if necessary. For more pretrained models, please refer to # :doc:`../../model_zoo/index`. # # Note that a Simple Pose model takes a top-down strategy to estimate # human pose in detected bounding boxes from an object detection model. detector = model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True) pose_net = model_zoo.get_model('simple_pose_resnet18_v1b', pretrained=True) # Note that we can reset the classes of the detector to only include # human, so that the NMS process is faster. detector.reset_class(["person"], reuse_weights=['person']) ###################################################################### # Pre-process an image for detector, and make inference # -------------------- # # Next we download an image, and pre-process with preset data transforms. Here we # specify that we resize the short edge of the image to 512 px. But you can # feed an arbitrarily sized image. #
image_shape = (3, 224, 224) data_shape = (batch_size, ) + image_shape train_data = SyntheticDataIter(num_classes, data_shape, epoch_size, np.float32, context) val_data = None # Get model from GluonCV model zoo # https://gluon-cv.mxnet.io/model_zoo/index.html kwargs = { 'ctx': context, 'pretrained': args.use_pretrained, 'classes': num_classes } if args.last_gamma: kwargs['last_gamma'] = True net = get_model(args.model, **kwargs) net.cast(args.dtype) # Create initializer initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) def train_gluon(): def evaluate(epoch): if not args.use_rec: return val_data.reset() acc_top1 = mx.metric.Accuracy()
def main(logger):
    """Extract one feature array per listed video and save it as a ``.npy`` file.

    Options are read from ``parse_args()``. For every line of ``opt.data_list``
    the first whitespace-separated field is taken as the video path, the video
    is read with ``read_data``, passed through the network and the result is
    written to ``opt.save_dir`` as ``<model>_<video name>_feat.npy``.

    Parameters
    ----------
    logger : object
        Logger whose ``info`` method receives configuration and progress
        messages.
    """
    opt = parse_args()
    logger.info(opt)
    gc.set_threshold(100, 5, 5)

    if not os.path.exists(opt.save_dir):
        os.makedirs(opt.save_dir)

    # set env
    gpu_id = opt.gpu_id
    context = mx.gpu(gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    # A non-empty hashtag replaces the boolean flag so that get_model receives
    # the hashtag string as its `pretrained` argument.
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained,
                    feat_ext=True, num_segments=opt.num_segments, num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Compare by value: `is not ''` tests object identity, which is
    # implementation-dependent for strings and a SyntaxWarning on Python 3.8+.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' % (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get data
    anno_file = opt.data_list
    # Context manager so the annotation file is closed even if reading fails.
    with open(anno_file, 'r') as f:
        data_list = f.readlines()
    num_videos = len(data_list)
    logger.info('Load %d video samples.' % num_videos)

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test)
        video_input = video_data.as_in_context(context)
        video_feat = net(video_input.astype(opt.dtype, copy=False))

        feat_file = '%s_%s_feat.npy' % (model_name, video_name)
        np.save(os.path.join(opt.save_dir, feat_file), video_feat.asnumpy())

        if vid > 0 and vid % opt.log_interval == 0:
            logger.info('%04d/%04d is done' % (vid, num_videos))

    end_time = time.time()
    logger.info('Total feature extraction time is %4.2f minutes' %
                ((end_time - start_time) / 60))
def test(args):
    """Run a segmentation model over the test or validation split.

    With ``args.eval`` set, the 'val' split is scored and running pixel
    accuracy / mean IoU are shown on the progress bar. Otherwise the 'test'
    split is predicted and one colour-palette PNG per image is written into
    the ``outdir`` folder.

    Parameters
    ----------
    args : namespace
        Parsed options; the attributes read here are ``eval``, ``dataset``,
        ``test_batch_size``, ``workers``, ``model_zoo``, ``model``, ``ctx``,
        ``backbone``, ``norm_layer`` and ``resume``.

    Raises
    ------
    AssertionError
        If ``args.resume`` is None.
    RuntimeError
        If ``args.resume`` is not an existing file.
    """
    # output folder
    outdir = 'outdir'
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    # image transform
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    # dataset and dataloader
    if args.eval:
        testset = get_segmentation_dataset(
            args.dataset, split='val', mode='testval', transform=input_transform)
        # int64 accumulators for the running totals over the whole split
        total_inter, total_union, total_correct, total_label = \
            np.int64(0), np.int64(0), np.int64(0), np.int64(0)
    else:
        testset = get_segmentation_dataset(
            args.dataset, split='test', mode='test', transform=input_transform)
    test_data = gluon.data.DataLoader(
        testset, args.test_batch_size, last_batch='keep',
        batchify_fn=ms_batchify_fn, num_workers=args.workers)
    # create network
    if args.model_zoo is not None:
        model = get_model(args.model_zoo, pretrained=True)
    else:
        model = get_segmentation_model(model=args.model, dataset=args.dataset, ctx=args.ctx,
                                       backbone=args.backbone, norm_layer=args.norm_layer)
        # load pretrained weight
        assert args.resume is not None, '=> Please provide the checkpoint using --resume'
        if os.path.isfile(args.resume):
            # load_parameters replaces the deprecated load_params and matches
            # the other scripts in this file.
            model.load_parameters(args.resume, ctx=args.ctx)
        else:
            raise RuntimeError("=> no checkpoint found at '{}'" \
                .format(args.resume))
    print(model)
    evaluator = MultiEvalModel(model, testset.num_class, ctx_list=args.ctx)

    tbar = tqdm(test_data)
    for i, (data, dsts) in enumerate(tbar):
        # The forward pass is identical in both modes; only the use of `dsts`
        # differs (targets when evaluating, image paths when predicting).
        predicts = evaluator.parallel_forward(data)
        if args.eval:
            targets = dsts
            for predict, target in zip(predicts, targets):
                target = target.as_in_context(predict[0].context)
                correct, labeled = batch_pix_accuracy(predict[0], target)
                inter, union = batch_intersection_union(
                    predict[0], target, testset.num_class)
                total_correct += correct.astype('int64')
                total_label += labeled.astype('int64')
                total_inter += inter.astype('int64')
                total_union += union.astype('int64')
            # np.spacing(1) keeps the denominators non-zero
            pixAcc = np.float64(1.0) * total_correct / (
                np.spacing(1, dtype=np.float64) + total_label)
            IoU = np.float64(1.0) * total_inter / (
                np.spacing(1, dtype=np.float64) + total_union)
            mIoU = IoU.mean()
            tbar.set_description('pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
        else:
            im_paths = dsts
            for predict, impath in zip(predicts, im_paths):
                predict = mx.nd.squeeze(mx.nd.argmax(predict[0], 1)).asnumpy()
                mask = get_color_pallete(predict, args.dataset)
                outname = os.path.splitext(impath)[0] + '.png'
                mask.save(os.path.join(outdir, outname))
# training contexts: one GPU context per non-empty id in --gpus, else CPU
ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
ctx = ctx if ctx else [mx.cpu()]
args.batch_size = len(ctx)  # 1 batch per device

# network
kwargs = {}
module_list = []
if args.use_fpn:
    module_list.append('fpn')
if args.norm_layer is not None:
    module_list.append(args.norm_layer)
    if args.norm_layer == 'bn':
        # Count the contexts actually created above. Splitting the raw --gpus
        # string again would also count empty entries (e.g. a trailing comma)
        # and disagree with `ctx`.
        kwargs['num_devices'] = len(ctx)
# The model-zoo name is assembled from the selected modules, e.g.
# mask_rcnn[_fpn][_<norm_layer>]_<network>_<dataset>.
net_name = '_'.join(('mask_rcnn', *module_list, args.network, args.dataset))
args.save_prefix += net_name
net = get_model(net_name, pretrained_base=True, **kwargs)
if args.resume.strip():
    net.load_parameters(args.resume.strip())
else:
    # Only initialize parameters that do not hold data yet, so the pretrained
    # base weights are left untouched.
    for param in net.collect_params().values():
        if param._data is not None:
            continue
        param.initialize()
net.collect_params().reset_ctx(ctx)

# training data
train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
train_data, val_data = get_dataloader(
    net, train_dataset, val_dataset, MaskRCNNDefaultTrainTransform,
    MaskRCNNDefaultValTransform, args.batch_size, args.num_workers, args.use_fpn)
# (which means it can be exported # to symbol to run in C++, Scala and other language bindings. # We will cover this usage in future tutorials). # In terms of structure, SSD networks are composed of base feature extraction # network, anchor generators, class predictors and bounding box offset predictors. # # For more details on how SSD detector works, please refer to our introductory # [tutorial](http://gluon.mxnet.io/chapter08_computer-vision/object-detection.html) # You can also refer to the original paper to learn more about the intuitions # behind SSD. # # `Gluon Model Zoo <../../model_zoo/index.html>`__ has a lot of built-in SSD networks. # You can load your favorate one with one simple line of code: from gluoncv import model_zoo net = model_zoo.get_model('ssd_300_vgg16_atrous_voc', pretrained_base=False) print(net) ############################################################################## # SSD network is a HybridBlock as mentioned before. You can call it with an input as: import mxnet as mx x = mx.nd.zeros(shape=(1, 3, 512, 512)) net.initialize() cids, scores, bboxes = net(x) ############################################################################## # SSD returns three values, where ``cids`` are the class labels, # ``scores`` are confidence scores of each prediction, # and ``bboxes`` are absolute coordinates of corresponding bounding boxes.
from matplotlib import pyplot as plt
import gluoncv
from gluoncv import model_zoo, data, utils

######################################################################
# Load a pretrained model
# -------------------------
#
# Let's get a Faster RCNN model trained on Pascal VOC
# dataset with ResNet-50 backbone. By specifying
# ``pretrained=True``, it will automatically download the model from the model
# zoo if necessary. For more pretrained models, please refer to
# :doc:`../../model_zoo/index`.

net = model_zoo.get_model('faster_rcnn_resnet50_v2a_voc', pretrained=True)

######################################################################
# Pre-process an image
# --------------------
#
# Next we download an image, and pre-process with preset data transforms. Here we
# specify that we resize the short edge of the image to 512 px. But you can
# feed an arbitrarily sized image.
#
# You can provide a list of image file names, such as ``[im_fname1, im_fname2,
# ...]`` to :py:func:`gluoncv.data.transforms.presets.ssd.load_test` if you
# want to load multiple images together.
#
# This function returns two results. The first is an NDArray with shape
# `(batch_size, RGB_channels, height, width)`. It can be fed into the
First let's import some necessary libraries:
"""

from gluoncv import model_zoo, data, utils
from matplotlib import pyplot as plt

######################################################################
# Load a pretrained model
# -------------------------
#
# Let's get a YOLOv3 model trained on Pascal VOC
# dataset with Darknet53 as the base model. By specifying
# ``pretrained=True``, it will automatically download the model from the model
# zoo if necessary. For more pretrained models, please refer to
# :doc:`../../model_zoo/index`.

net = model_zoo.get_model('yolo3_darknet53_voc', pretrained=True)

######################################################################
# Pre-process an image
# --------------------
#
# Next we download an image, and pre-process with preset data transforms. Here we
# specify that we resize the short edge of the image to 512 px. But you can
# feed an arbitrarily sized image.
#
# You can provide a list of image file names, such as ``[im_fname1, im_fname2,
# ...]`` to :py:func:`gluoncv.data.transforms.presets.ssd.load_test` if you
# want to load multiple images together.
#
# This function returns two results. The first is an NDArray with shape
# `(batch_size, RGB_channels, height, width)`. It can be fed into the
# model directly. The second one contains the images in numpy format to
def main():
    """Evaluate a video classification model on UCF101 with ten-crop testing.

    Options come from ``parse_args()``. The model is built from the model
    zoo, optionally overridden by ``opt.resume_params``, then top-1/top-5
    accuracy over the validation list and the elapsed time are printed.
    """
    opt = parse_args()

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name, nclass=classes, pretrained=True, tsn=opt.use_tsn)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Compare by value: `is not ''` tests object identity, which is
    # implementation-dependent for strings and a SyntaxWarning on Python 3.8+.
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded' % (opt.resume_params))

    # get data
    normalize = video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform_test = transforms.Compose(
        [video.VideoTenCrop(opt.input_size),
         video.VideoToTensor(),
         normalize])
    val_dataset = ucf101.classification.UCF101(
        setting=opt.val_list, root=opt.data_dir, train=False,
        new_width=opt.new_width, new_height=opt.new_height,
        target_width=opt.input_size, target_height=opt.input_size,
        test_mode=True, num_segments=opt.num_segments, transform=transform_test)
    val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size,
                                     shuffle=False, num_workers=num_workers)
    print('Load %d test samples.' % len(val_dataset))

    # start evaluation
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    # Common practice during evaluation is to evenly sample 25 frames from a
    # single video, and then perform 10-crop data augmentation. This leads to
    # 250 samples per video (750 channels). If this is too large to fit into
    # one GPU, we can split it into multiple data batches.
    # `num_split_frames` has to be a multiple of 3.
    num_data_batches = 10
    num_split_frames = 750 // num_data_batches

    def test(ctx, val_data):
        """Return (top1, top5) accuracy of ``net`` over ``val_data``."""
        acc_top1.reset()
        acc_top5.reset()
        num_batches = len(val_data)
        for i, batch in enumerate(val_data):
            outputs = []
            for seg_id in range(num_data_batches):
                # Slice the channel axis into num_data_batches equal chunks.
                bs = seg_id * num_split_frames
                be = (seg_id + 1) * num_split_frames
                new_batch = [batch[0][:, bs:be, :, :], batch[1]]
                data, label = batch_fn(new_batch, ctx)
                for gpu_id, X in enumerate(data):
                    X_reshaped = X.reshape(
                        (-1, 3, opt.input_size, opt.input_size))
                    pred = net(X_reshaped.astype(opt.dtype, copy=False))
                    # The first chunk creates the per-device entry; later
                    # chunks are appended to it along the sample axis.
                    if seg_id == 0:
                        outputs.append(pred)
                    else:
                        outputs[gpu_id] = nd.concat(outputs[gpu_id], pred, dim=0)
            # Perform the mean operation on 250 samples of each video
            for gpu_id, out in enumerate(outputs):
                outputs[gpu_id] = nd.expand_dims(out.mean(axis=0), axis=0)

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

            if i > 0 and i % opt.log_interval == 0:
                print('%04d/%04d is done' % (i, num_batches))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
import gluoncv
from matplotlib import pyplot as plt
from gluoncv import model_zoo, data, utils
from PIL import Image
import numpy as np
import mxnet as mx
import os
import pickle
import time

net = model_zoo.get_model('faster_rcnn_fpn_resnet50_v1b_coco', pretrained=True)
class_list = net.classes
dir_list = ["love"]

# Iterate over the directories actually listed. The previous hard-coded
# range(9) indexed past the end of dir_list and raised IndexError on the
# second iteration. `i` is kept in scope for any code that still indexes by it.
for i, dir_name in enumerate(dir_list):
    start = time.time()
    step = 0
    os.chdir('/hdd/user16')
    # One "<dir>.txt" report file per directory, created in /hdd/user16.
    file_name = dir_name + '.txt'
    # NOTE(review): no close() for this handle is visible in this section —
    # confirm it is closed once the directory has been processed.
    f = open(file_name, 'w')
    dir_path = '/hdd/user16/' + dir_name
    os.chdir(dir_path)
    # os.listdir() without arguments lists the directory just switched to.
    file_names = os.listdir()
    print(file_names)
    for file_name_ in file_names:
        f.write(file_name_+' - ')
        step += 1
        img_path = '/hdd/user16/' + dir_name + '/' + str(file_name_)
if __name__ == '__main__':
    args = parse_args()
    # Seed mxnet, numpy and Python's builtin random generator in one call.
    gutils.random.seed(args.seed)

    # Training contexts: every non-empty id in --gpus becomes a GPU context;
    # with no ids the script falls back to a single CPU context.
    gpu_ids = [int(i) for i in args.gpus.split(',') if i.strip()]
    ctx = [mx.gpu(gpu_id) for gpu_id in gpu_ids] or [mx.cpu()]
    args.batch_size = len(ctx)  # 1 batch per device

    # Network: the model-zoo name is fpn_<network>_<dataset>.
    net_name = '_'.join(['fpn', args.network, args.dataset])
    args.save_prefix += net_name
    net = get_model(net_name, pretrained_base=True)
    resume_path = args.resume.strip()
    if resume_path:
        net.load_parameters(resume_path)
    else:
        # Initialize only the parameters that do not hold data yet.
        for param in net.collect_params().values():
            if param._data is None:
                param.initialize()
    net.collect_params().reset_ctx(ctx)

    # Training data
    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
    train_data, val_data = get_dataloader(
        net, train_dataset, val_dataset, args.batch_size, args.num_workers)

    # training
def __init__(self, args):
    """Build data loaders, network, loss, optimizer and metric from ``args``.

    Parameters
    ----------
    args : namespace
        Parsed training options. Dataset, model, optimizer and device
        settings are all read from it.

    Raises
    ------
    RuntimeError
        If ``args.resume`` is given but is not an existing file.
    """
    self.args = args

    # image transform
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize([.485, .456, .406], [.229, .224, .225])
    input_transform = transforms.Compose([to_tensor, normalize])

    # dataset and dataloader
    dataset_options = dict(transform=input_transform,
                           base_size=args.base_size,
                           crop_size=args.crop_size)
    trainset = get_segmentation_dataset(
        args.dataset, split=args.train_split, mode='train', **dataset_options)
    valset = get_segmentation_dataset(
        args.dataset, split='val', mode='val', **dataset_options)
    self.train_data = gluon.data.DataLoader(
        trainset, args.batch_size, shuffle=True,
        last_batch='rollover', num_workers=args.workers)
    self.eval_data = gluon.data.DataLoader(
        valset, args.test_batch_size,
        last_batch='rollover', num_workers=args.workers)

    # create network: a named model-zoo entry wins over the model/backbone pair
    if args.model_zoo is None:
        model = get_segmentation_model(
            model=args.model, dataset=args.dataset, backbone=args.backbone,
            norm_layer=args.norm_layer, norm_kwargs=args.norm_kwargs,
            aux=args.aux, crop_size=args.crop_size)
    else:
        model = get_model(args.model_zoo, pretrained=True)
    model.cast(args.dtype)
    print(model)
    self.net = DataParallelModel(model, args.ctx, args.syncbn)
    self.evaluator = DataParallelModel(SegEvalModel(model), args.ctx)

    # resume checkpoint if needed
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        model.load_parameters(args.resume, ctx=args.ctx)

    # create criterion
    loss = MixSoftmaxCrossEntropyLoss(args.aux, aux_weight=args.aux_weight)
    self.criterion = DataParallelCriterion(loss, args.ctx, args.syncbn)

    # optimizer and lr scheduling
    self.lr_scheduler = LRScheduler(mode='poly', baselr=args.lr,
                                    niters=len(self.train_data),
                                    nepochs=args.epochs)
    kv = mx.kv.create(args.kvstore)
    optimizer_params = dict(lr_scheduler=self.lr_scheduler,
                            wd=args.weight_decay,
                            momentum=args.momentum)
    if args.dtype == 'float16':
        optimizer_params['multi_precision'] = True

    # With --no-wd, weight decay is switched off for beta, gamma and bias
    # parameters.
    if args.no_wd:
        no_decay = self.net.module.collect_params('.*beta|.*gamma|.*bias')
        for param in no_decay.values():
            param.wd_mult = 0.0

    self.optimizer = gluon.Trainer(self.net.module.collect_params(), 'sgd',
                                   optimizer_params, kvstore=kv)
    # evaluation metrics
    self.metric = gluoncv.utils.metrics.SegmentationMetric(trainset.num_class)
def _prepare_segmentation_test(args, batch_size):
    """Build the dataset, data loader, evaluator and metric used by ``test``."""
    # Only ToTensor is applied; no normalization is part of the transform.
    input_transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    # dataset and dataloader
    if args.eval:
        testset = get_segmentation_dataset(
            args.dataset, split='val', mode='testval', transform=input_transform)
    else:
        testset = get_segmentation_dataset(
            args.dataset, split='test', mode='test', transform=input_transform)
    test_data = gluon.data.DataLoader(
        testset, batch_size, shuffle=False, last_batch='keep',
        batchify_fn=ms_batchify_fn, num_workers=args.workers)
    # create network
    if args.model_zoo is not None:
        model = get_model(args.model_zoo, pretrained=True)
    else:
        model = get_segmentation_model(
            model=args.model, dataset=args.dataset, ctx=args.ctx,
            backbone=args.backbone, norm_layer=args.norm_layer,
            norm_kwargs=args.norm_kwargs, aux=args.aux,
            base_size=args.base_size, crop_size=args.crop_size)
        # load pretrained weight
        assert args.resume is not None, '=> Please provide the checkpoint using --resume'
        if os.path.isfile(args.resume):
            model.load_parameters(args.resume, ctx=args.ctx)
        else:
            raise RuntimeError("=> no checkpoint found at '{}'" \
                .format(args.resume))
    evaluator = MultiEvalModel(model, testset.num_class, ctx_list=args.ctx)
    metric = gluoncv.utils.metrics.SegmentationMetric(testset.num_class)
    print('testset.pred_offset:', testset.pred_offset)
    print('model.crop_size', model.crop_size)
    return testset, test_data, evaluator, metric


def _update_eval_metric(evaluator, metric, data, dsts, tbar):
    """Score one batch and refresh the progress-bar description."""
    predicts = [pred[0] for pred in evaluator.parallel_forward(data)]
    targets = [target.as_in_context(predicts[0].context) for target in dsts]
    print('targets', targets[0].shape)
    metric.update(targets, predicts)
    pixAcc, mIoU = metric.get()
    tbar.set_description('pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))


def _dump_score_map(predict, impath, score_map_dir):
    """Pickle the first three channels of the raw score map for one image."""
    score_map_name = os.path.splitext(impath)[0] + '.pkl'
    score_map_path = os.path.join(score_map_dir, score_map_name)
    with open(score_map_path, 'wb') as fo:
        pickle.dump(predict[0].asnumpy()[0:3, :, :], fo)


def _save_prediction_mask(predict, impath, testset, dataset_name, outdir):
    """Write the arg-max label map of one prediction as a palette PNG."""
    labels = mx.nd.squeeze(mx.nd.argmax(predict[0], 0)).asnumpy() + \
        testset.pred_offset
    mask = get_color_pallete(labels, dataset_name)
    outname = os.path.splitext(impath)[0] + '.png'
    mask.save(os.path.join(outdir, outname))


def test(args):
    """Evaluate or predict with a segmentation model.

    The module-level flag ``horse_change`` selects between two variants that
    differ only in their settings:

    * ``horse_change`` false: masks go to ``args.outdir``, the loader uses
      ``args.test_batch_size`` and, in prediction mode, raw score maps are
      additionally pickled into ``args.scoredir``.
    * ``horse_change`` true: masks go to the fixed folder ``'outdir'``, the
      loader uses ``args.batch_size`` and batch/prediction shapes are printed
      instead of dumping score maps.

    With ``args.eval`` set, the 'val' split is scored and pixel accuracy /
    mean IoU are shown on the progress bar; otherwise the 'test' split is
    predicted and one PNG mask per image is written.

    Parameters
    ----------
    args : namespace
        Parsed command-line options.

    Raises
    ------
    AssertionError
        If ``args.resume`` is None when a checkpoint is required.
    RuntimeError
        If ``args.resume`` is not an existing file.
    """
    # output folder and batch size are the only per-variant settings needed
    # before the shared setup
    if horse_change:
        outdir = 'outdir'
        batch_size = args.batch_size
    else:
        outdir = args.outdir
        batch_size = args.test_batch_size
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    testset, test_data, evaluator, metric = _prepare_segmentation_test(args, batch_size)

    # Raw score maps are only dumped on the non-horse_change prediction path,
    # so args.scoredir is not touched in any other configuration.
    score_map_dir = None
    if not args.eval and not horse_change:
        score_map_dir = args.scoredir
        if not os.path.exists(score_map_dir):
            os.makedirs(score_map_dir)

    tbar = tqdm(test_data)
    for data, dsts in tbar:
        if args.eval:
            _update_eval_metric(evaluator, metric, data, dsts, tbar)
            continue

        # prediction mode: `dsts` holds the image paths
        if horse_change:
            print('data', data[0].shape)
        predicts = evaluator.parallel_forward(data)
        if horse_change:
            print(predicts[0].shape)
        for predict, impath in zip(predicts, dsts):
            if score_map_dir is not None:
                _dump_score_map(predict, impath, score_map_dir)
            _save_prediction_mask(predict, impath, testset, args.dataset, outdir)
num_gpus = opt.num_gpus
# The batch size is scaled by the number of GPUs when any are used.
if num_gpus > 0:
    batch_size *= num_gpus
ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
num_workers = opt.num_workers

input_size = opt.input_size
model_name = opt.model
# Model-zoo weights are requested only when no explicit parameter file is given.
pretrained = not opt.params_file

kwargs = {'ctx': ctx, 'pretrained': pretrained, 'classes': classes}
if model_name.startswith('resnext'):
    kwargs['use_se'] = opt.use_se

net = get_model(model_name, **kwargs)
net.cast(opt.dtype)
if opt.params_file:
    # load_parameters replaces the deprecated load_params and matches the
    # other scripts in this file.
    net.load_parameters(opt.params_file, ctx=ctx)
net.hybridize()

acc_top1 = mx.metric.Accuracy()
acc_top5 = mx.metric.TopKAccuracy(5)

normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Aligning with the TF implementation, the default crop-input ratio is 0.875;
# the resize target is ceil(input_size / ratio).
crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
resize = int(math.ceil(input_size / crop_ratio))
# Devices: the batch size is scaled by the GPU count (at least 1).
num_gpus = opt.num_gpus
batch_size *= max(1, num_gpus)
if num_gpus > 0:
    context = [mx.gpu(i) for i in range(num_gpus)]
else:
    context = [mx.cpu()]
num_workers = opt.num_workers

# Learning-rate schedule: the decay epochs are followed by an infinity entry.
lr_decay = opt.lr_decay
lr_decay_epoch = list(map(int, opt.lr_decay_epoch.split(','))) + [np.inf]

# Network: only the wide-resnet variants receive a drop rate.
model_name = opt.model
kwargs = {'classes': classes}
if model_name.startswith('cifar_wideresnet'):
    kwargs['drop_rate'] = opt.drop_rate
net = get_model(model_name, **kwargs)
if opt.resume_from:
    net.load_parameters(opt.resume_from, ctx=context)
optimizer = 'nag'

# Checkpointing is disabled unless both a directory and a period are given.
save_period = opt.save_period
if not (opt.save_dir and save_period):
    save_dir = ''
    save_period = 0
else:
    save_dir = opt.save_dir
    makedirs(save_dir)

plot_path = opt.save_plot_dir

logging.basicConfig(level=logging.INFO)
# The first supported model is compiled for a fixed 1x3x512x512 input.
model_name = supported_model[0]
dshape = (1, 3, 512, 512)
target_list = ctx_list()

######################################################################
# Download and pre-process demo image

im_fname = download_testdata('https://github.com/dmlc/web-data/blob/master/' +
                             'gluoncv/detection/street_small.jpg?raw=true',
                             'street_small.jpg', module='data')
x, img = data.transforms.presets.ssd.load_test(im_fname, short=512)

######################################################################
# Convert and compile model for CPU.

block = model_zoo.get_model(model_name, pretrained=True)

def build(target):
    """Convert ``block`` to Relay and build it for ``target`` at opt_level 3.

    The network input is named ``"data"`` and has shape ``dshape``.
    Returns the library produced by ``relay.build``.
    """
    mod, params = relay.frontend.from_mxnet(block, {"data": dshape})
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target, params=params)
    return lib

######################################################################
# Create TVM runtime and do inference

def run(lib, ctx):
    # Build TVM runtime
    m = graph_runtime.GraphModule(lib['default'](ctx))
    # Feed the pre-processed demo image as the 'data' input.
    tvm_input = tvm.nd.array(x.asnumpy(), ctx=ctx)
    m.set_input('data', tvm_input)
"""

from gluoncv import model_zoo, data, utils
from matplotlib import pyplot as plt

######################################################################
# Load a pretrained model
# -------------------------
#
# Let's get an SSD model trained with 512x512 images on Pascal VOC
# dataset with ResNet-50 V1 as the base model. By specifying
# ``pretrained=True``, it will automatically download the model from the model
# zoo if necessary. For more pretrained models, please refer to
# :doc:`../../model_zoo/index`.

net = model_zoo.get_model('ssd_512_resnet50_v1_voc', pretrained=True)

######################################################################
# Pre-process an image
# --------------------
#
# Next we download an image, and pre-process with preset data transforms. Here we
# specify that we resize the short edge of the image to 512 px. But you can
# feed an arbitrarily sized image.
#
# You can provide a list of image file names, such as ``[im_fname1, im_fname2,
# ...]`` to :py:func:`gluoncv.data.transforms.presets.ssd.load_test` if you
# want to load multiple images together.
#
# This function returns two results. The first is an NDArray with shape
# `(batch_size, RGB_channels, height, width)`. It can be fed into the
# In terms of structure, YOLOv3 networks are composed of base feature extraction # network, convolutional transition layers, upsampling layers, and specially designed YOLOv3 output layers. # # We highly recommend you to read the original paper to learn more about the ideas # behind YOLO [YOLOv3]_. # # `Gluon Model Zoo <../../model_zoo/index.html>`__ has a few built-in YOLO networks, more on the way. # You can load your favorate one with one simple line of code: # # .. hint:: # # To avoid downloading mdoel in this tutorial, we set `pretrained_base=False`, # in practice we usually want to load pre-trained imagenet models by setting # `pretrained_base=True`. from gluoncv import model_zoo net = model_zoo.get_model('yolo3_darknet53_voc', pretrained_base=False) print(net) ############################################################################## # YOLOv3 network is callable with image tensor import mxnet as mx x = mx.nd.zeros(shape=(1, 3, 416, 416)) net.initialize() cids, scores, bboxes = net(x) ############################################################################## # YOLOv3 returns three values, where ``cids`` are the class labels, # ``scores`` are confidence scores of each prediction, # and ``bboxes`` are absolute coordinates of corresponding bounding boxes.
args = parse_args()
# Seed mxnet, numpy and Python's builtin random generator in one call.
gutils.random.seed(args.seed)

# Training contexts: every non-empty id in --gpus becomes a GPU context;
# with no ids the script falls back to a single CPU context.
gpu_ids = [int(i) for i in args.gpus.split(',') if i.strip()]
ctx = [mx.gpu(gpu_id) for gpu_id in gpu_ids] or [mx.cpu()]
args.batch_size = len(ctx)  # 1 batch per device

# Network: the model-zoo name is mask_rcnn[_fpn]_<network>_<dataset>.
module_list = ['fpn'] if args.use_fpn else []
net_name = '_'.join(['mask_rcnn'] + module_list + [args.network, args.dataset])
args.save_prefix += net_name
net = get_model(net_name, pretrained_base=True)
resume_path = args.resume.strip()
if resume_path:
    net.load_parameters(resume_path)
else:
    # Initialize only the parameters that do not hold data yet.
    for param in net.collect_params().values():
        if param._data is None:
            param.initialize()
net.collect_params().reset_ctx(ctx)

# Training data
train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
train_data, val_data = get_dataloader(
    net, train_dataset, val_dataset,
    MaskRCNNDefaultTrainTransform, MaskRCNNDefaultValTransform,
    args.batch_size, args.num_workers, args.use_fpn)
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
# noinspection PyUnresolvedReferences
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import torch

from common.camera import camera_to_world
from common.generators import UnchunkedGenerator
from common.model import TemporalModel

# Use CUDA when torch reports it as available, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 1. Load the object detector and the 2D keypoint detector
detector_name = ['yolo3_mobilenet1.0_coco', 'yolo3_darknet53_coco']
posenet_name = ['simple_pose_resnet18_v1b', 'simple_pose_resnet101_v1b']
# Index 1 selects the second entry of each list above.
detector = model_zoo.get_model(detector_name[1], pretrained=True)
pose_net = model_zoo.get_model(posenet_name[1], pretrained=True)
# Restrict the detector to the 'person' class, reusing the existing weights.
# noinspection PyUnresolvedReferences
detector.reset_class(['person'], reuse_weights=['person'])


def detect_2d_joints(frame, short=360):
    """
    Args:
        short: size the shorter edge is resized to
        frame: RGB image of arbitrary size

    Returns:
        The processed image (ndarray), the joint coordinates (NDArray), the
        confidence scores and the other elements needed to display the 2D pose
    """
    # Resize the image and build the input tensor for the object detector
def main():
    """Train a CIFAR-10 classifier with mixup augmentation.

    Every setting is read from ``parse_args()``. The function builds the
    network with ``get_model``, configures logging, defines the train/test
    transforms and the nested ``test``/``train`` helpers, and finally runs
    ``train`` on the selected devices.
    """
    opt = parse_args()

    classes = 10
    num_gpus = opt.num_gpus
    # Scale the configured batch size by the number of devices in use.
    batch_size = opt.batch_size * max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    # np.inf is a sentinel: once all real decay epochs are consumed the index
    # below still points at a valid entry that no epoch ever equals.
    lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf]

    model_name = opt.model
    kwargs = {'classes': classes}
    if model_name.startswith('cifar_wideresnet'):
        kwargs['drop_rate'] = opt.drop_rate
    net = get_model(model_name, **kwargs)
    model_name += '_mixup'
    if opt.resume_from:
        net.load_parameters(opt.resume_from, ctx=context)
    optimizer = 'nag'

    # Checkpointing is only active when both a directory and a period are set.
    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_period = 0

    plot_name = opt.save_plot_dir

    logging_handlers = [logging.StreamHandler()]
    if opt.logging_dir:
        logging_dir = opt.logging_dir
        makedirs(logging_dir)
        logging_handlers.append(
            logging.FileHandler('%s/train_cifar10_%s.log' % (logging_dir, model_name)))

    logging.basicConfig(level=logging.INFO, handlers=logging_handlers)
    logging.info(opt)

    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
    ])

    def label_transform(label, classes):
        """Return a one-hot matrix of width ``classes`` for ``label``."""
        ind = label.astype('int')
        res = nd.zeros((ind.shape[0], classes), ctx=label.context)
        res[nd.arange(ind.shape[0], ctx=label.context), ind] = 1
        return res

    def test(ctx, val_data):
        """Run ``net`` over ``val_data`` and return ``(name, accuracy)``."""
        metric = mx.metric.Accuracy()
        for batch in val_data:
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            outputs = [net(X) for X in data]
            metric.update(label, outputs)
        return metric.get()

    def train(epochs, ctx):
        """Train ``net`` for ``epochs`` epochs on the context(s) ``ctx``."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.initialize(mx.init.Xavier(), ctx=ctx)

        train_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard',
            num_workers=num_workers)

        val_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        trainer = gluon.Trainer(
            net.collect_params(), optimizer,
            {'learning_rate': opt.lr, 'wd': opt.wd, 'momentum': opt.momentum})
        # Mixed labels are soft, so the training metric is an RMSE against the
        # softmax output, not an accuracy.
        train_metric = mx.metric.RMSE()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False)
        train_history = TrainingHistory(['training-error', 'validation-error'])

        lr_decay_count = 0
        best_val_score = 0
        alpha = 1  # Beta(alpha, alpha) parameter for the mixup coefficient
        num_batch = len(train_data)

        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            train_loss = 0

            if epoch == lr_decay_epoch[lr_decay_count]:
                trainer.set_learning_rate(trainer.learning_rate * lr_decay)
                lr_decay_count += 1

            for batch in train_data:
                lam = np.random.beta(alpha, alpha)
                # Mixup is switched off (lam = 1) for the last 20 epochs.
                if epoch >= epochs - 20:
                    lam = 1

                data_1 = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
                label_1 = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)

                # Each sample is mixed with its mirror-position partner in the
                # same device shard (X[::-1] reverses the batch axis).
                data = [lam * X + (1 - lam) * X[::-1] for X in data_1]
                label = []
                for Y in label_1:
                    y1 = label_transform(Y, classes)
                    y2 = label_transform(Y[::-1], classes)
                    label.append(lam * y1 + (1 - lam) * y2)

                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])

                output_softmax = [nd.SoftmaxActivation(out) for out in output]
                train_metric.update(label, output_softmax)

            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            # Validate once per epoch; the result feeds the history plot, the
            # best-checkpoint decision and the log line.
            name, val_acc = test(ctx, val_data)
            train_history.update([acc, 1 - val_acc])
            train_history.plot(save_path='%s/%s_history.png' % (plot_name, model_name))

            if val_acc > best_val_score:
                best_val_score = val_acc
                # Only write the checkpoint when a save directory is active;
                # with save_dir == '' the path would resolve to the filesystem
                # root.
                if save_dir:
                    net.save_parameters(
                        '%s/%.4f-cifar-%s-%d-best.params'
                        % (save_dir, best_val_score, model_name, epoch))

            logging.info('[Epoch %d] train=%f val=%f loss=%f time: %f' %
                         (epoch, acc, val_acc, train_loss, time.time() - tic))

            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('%s/cifar10-%s-%d.params' % (save_dir, model_name, epoch))

        if save_period and save_dir:
            net.save_parameters('%s/cifar10-%s-%d.params' % (save_dir, model_name, epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize()
    train(opt.num_epochs, context)
def __init__(self, options, logger):
    """Set up the model, data loaders, optimizer, loss helpers and metrics.

    Args:
        options: parsed options object; this method reads, among others,
            ``log_dir``, ``model_zoo``, ``height``, ``width``, ``scales``,
            ``frame_ids``, ``ctx``, ``resume``, ``dataset``, ``split``,
            ``batch_size`` and ``num_epochs`` from it. ``frame_ids`` is
            extended in place with ``"s"`` when ``use_stereo`` is set.
        logger: logger used to report the model and the resumed checkpoint.

    Raises:
        RuntimeError: if ``options.model_zoo`` is None, or if
            ``options.resume`` is set but is not an existing file.
        AssertionError: if ``height``/``width`` are not multiples of 32 or
            ``frame_ids`` does not start with 0.
    """
    # configuration setting
    self.opt = options
    self.logger = logger

    # Fail early with an explicit message. The original used
    # ``assert "<message>"``, which never fails because a non-empty string is
    # truthy.
    if self.opt.model_zoo is None:
        raise RuntimeError("Must choose a model from model_zoo, "
                           "please provide the model_zoo using --model_zoo")

    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_zoo)

    # checking height and width are multiples of 32
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    ################### model initialization ###################
    self.num_scales = len(self.opt.scales)
    self.num_input_frames = len(self.opt.frame_ids)
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    # Must be computed before "s" is appended to frame_ids below.
    self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

    if self.opt.use_stereo:
        self.opt.frame_ids.append("s")

    # create network
    self.model = get_model(self.opt.model_zoo, pretrained_base=self.opt.pretrained_base,
                           scales=self.opt.scales, ctx=self.opt.ctx)
    self.logger.info(self.model)

    # resume checkpoint if needed
    if self.opt.resume is not None:
        if os.path.isfile(self.opt.resume):
            logger.info('Resume model: %s' % self.opt.resume)
            self.model.load_parameters(self.opt.resume, ctx=self.opt.ctx)
        else:
            raise RuntimeError("=> no checkpoint found at '{}'".format(self.opt.resume))

    self.parameters_to_train = self.model.collect_params()

    if self.opt.hybridize:
        self.model.hybridize()

    ######################### dataloader #########################
    datasets_dict = {"kitti": KITTIRAWDataset,
                     "kitti_odom": KITTIOdomDataset}
    self.dataset = datasets_dict[self.opt.dataset]

    # "{}_files.txt" is filled in with "train" / "val" below.
    fpath = os.path.join(os.path.expanduser("~"), ".mxnet/datasets/kitti",
                         "splits", self.opt.split, "{}_files.txt")
    train_filenames = readlines(fpath.format("train"))
    val_filenames = readlines(fpath.format("val"))
    img_ext = '.png' if self.opt.png else '.jpg'

    num_train_samples = len(train_filenames)
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    train_dataset = self.dataset(
        self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, num_scales=4, is_train=True, img_ext=img_ext)
    self.train_loader = gluon.data.DataLoader(
        train_dataset, batch_size=self.opt.batch_size, shuffle=True,
        batchify_fn=dict_batchify_fn, num_workers=self.opt.num_workers,
        pin_memory=True, last_batch='discard')

    val_dataset = self.dataset(
        self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, num_scales=4, is_train=False, img_ext=img_ext)
    self.val_loader = gluon.data.DataLoader(
        val_dataset, batch_size=self.opt.batch_size, shuffle=False,
        batchify_fn=dict_batchify_fn, num_workers=self.opt.num_workers,
        pin_memory=True, last_batch='discard')

    ################### optimization setting ###################
    # NOTE(review): iters_per_epoch is the number of samples, not the number
    # of batches — confirm this matches how the scheduler is stepped.
    self.lr_scheduler = LRSequential([
        LRScheduler('step', base_lr=self.opt.learning_rate,
                    nepochs=self.opt.num_epochs - self.opt.warmup_epochs,
                    iters_per_epoch=len(train_dataset),
                    step_epoch=[self.opt.scheduler_step_size - self.opt.warmup_epochs])
    ])
    optimizer_params = {'lr_scheduler': self.lr_scheduler,
                        'learning_rate': self.opt.learning_rate}

    self.optimizer = gluon.Trainer(self.parameters_to_train, 'adam', optimizer_params)

    print("Training model named:\n ", self.opt.model_zoo)
    print("Models are saved to:\n ", self.opt.log_dir)
    # Compare the device type by value: ``ctx is mx.cpu()`` is always False
    # because mx.cpu() builds a new Context object on every call.
    print("Training is using:\n ",
          "CPU" if self.opt.ctx[0].device_type == 'cpu' else "GPU")

    ################### loss function ###################
    if not self.opt.no_ssim:
        self.ssim = SSIM()

    # One back-projection / projection helper per scale, sized to that
    # scale's resolution.
    self.backproject_depth = {}
    self.project_3d = {}
    for scale in self.opt.scales:
        h = self.opt.height // (2 ** scale)
        w = self.opt.width // (2 ** scale)

        self.backproject_depth[scale] = BackprojectDepth(
            self.opt.batch_size, h, w, ctx=self.opt.ctx[0])
        self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)

    ################### metrics ###################
    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"]

    print("Using split:\n ", self.opt.split)
    print("There are {:d} training items and {:d} validation items\n".format(
        len(train_dataset), len(val_dataset)))

    self.save_opts()

    # for save best model
    self.best_delta1 = 0
    self.best_model = self.model
# ------------------- # GluonCV's Faster-RCNN implementation is a composite Gluon HybridBlock :py:class:`gluoncv.model_zoo.FasterRCNN`. # In terms of structure, Faster-RCNN networks are composed of a base feature extraction # network, a Region Proposal Network (including its own anchor system and proposal generator), # region-aware pooling layers, class predictors and bounding box offset predictors. # # `Gluon Model Zoo <../../model_zoo/index.html>`__ has a few built-in Faster-RCNN networks, with more on the way. # You can load your favorite one with one simple line of code: # # .. hint:: # # To avoid downloading the model in this tutorial, we set ``pretrained_base=False``; # in practice we usually want to load pre-trained imagenet models by setting # ``pretrained_base=True``. from gluoncv import model_zoo net = model_zoo.get_model('faster_rcnn_resnet50_v1b_voc', pretrained_base=False) print(net) ############################################################################## # The Faster-RCNN network is callable with an image tensor import mxnet as mx x = mx.nd.zeros(shape=(1, 3, 600, 800)) net.initialize() cids, scores, bboxes = net(x) ############################################################################## # Faster-RCNN returns three values, where ``cids`` are the class labels, # ``scores`` are confidence scores of each prediction, # and ``bboxes`` are absolute coordinates of corresponding bounding boxes. ##############################################################################
def main():
    """Train and validate an action-recognition network.

    All settings come from ``parse_args()``. The function sets up file and
    stream logging plus a ``SummaryWriter``, optionally creates a kvstore and
    enables AMP, builds the network with ``get_model``, builds the data
    loaders and learning-rate schedule, and then runs the nested ``train``
    loop, which validates after every epoch and writes checkpoints into
    ``opt.save_dir``.
    """
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    sw = SummaryWriter(logdir=opt.save_dir, flush_secs=5, verbose=False)

    # ``kv`` only exists when a kvstore type was requested; every later use is
    # guarded by the same ``opt.kvstore is not None`` condition.
    if opt.kvstore is not None:
        kv = mx.kvstore.create(opt.kvstore)
        logger.info('Distributed training with %d workers and current rank is %d'
                    % (kv.num_workers, kv.rank))
    if opt.use_amp:
        amp.init()

    batch_size = opt.batch_size
    classes = opt.num_classes

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    logger.info('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    # The main scheduler starts after warm-up, so shift the decay epochs.
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]

    optimizer = 'nag' if opt.slowfast else 'sgd'

    optimizer_params = {'learning_rate': opt.lr, 'wd': opt.wd, 'momentum': opt.momentum}
    if opt.clip_grad > 0:
        optimizer_params['clip_gradient'] = opt.clip_grad
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    model_name = opt.model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag

    net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained,
                    use_tsn=opt.use_tsn, num_segments=opt.num_segments,
                    partial_bn=opt.partial_bn)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    logger.info(net)

    # Compare by value: ``is not ''`` tests object identity, which only works
    # by accident of string interning and raises a SyntaxWarning on
    # Python >= 3.8.
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)
        print('Continue training from model %s.' % (opt.resume_params))

    if opt.kvstore is not None:
        train_data, val_data, batch_fn = get_data_loader(
            opt, batch_size, num_workers, logger, kv)
    else:
        train_data, val_data, batch_fn = get_data_loader(
            opt, batch_size, num_workers, logger)

    num_batches = len(train_data)
    # Linear warm-up followed by the configured schedule.
    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=opt.warmup_lr, target_lr=opt.lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=opt.lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])
    optimizer_params['lr_scheduler'] = lr_scheduler

    train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    def test(ctx, val_data, kvstore=None):
        """Evaluate ``net`` on ``val_data``.

        Returns ``(top1, top5, val_loss)``. When ``kvstore`` is given, each
        value is pushed/pulled through it and divided by the worker count.
        """
        acc_top1.reset()
        acc_top5.reset()
        L = gluon.loss.SoftmaxCrossEntropyLoss()
        num_test_iter = len(val_data)
        val_loss_epoch = 0
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            outputs = []
            for X in data:
                # Merge the first two axes before feeding the network.
                X = X.reshape((-1,) + X.shape[2:])
                pred = net(X.astype(opt.dtype, copy=False))
                outputs.append(pred)

            loss = [L(yhat, y.astype(opt.dtype, copy=False))
                    for yhat, y in zip(outputs, label)]

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

            val_loss_epoch += sum([l.mean().asscalar() for l in loss]) / len(loss)

            if opt.log_interval and not (i + 1) % opt.log_interval:
                _, top1 = acc_top1.get()
                _, top5 = acc_top5.get()
                logger.info('Batch [%04d]/[%04d]: acc-top1=%f acc-top5=%f'
                            % (i, num_test_iter, top1 * 100, top5 * 100))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        val_loss = val_loss_epoch / num_test_iter

        if kvstore is not None:
            # The keys 111111 / 555555 / 999999 are initialized by train()
            # before the first call.
            top1_nd = nd.zeros(1)
            top5_nd = nd.zeros(1)
            val_loss_nd = nd.zeros(1)
            kvstore.push(111111, nd.array(np.array([top1])))
            kvstore.pull(111111, out=top1_nd)
            kvstore.push(555555, nd.array(np.array([top5])))
            kvstore.pull(555555, out=top5_nd)
            kvstore.push(999999, nd.array(np.array([val_loss])))
            kvstore.pull(999999, out=val_loss_nd)
            top1 = top1_nd.asnumpy() / kvstore.num_workers
            top5 = top5_nd.asnumpy() / kvstore.num_workers
            val_loss = val_loss_nd.asnumpy() / kvstore.num_workers

        return (top1, top5, val_loss)

    def train(ctx):
        """Run the training loop on the context(s) ``ctx``."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        # With partial BN only the parameters matching ``train_patterns`` are
        # handed to the trainer; a pattern of None selects every parameter.
        if opt.partial_bn:
            train_patterns = None
            if 'inceptionv3' in opt.model:
                train_patterns = '.*weight|.*bias|inception30_batchnorm0_gamma|inception30_batchnorm0_beta|inception30_batchnorm0_running_mean|inception30_batchnorm0_running_var'
            else:
                logger.info('Current model does not support partial batch normalization.')
            params_to_train = net.collect_params(train_patterns)
        else:
            params_to_train = net.collect_params()

        trainer_kwargs = {'update_on_kvstore': False}
        if opt.kvstore is not None:
            trainer_kwargs['kvstore'] = kv
        trainer = gluon.Trainer(params_to_train, optimizer, optimizer_params,
                                **trainer_kwargs)

        if opt.accumulate > 1:
            # Accumulate gradients across iterations, not overwrite them.
            params = [p for p in net.collect_params().values() if p.grad_req != 'null']
            for p in params:
                p.grad_req = 'add'

        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        if opt.use_amp:
            amp.init_trainer(trainer)

        L = gluon.loss.SoftmaxCrossEntropyLoss()

        best_val_score = 0
        # trainer.step() normalizes by the global batch size across workers.
        dist_scale = kv.num_workers if opt.kvstore is not None else 1

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            train_metric.reset()
            btic = time.time()
            num_train_iter = len(train_data)
            train_loss_epoch = 0
            train_loss_iter = 0

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                with ag.record():
                    outputs = []
                    for X in data:
                        X = X.reshape((-1,) + X.shape[2:])
                        pred = net(X.astype(opt.dtype, copy=False))
                        outputs.append(pred)
                    loss = [L(yhat, y.astype(opt.dtype, copy=False))
                            for yhat, y in zip(outputs, label)]

                    if opt.use_amp:
                        with amp.scale_loss(loss, trainer) as scaled_loss:
                            ag.backward(scaled_loss)
                    else:
                        ag.backward(loss)

                # NOTE(review): with accumulate > 1 the else-branch still
                # steps on every non-boundary iteration using the
                # not-yet-cleared accumulated gradients — confirm this is
                # intended.
                if opt.accumulate > 1 and (i + 1) % opt.accumulate == 0:
                    trainer.step(batch_size * dist_scale * opt.accumulate)
                    net.collect_params().zero_grad()
                else:
                    trainer.step(batch_size * dist_scale)

                train_metric.update(label, outputs)
                train_loss_iter = sum([l.mean().asscalar() for l in loss]) / len(loss)
                train_loss_epoch += train_loss_iter

                train_metric_name, train_metric_score = train_metric.get()
                global_step = epoch * num_train_iter + i
                sw.add_scalar(tag='train_acc_top1_iter', value=train_metric_score * 100,
                              global_step=global_step)
                sw.add_scalar(tag='train_loss_iter', value=train_loss_iter,
                              global_step=global_step)
                sw.add_scalar(tag='learning_rate_iter', value=trainer.learning_rate,
                              global_step=global_step)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    logger.info(
                        'Epoch[%03d] Batch [%04d]/[%04d]\tSpeed: %f samples/sec\t %s=%f\t loss=%f\t lr=%f'
                        % (epoch, i, num_train_iter,
                           batch_size * opt.log_interval / (time.time() - btic),
                           train_metric_name, train_metric_score * 100,
                           train_loss_epoch / (i + 1), trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))
            mx.ndarray.waitall()

            # The reduction keys used by test() are created once, on the
            # first epoch that runs.
            if opt.kvstore is not None and epoch == opt.resume_epoch:
                kv.init(111111, nd.zeros(1))
                kv.init(555555, nd.zeros(1))
                kv.init(999999, nd.zeros(1))

            if opt.kvstore is not None:
                acc_top1_val, acc_top5_val, loss_val = test(ctx, val_data, kv)
            else:
                acc_top1_val, acc_top5_val, loss_val = test(ctx, val_data)

            logger.info('[Epoch %03d] training: %s=%f\t loss=%f'
                        % (epoch, train_metric_name, train_metric_score * 100,
                           train_loss_epoch / num_train_iter))
            logger.info('[Epoch %03d] speed: %d samples/sec\ttime cost: %f'
                        % (epoch, throughput, time.time() - tic))
            logger.info('[Epoch %03d] validation: acc-top1=%f acc-top5=%f loss=%f'
                        % (epoch, acc_top1_val * 100, acc_top5_val * 100, loss_val))

            sw.add_scalar(tag='train_loss_epoch', value=train_loss_epoch / num_train_iter,
                          global_step=epoch)
            sw.add_scalar(tag='val_loss_epoch', value=loss_val, global_step=epoch)
            sw.add_scalar(tag='val_acc_top1_epoch', value=acc_top1_val * 100,
                          global_step=epoch)

            if acc_top1_val > best_val_score:
                best_val_score = acc_top1_val
                net.save_parameters('%s/%.4f-%s-%s-%03d-best.params'
                                    % (opt.save_dir, best_val_score, opt.dataset,
                                       model_name, epoch))
                trainer.save_states('%s/%.4f-%s-%s-%03d-best.states'
                                    % (opt.save_dir, best_val_score, opt.dataset,
                                       model_name, epoch))
            else:
                # Periodic checkpoints are only written on non-best epochs.
                if opt.save_frequency and opt.save_dir and (epoch + 1) % opt.save_frequency == 0:
                    net.save_parameters('%s/%s-%s-%03d.params'
                                        % (opt.save_dir, opt.dataset, model_name, epoch))
                    trainer.save_states('%s/%s-%s-%03d.states'
                                        % (opt.save_dir, opt.dataset, model_name, epoch))

        # save the last model
        net.save_parameters('%s/%s-%s-%03d.params'
                            % (opt.save_dir, opt.dataset, model_name, opt.num_epochs - 1))
        trainer.save_states('%s/%s-%s-%03d.states'
                            % (opt.save_dir, opt.dataset, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    train(context)
    sw.close()
gt_ids = mx.nd.ones(shape=(2, 4)) * -1 gt_ids[0, :1] = id1 gt_ids[1, :4] = id2 print('class_ids:', gt_ids) ############################################################################ gt_boxes = mx.nd.ones(shape=(2, 4, 4)) * -1 gt_boxes[0, :1, :] = bbox1 gt_boxes[1, :, :] = bbox2 print('bounding boxes:', gt_boxes) ############################################################################ # We use a vgg16 atrous 300x300 SSD model in this example. For demo purposes, we # don't use any pretrained weights here from gluoncv import model_zoo net = model_zoo.get_model('ssd_300_vgg16_atrous_voc', pretrained_base=False, pretrained=False) ############################################################################ # Some preparation before training from mxnet import gluon net.initialize() conf_loss = gluon.loss.SoftmaxCrossEntropyLoss() loc_loss = gluon.loss.HuberLoss() ############################################################################ # Simulate the training steps by manually computing the losses: # You can always use ``gluoncv.loss.SSDMultiBoxLoss``, which fulfills this function. from mxnet import autograd from gluoncv.model_zoo.ssd.target import SSDTargetGenerator target_generator = SSDTargetGenerator() with autograd.record():
def main():
    """Train an ImageNet classifier with the BytePS distributed trainer.

    All settings come from ``parse_args()``. The function initializes BytePS,
    sets up logging to a file whose name contains the worker count and the GPU
    name, builds the learning-rate schedule and the network (plus an optional
    teacher for distillation), builds the data pipeline from record files or
    raw images, and then runs the nested ``train`` loop, which validates after
    every epoch and writes checkpoints when a save directory is configured.
    """
    opt = parse_args()

    bps.init()

    # The second-to-last line of the nvidia-smi CSV output is used as the GPU
    # name, with whitespace replaced by '-'.
    gpu_name = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=gpu_name', '--format=csv'])
    gpu_name = gpu_name.decode('utf8').split('\n')[-2]
    gpu_name = '-'.join(gpu_name.split())
    filename = "imagenet-%d-%s-%s.log" % (bps.size(), gpu_name, opt.logging_file)
    filehandler = logging.FileHandler(filename)
    streamhandler = logging.StreamHandler()

    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)

    logger.info(opt)

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167

    num_gpus = opt.num_gpus
    # batch_size *= max(1, num_gpus)
    # One device per process, selected by the BytePS local rank.
    context = mx.gpu(bps.local_rank()) if num_gpus > 0 else mx.cpu(bps.local_rank())
    num_workers = opt.num_workers
    nworker = bps.size()
    rank = bps.rank()

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    # The main scheduler starts after warm-up, so shift the decay epochs.
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // (batch_size * nworker)

    # The base learning rate is scaled by (total workers / workers per node).
    scaled_lr = opt.lr * nworker / bps.local_size()
    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=opt.warmup_lr, target_lr=scaled_lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=scaled_lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])

    model_name = opt.model

    kwargs = {'ctx': context, 'pretrained': opt.use_pretrained, 'classes': classes}
    if opt.use_gn:
        from gluoncv.nn import GroupNorm
        kwargs['norm_layer'] = GroupNorm
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se

    if opt.last_gamma:
        kwargs['last_gamma'] = True

    optimizer = 'sgd' if opt.compressor else 'nag'

    optimizer_params = {'wd': opt.wd, 'momentum': opt.momentum,
                        'lr_scheduler': lr_scheduler}
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    net = get_model(model_name, **kwargs)
    net.cast(opt.dtype)
    # Compare by value: ``is not ''`` tests object identity, which only works
    # by accident of string interning and raises a SyntaxWarning on
    # Python >= 3.8.
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)

    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        teacher = get_model(teacher_name, pretrained=True, classes=classes, ctx=context)
        teacher.cast(opt.dtype)
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_train_idx, rec_val, rec_val_idx,
                     batch_size, num_workers):
        """Build ImageRecordIter pipelines; return (train, val, batch_fn)."""
        rec_train = os.path.expanduser(rec_train)
        rec_train_idx = os.path.expanduser(rec_train_idx)
        rec_val = os.path.expanduser(rec_val)
        rec_val_idx = os.path.expanduser(rec_val_idx)
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))
        mean_rgb = [123.68, 116.779, 103.939]
        std_rgb = [58.393, 57.12, 57.375]

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            return data, label

        # num_parts / part_index give each worker its own partition.
        train_data = mx.io.ImageRecordIter(
            path_imgrec=rec_train,
            path_imgidx=rec_train_idx,
            preprocess_threads=num_workers,
            shuffle=True,
            batch_size=batch_size,
            data_shape=(3, input_size, input_size),
            mean_r=mean_rgb[0],
            mean_g=mean_rgb[1],
            mean_b=mean_rgb[2],
            std_r=std_rgb[0],
            std_g=std_rgb[1],
            std_b=std_rgb[2],
            rand_mirror=True,
            random_resized_crop=True,
            max_aspect_ratio=4. / 3.,
            min_aspect_ratio=3. / 4.,
            max_random_area=1,
            min_random_area=0.08,
            brightness=jitter_param,
            saturation=jitter_param,
            contrast=jitter_param,
            pca_noise=lighting_param,
            num_parts=nworker,
            part_index=rank)
        val_data = mx.io.ImageRecordIter(
            path_imgrec=rec_val,
            path_imgidx=rec_val_idx,
            preprocess_threads=num_workers,
            shuffle=False,
            batch_size=batch_size,
            resize=resize,
            data_shape=(3, input_size, input_size),
            mean_r=mean_rgb[0],
            mean_g=mean_rgb[1],
            mean_b=mean_rgb[2],
            std_r=std_rgb[0],
            std_g=std_rgb[1],
            std_b=std_rgb[2],
            num_parts=nworker,
            part_index=rank)
        return train_data, val_data, batch_fn

    def get_data_loader(data_dir, batch_size, num_workers):
        """Build Gluon DataLoaders over raw images; return (train, val, batch_fn)."""
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            return data, label

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            normalize
        ])
        transform_test = transforms.Compose([
            transforms.Resize(resize, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize
        ])

        train_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard',
            num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        return train_data, val_data, batch_fn

    if opt.use_rec:
        train_data, val_data, batch_fn = get_data_rec(
            opt.rec_train, opt.rec_train_idx, opt.rec_val, opt.rec_val_idx,
            batch_size, num_workers)
    else:
        train_data, val_data, batch_fn = get_data_loader(
            opt.data_dir, batch_size, num_workers)

    # Mixup produces soft labels, so accuracy is replaced by RMSE.
    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    # Checkpointing is only active when both a directory and a frequency are set.
    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        """Return mixed (and optionally smoothed) one-hot labels per shard."""
        if isinstance(label, nd.NDArray):
            label = [label]
        res = []
        for l in label:
            y1 = l.one_hot(classes, on_value=1 - eta + eta / classes,
                           off_value=eta / classes)
            y2 = l[::-1].one_hot(classes, on_value=1 - eta + eta / classes,
                                 off_value=eta / classes)
            res.append(lam * y1 + (1 - lam) * y2)
        return res

    def smooth(label, classes, eta=0.1):
        """Return label-smoothed one-hot labels per shard."""
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            res = l.one_hot(classes, on_value=1 - eta + eta / classes,
                            off_value=eta / classes)
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data):
        """Evaluate ``net``; return ``(top-1 error, top-5 error)``."""
        if opt.use_rec:
            val_data.reset()
        acc_top1.reset()
        acc_top5.reset()
        for batch in val_data:
            data, label = batch_fn(batch, ctx)
            outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (1 - top1, 1 - top5)

    def train(ctx):
        """Run the training loop on the context(s) ``ctx``."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        # Only initialize from scratch when no checkpoint was loaded.
        if opt.resume_params == '':
            net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        compression_params = {
            "compressor": opt.compressor,
            "ef": opt.ef,
            "momentum": opt.compress_momentum,
            "scaling": opt.onebit_scaling,
            "k": opt.k
        }

        trainer = bps.DistributedTrainer(
            net.collect_params(), optimizer, optimizer_params,
            compression_params=compression_params)

        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        # Soft labels (smoothing / mixup) need the dense-label loss.
        sparse_label_loss = not (opt.label_smoothing or opt.mixup)
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(
                temperature=opt.temperature,
                hard_weight=opt.hard_weight,
                sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)

        best_val_score = 1

        # bps.byteps_declare_tensor("acc")
        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            train_metric.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    # Mixup is switched off (lam = 1) for the final epochs.
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]

                    eta = 0.1 if opt.label_smoothing else 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    # Keep the hard labels for the accuracy metric below.
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    teacher_prob = [
                        nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature)
                        for X in data
                    ]

                with ag.record():
                    outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    if distillation:
                        loss = [
                            L(yhat.astype('float32', copy=False),
                              y.astype('float32', copy=False),
                              p.astype('float32', copy=False))
                            for yhat, y, p in zip(outputs, label, teacher_prob)
                        ]
                    else:
                        loss = [L(yhat, y.astype(opt.dtype, copy=False))
                                for yhat, y in zip(outputs, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)

                if opt.mixup:
                    output_softmax = [
                        nd.SoftmaxActivation(out.astype('float32', copy=False))
                        for out in outputs
                    ]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f\ttime=%f'
                        % (epoch, i,
                           batch_size * nworker * opt.log_interval / (time.time() - btic),
                           train_metric_name, train_metric_score,
                           trainer.learning_rate, time.time() - btic))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * nworker * i / (time.time() - tic))

            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f'
                        % (epoch, throughput, time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data)

            # acc = mx.nd.array([train_metric_score, err_top1_val, err_top5_val],
            #                   ctx=ctx[0])
            # bps.byteps_push_pull(acc, name="acc", is_average=False)
            # acc /= bps.size()
            # train_metric_score, err_top1_val, err_top5_val = acc[0].asscalar(
            # ), acc[1].asscalar(), acc[2].asscalar()

            # if bps.rank() == 0:
            logger.info('[Epoch %d] training: %s=%f'
                        % (epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f'
                        % (epoch, err_top1_val, err_top5_val))

            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                # Only write the checkpoint when a save directory is active;
                # with save_dir == '' the path would resolve to the filesystem
                # root.
                if save_dir:
                    net.save_parameters(
                        '%s/%.4f-imagenet-%s-%d-best.params'
                        % (save_dir, best_val_score, model_name, epoch))
                    trainer.save_states(
                        '%s/%.4f-imagenet-%s-%d-best.states'
                        % (save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params'
                                    % (save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states'
                                    % (save_dir, model_name, epoch))

        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params'
                                % (save_dir, model_name, opt.num_epochs - 1))
            trainer.save_states('%s/imagenet-%s-%d.states'
                                % (save_dir, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
        if distillation:
            teacher.hybridize(static_alloc=True, static_shape=True)
    train(context)
num_workers=num_workers) ################################################################################ # # Note that only ``train_data`` uses ``transform_train``, while # ``val_data`` and ``test_data`` use ``transform_test`` to produce deterministic # results for evaluation. # # Model and Trainer # ----------------- # # We use a pre-trained ``ResNet50_v2`` model, which has balanced accuracy and # computation cost. model_name = 'ResNet50_v2' finetune_net = get_model(model_name, pretrained=True) with finetune_net.name_scope(): finetune_net.output = nn.Dense(num_classes) finetune_net.output.initialize(init.Xavier(), ctx=ctx) finetune_net.collect_params().reset_ctx(ctx) finetune_net.hybridize() trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', { 'learning_rate': lr, 'momentum': momentum, 'wd': wd }) metric = mx.metric.Accuracy() L = gluon.loss.SoftmaxCrossEntropyLoss() ################################################################################
# Training contexts: one GPU per comma-separated id, falling back to the CPU
# when no id was given.
ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()] or [mx.cpu()]
args.batch_size = len(ctx)  # 1 batch per device

# Network name and a time-stamped log directory derived from it.
net_name = '_'.join(['faster_rcnn', cfg.BACKBONE.NAME, cfg.DATASET.TYPE])
time_str = time.strftime("%m%d_%H%M")
args.logdir = os.path.join(args.logdir, "{}_{}".format(net_name, time_str))

# Set up the logger and dump the full configuration.
logger.set_logger_dir(args.logdir, 'd')
logger.info(
    "Config: ------------------------------------------\n"
    + pprint.pformat(cfg.to_dict(), indent=1, width=100, compact=True)
)

# Build the network in the requested precision.
if cfg.GENERAL.FP16:
    net = get_model(net_name, pretrained_base=True, dtype='float16')
    net.cast('float16')
else:
    net = get_model(net_name, pretrained_base=True, dtype='float32')

if not args.load.strip():
    # Fresh run: initialize only the parameters that do not hold data yet.
    for param in net.collect_params().values():
        if param._data is None:
            param.initialize()
else:
    net.load_parameters(args.load.strip())
net.collect_params().reset_ctx(ctx)

# Training data
train_dataset, val_dataset, eval_metric = get_dataset(cfg.DATASET.TYPE, args)
from gluoncv import model_zoo, data, utils ###################################################################### # Load a pretrained model # ------------------------- # # Let's get an Faster RCNN model trained on Pascal VOC # dataset with ResNet-50 backbone. By specifying # ``pretrained=True``, it will automatically download the model from the model # zoo if necessary. For more pretrained models, please refer to # :doc:`../../model_zoo/index`. # # The returned model is a HybridBlock :py:class:`gluoncv.model_zoo.FasterRCNN` # with a default context of `cpu(0)`. net = model_zoo.get_model('faster_rcnn_resnet50_v1b_voc', pretrained=True) ###################################################################### # Pre-process an image # -------------------- # # Next we download an image, and pre-process with preset data transforms. # The default behavior is to resize the short edge of the image to 600px. # But you can feed an arbitrarily sized image. # # You can provide a list of image file names, such as ``[im_fname1, im_fname2, # ...]`` to :py:func:`gluoncv.data.transforms.presets.rcnn.load_test` if you # want to load multiple image together. # # This function returns two results. The first is a NDArray with shape # `(batch_size, RGB_channels, height, width)`. It can be fed into the
def train(train_path, val_path, test_path):
    """Fine-tune the pretrained ``model_name`` network and log its accuracy.

    The network's ``output`` layer is replaced by a newly initialized
    ``nn.Dense(classes)`` layer. The model is then trained with SGD for
    ``epochs`` epochs, evaluated with ``test`` on the validation loader
    after every epoch, and evaluated once on the test loader at the end.

    Hyper-parameters are read from module-level names rather than passed
    in: ``model_name``, ``classes``, ``ctx``, ``batch_size``,
    ``num_workers``, ``transform_train``, ``transform_test``, ``lr``,
    ``momentum``, ``wd``, ``epochs``, ``lr_steps`` and ``lr_factor``.

    Args:
        train_path: Image folder given to ``ImageFolderDataset`` for the
            training split (transformed with ``transform_train``, shuffled).
        val_path: Image folder for the validation split (transformed with
            ``transform_test``, not shuffled).
        test_path: Image folder for the test split (transformed with
            ``transform_test``, not shuffled).

    Returns:
        None. Per-epoch and final results are reported via ``logging.info``.
    """

    def _make_loader(path, transform, shuffle):
        # The three splits differ only in folder, transform and shuffling.
        dataset = gluon.data.vision.ImageFolderDataset(path)
        return gluon.data.DataLoader(
            dataset.transform_first(transform),
            batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    # Initialize the net with pretrained model
    finetune_net = get_model(model_name, pretrained=True)
    with finetune_net.name_scope():
        finetune_net.output = nn.Dense(classes)
    finetune_net.output.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Define DataLoader
    train_data = _make_loader(train_path, transform_train, True)
    val_data = _make_loader(val_path, transform_test, False)
    test_data = _make_loader(test_path, transform_test, False)

    # Define Trainer
    trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', {
        'learning_rate': lr, 'momentum': momentum, 'wd': wd})
    metric = mx.metric.Accuracy()
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

    lr_counter = 0
    num_batch = len(train_data)

    # Start Training
    for epoch in range(epochs):
        # Decay the learning rate at each epoch listed in lr_steps. The
        # bounds check lets training continue once every step has been
        # consumed instead of raising IndexError on lr_steps[lr_counter].
        if lr_counter < len(lr_steps) and epoch == lr_steps[lr_counter]:
            trainer.set_learning_rate(trainer.learning_rate * lr_factor)
            lr_counter += 1

        tic = time.time()
        train_loss = 0
        metric.reset()

        for batch in train_data:
            # Slice the batch across the contexts in ctx; uneven slices are
            # allowed (even_split=False).
            data = gluon.utils.split_and_load(
                batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(
                batch[1], ctx_list=ctx, batch_axis=0, even_split=False)

            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [loss_fn(yhat, y) for yhat, y in zip(outputs, label)]
            for device_loss in loss:
                device_loss.backward()

            trainer.step(batch_size)
            # Accumulate the mean loss over the per-context slices.
            train_loss += sum(
                device_loss.mean().asscalar() for device_loss in loss
            ) / len(loss)

            metric.update(label, outputs)

        _, train_acc = metric.get()
        train_loss /= num_batch

        _, val_acc = test(finetune_net, val_data, ctx)

        logging.info(
            '[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f | time: %.1f',
            epoch, train_acc, train_loss, val_acc, time.time() - tic)

    _, test_acc = test(finetune_net, test_data, ctx)
    logging.info('[Finished] Test-acc: %.3f', test_acc)