def get_data_loader(opt, batch_size, num_workers, logger):
    """Build the UCF101 train/val DataLoaders and a batch-splitting function.

    Returns a tuple ``(train_data, val_data, batch_fn)`` where ``batch_fn``
    spreads a loaded batch across the given contexts.
    """
    data_dir = opt.data_dir
    size = opt.input_size
    norm = video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    def batch_fn(batch, ctx):
        # Shard data and labels over the available devices; multi-segment
        # (TSN-style) input carries the segment multiplier so each shard
        # keeps whole videos together.
        split_kwargs = dict(ctx_list=ctx, batch_axis=0, even_split=False)
        if opt.num_segments > 1:
            data = split_and_load(batch[0], multiplier=opt.num_segments,
                                  **split_kwargs)
        else:
            data = split_and_load(batch[0], **split_kwargs)
        label = split_and_load(batch[1], **split_kwargs)
        return data, label

    transform_train = transforms.Compose([
        video.VideoMultiScaleCrop(size=(size, size),
                                  scale_ratios=[1.0, 0.875, 0.75, 0.66]),
        video.VideoRandomHorizontalFlip(),
        video.VideoToTensor(),
        norm,
    ])
    transform_test = transforms.Compose([
        video.VideoCenterCrop(size=size),
        video.VideoToTensor(),
        norm,
    ])

    # Keyword arguments shared by both dataset splits.
    dataset_kwargs = dict(root=data_dir,
                          new_width=opt.new_width,
                          new_height=opt.new_height,
                          target_width=size,
                          target_height=size,
                          num_segments=opt.num_segments)
    train_dataset = ucf101.classification.UCF101(setting=opt.train_list,
                                                 train=True,
                                                 transform=transform_train,
                                                 **dataset_kwargs)
    val_dataset = ucf101.classification.UCF101(setting=opt.val_list,
                                               train=False,
                                               transform=transform_test,
                                               **dataset_kwargs)
    logger.info('Load %d training samples and %d validation samples.'
                % (len(train_dataset), len(val_dataset)))

    loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)
    if opt.num_segments > 1:
        # Multi-segment batches need the TSN batchify function to collate
        # the per-segment samples correctly.
        loader_kwargs['batchify_fn'] = tsn_mp_batchify_fn
    train_data = gluon.data.DataLoader(train_dataset, shuffle=True,
                                       **loader_kwargs)
    val_data = gluon.data.DataLoader(val_dataset, shuffle=False,
                                     **loader_kwargs)
    return train_data, val_data, batch_fn
Read with GluonCV ----------------- The prepared dataset can be loaded with utility class :py:class:`gluoncv.data.ucf101` directly. Here is an example that randomly reads 25 videos each time, randomly selects one frame per video and performs center cropping. """ from gluoncv.data import ucf101 from mxnet.gluon.data import DataLoader from mxnet.gluon.data.vision import transforms from gluoncv.data.transforms import video transform_train = transforms.Compose([ video.VideoCenterCrop(size=224), ]) # Default location of the data is stored on ~/.mxnet/datasets/ucf101 # You need to specify ``setting`` and ``root`` for UCF101 if you decoded the video frames into a different folder. train_dataset = ucf101.classification.UCF101(train=True, transform=transform_train) train_data = DataLoader(train_dataset, batch_size=25, shuffle=True) ######################################################################### for x, y in train_data: print('Video frame size (batch, height, width, RGB):', x.shape) print('Video label:', y.shape) break #########################################################################
def main():
    """Evaluate a pre-trained video-action-recognition model on a validation set.

    Parses command-line options, builds the network and validation DataLoader
    (UCF101 or Kinetics400), then reports top-1/top-5 accuracy.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is neither ``'ucf101'`` nor ``'kinetics400'``.
    """
    opt = parse_args()
    print(opt)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)  # per-device batch size scaled to all GPUs
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # BUGFIX: was `opt.resume_params is not ''` — identity comparison with a
    # string literal (SyntaxWarning on CPython >= 3.8 and not guaranteed to
    # work); equality is what is meant here.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' % (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    # get data
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        transform_test = transforms.Compose([
            video.VideoCenterCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    if opt.dataset == 'ucf101':
        val_dataset = ucf101.classification.UCF101(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size, target_height=opt.input_size,
            test_mode=True, num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = kinetics400.classification.Kinetics400(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            test_mode=True, num_segments=opt.num_segments,
            transform=transform_test)
    else:
        # BUGFIX: previously logged via an undefined `logger` and then fell
        # through to use an undefined `val_dataset` (NameError). Fail fast
        # with an explicit error instead.
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers,
        prefetch=int(opt.prefetch_ratio * num_workers))
    print('Load %d test samples.' % len(val_dataset))

    # start evaluation
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    # Common practice during evaluation is to evenly sample 25 frames from a
    # single video, and then perform 10-crop data augmentation. This leads to
    # 250 samples per video (750 channels). If this is too large to fit into
    # one GPU, we can split it into multiple data batches.
    # `num_data_batches` has to be set to a value such that `num_split_frames`
    # is a multiple of 3. For example, when `num_data_batches` is set to 10,
    # `num_split_frames` will be 750/10=75, which is a multiple of 3. If you
    # have enough GPU memory and prefer faster evaluation speed, you can set
    # `num_data_batches` to 1.
    num_data_batches = 10
    if opt.ten_crop:
        num_frames = opt.num_segments * 10
    else:
        num_frames = opt.num_segments
    # 3 channels per frame; each chunk covers num_frames*3/num_data_batches
    # channels along axis 1 of the batch.
    num_split_frames = int(num_frames * 3 / num_data_batches)

    def test(ctx, val_data):
        """Run one pass over `val_data` and return (top1, top5) accuracy."""
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            outputs = []
            for seg_id in range(num_data_batches):
                bs = seg_id * num_split_frames
                be = (seg_id + 1) * num_split_frames
                if opt.input_5d:
                    new_batch = [batch[0][:, bs:be, :, :, :], batch[1]]
                else:
                    new_batch = [batch[0][:, bs:be, :, :], batch[1]]
                # NOTE(review): `batch_fn` is not defined in main(); it is
                # assumed to be provided at module scope — verify.
                data, label = batch_fn(new_batch, ctx)
                for gpu_id, X in enumerate(data):
                    if opt.input_5d:
                        new_X = X.reshape((-1, 3, opt.new_length,
                                           opt.input_size, opt.input_size))
                    else:
                        new_X = X.reshape(
                            (-1, 3, opt.input_size, opt.input_size))
                    pred = net(new_X)
                    # Accumulate per-chunk predictions per device.
                    if seg_id == 0:
                        outputs.append(pred)
                    else:
                        outputs[gpu_id] = nd.concat(outputs[gpu_id], pred, dim=0)
            # Perform the mean operation on 'num_frames' samples of each video
            for gpu_id, out in enumerate(outputs):
                outputs[gpu_id] = nd.expand_dims(out.mean(axis=0), axis=0)

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)
            mx.ndarray.waitall()  # block until async compute finishes

            _, cur_top1 = acc_top1.get()
            _, cur_top5 = acc_top5.get()

            if i > 0 and i % opt.log_interval == 0:
                print('%04d/%04d is done: acc-top1=%f acc-top5=%f'
                      % (i, len(val_data), cur_top1 * 100, cur_top5 * 100))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f'
          % (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes'
          % ((end_time - start_time) / 60))