def get_data_loader(opt, batch_size, num_workers, logger):
    """Build the UCF101 train/val DataLoaders and a batch-splitting function.

    Returns a tuple ``(train_data, val_data, batch_fn)`` where ``batch_fn``
    spreads a loaded batch across the given contexts.
    """
    data_dir = opt.data_dir
    size = opt.input_size
    norm = video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    def batch_fn(batch, ctx):
        # Shard data and labels over the available devices; multi-segment
        # (TSN-style) input carries the segment multiplier so each shard
        # keeps whole videos together.
        split_kwargs = dict(ctx_list=ctx, batch_axis=0, even_split=False)
        if opt.num_segments > 1:
            data = split_and_load(batch[0], multiplier=opt.num_segments,
                                  **split_kwargs)
        else:
            data = split_and_load(batch[0], **split_kwargs)
        label = split_and_load(batch[1], **split_kwargs)
        return data, label

    transform_train = transforms.Compose([
        video.VideoMultiScaleCrop(size=(size, size),
                                  scale_ratios=[1.0, 0.875, 0.75, 0.66]),
        video.VideoRandomHorizontalFlip(),
        video.VideoToTensor(),
        norm,
    ])
    transform_test = transforms.Compose([
        video.VideoCenterCrop(size=size),
        video.VideoToTensor(),
        norm,
    ])

    # Keyword arguments shared by both dataset splits.
    dataset_kwargs = dict(root=data_dir,
                          new_width=opt.new_width,
                          new_height=opt.new_height,
                          target_width=size,
                          target_height=size,
                          num_segments=opt.num_segments)
    train_dataset = ucf101.classification.UCF101(setting=opt.train_list,
                                                 train=True,
                                                 transform=transform_train,
                                                 **dataset_kwargs)
    val_dataset = ucf101.classification.UCF101(setting=opt.val_list,
                                               train=False,
                                               transform=transform_test,
                                               **dataset_kwargs)
    logger.info('Load %d training samples and %d validation samples.'
                % (len(train_dataset), len(val_dataset)))

    loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)
    if opt.num_segments > 1:
        # Multi-segment batches need the TSN batchify function to collate
        # the per-segment samples correctly.
        loader_kwargs['batchify_fn'] = tsn_mp_batchify_fn
    train_data = gluon.data.DataLoader(train_dataset, shuffle=True,
                                       **loader_kwargs)
    val_data = gluon.data.DataLoader(val_dataset, shuffle=False,
                                     **loader_kwargs)
    return train_data, val_data, batch_fn
Read with GluonCV ----------------- The prepared dataset can be loaded with utility class :py:class:`gluoncv.data.ucf101` directly. Here is an example that randomly reads 25 videos each time, randomly selects one frame per video and performs center cropping. """ from gluoncv.data import ucf101 from mxnet.gluon.data import DataLoader from mxnet.gluon.data.vision import transforms from gluoncv.data.transforms import video transform_train = transforms.Compose([ video.VideoCenterCrop(size=224), ]) # Default location of the data is stored on ~/.mxnet/datasets/ucf101 # You need to specify ``setting`` and ``root`` for UCF101 if you decoded the video frames into a different folder. train_dataset = ucf101.classification.UCF101(train=True, transform=transform_train) train_data = DataLoader(train_dataset, batch_size=25, shuffle=True) ######################################################################### for x, y in train_data: print('Video frame size (batch, height, width, RGB):', x.shape) print('Video label:', y.shape) break #########################################################################
def main():
    """Evaluate a pre-trained video-action-recognition model on a validation set.

    Parses command-line options, builds the network and validation DataLoader
    (UCF101 or Kinetics400), then reports top-1/top-5 accuracy.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is neither ``'ucf101'`` nor ``'kinetics400'``.
    """
    opt = parse_args()
    print(opt)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)  # per-device batch size scaled to all GPUs
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # BUGFIX: was `opt.resume_params is not ''` — identity comparison with a
    # string literal (SyntaxWarning on CPython >= 3.8 and not guaranteed to
    # work); equality is what is meant here.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' % (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    # get data
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        transform_test = transforms.Compose([
            video.VideoCenterCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    if opt.dataset == 'ucf101':
        val_dataset = ucf101.classification.UCF101(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size, target_height=opt.input_size,
            test_mode=True, num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = kinetics400.classification.Kinetics400(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            test_mode=True, num_segments=opt.num_segments,
            transform=transform_test)
    else:
        # BUGFIX: previously logged via an undefined `logger` and then fell
        # through to use an undefined `val_dataset` (NameError). Fail fast
        # with an explicit error instead.
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers,
        prefetch=int(opt.prefetch_ratio * num_workers))
    print('Load %d test samples.' % len(val_dataset))

    # start evaluation
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    # Common practice during evaluation is to evenly sample 25 frames from a
    # single video, and then perform 10-crop data augmentation. This leads to
    # 250 samples per video (750 channels). If this is too large to fit into
    # one GPU, we can split it into multiple data batches.
    # `num_data_batches` has to be set to a value such that `num_split_frames`
    # is a multiple of 3. For example, when `num_data_batches` is set to 10,
    # `num_split_frames` will be 750/10=75, which is a multiple of 3. If you
    # have enough GPU memory and prefer faster evaluation speed, you can set
    # `num_data_batches` to 1.
    num_data_batches = 10
    if opt.ten_crop:
        num_frames = opt.num_segments * 10
    else:
        num_frames = opt.num_segments
    # 3 channels per frame; each chunk covers num_frames*3/num_data_batches
    # channels along axis 1 of the batch.
    num_split_frames = int(num_frames * 3 / num_data_batches)

    def test(ctx, val_data):
        """Run one pass over `val_data` and return (top1, top5) accuracy."""
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            outputs = []
            for seg_id in range(num_data_batches):
                bs = seg_id * num_split_frames
                be = (seg_id + 1) * num_split_frames
                if opt.input_5d:
                    new_batch = [batch[0][:, bs:be, :, :, :], batch[1]]
                else:
                    new_batch = [batch[0][:, bs:be, :, :], batch[1]]
                # NOTE(review): `batch_fn` is not defined in main(); it is
                # assumed to be provided at module scope — verify.
                data, label = batch_fn(new_batch, ctx)
                for gpu_id, X in enumerate(data):
                    if opt.input_5d:
                        new_X = X.reshape((-1, 3, opt.new_length,
                                           opt.input_size, opt.input_size))
                    else:
                        new_X = X.reshape(
                            (-1, 3, opt.input_size, opt.input_size))
                    pred = net(new_X)
                    # Accumulate per-chunk predictions per device.
                    if seg_id == 0:
                        outputs.append(pred)
                    else:
                        outputs[gpu_id] = nd.concat(outputs[gpu_id], pred, dim=0)
            # Perform the mean operation on 'num_frames' samples of each video
            for gpu_id, out in enumerate(outputs):
                outputs[gpu_id] = nd.expand_dims(out.mean(axis=0), axis=0)

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)
            mx.ndarray.waitall()  # block until async compute finishes

            _, cur_top1 = acc_top1.get()
            _, cur_top5 = acc_top5.get()

            if i > 0 and i % opt.log_interval == 0:
                print('%04d/%04d is done: acc-top1=%f acc-top5=%f'
                      % (i, len(val_data), cur_top1 * 100, cur_top5 * 100))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f'
          % (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes'
          % ((end_time - start_time) / 60))