Example #1

# Imports assumed by this snippet; the module paths follow GluonCV's
# action-recognition training script and may vary across versions.
from mxnet import gluon
from mxnet.gluon.data.vision import transforms
from gluoncv.data import ucf101
from gluoncv.data.transforms import video
from gluoncv.utils import split_and_load

def get_data_loader(opt, batch_size, num_workers, logger):
    data_dir = opt.data_dir
    normalize = video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    scale_ratios = [1.0, 0.875, 0.75, 0.66]
    input_size = opt.input_size

    def batch_fn(batch, ctx):
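        # Split a (data, label) batch across the given contexts (GPUs); with
        # multiple segments per clip, `multiplier` keeps every segment of a
        # sample on the same device.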
        if opt.num_segments > 1:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False, multiplier=opt.num_segments)
        else:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        return data, label

    transform_train = transforms.Compose([
        video.VideoMultiScaleCrop(size=(input_size, input_size), scale_ratios=scale_ratios),
        video.VideoRandomHorizontalFlip(),
        video.VideoToTensor(),
        normalize
    ])
    transform_test = transforms.Compose([
        video.VideoCenterCrop(size=input_size),
        video.VideoToTensor(),
        normalize
    ])

    train_dataset = ucf101.classification.UCF101(setting=opt.train_list, root=data_dir, train=True,
                                                 new_width=opt.new_width, new_height=opt.new_height,
                                                 target_width=input_size, target_height=input_size,
                                                 num_segments=opt.num_segments, transform=transform_train)
    val_dataset = ucf101.classification.UCF101(setting=opt.val_list, root=data_dir, train=False,
                                               new_width=opt.new_width, new_height=opt.new_height,
                                               target_width=input_size, target_height=input_size,
                                               num_segments=opt.num_segments, transform=transform_test)
    logger.info('Load %d training samples and %d validation samples.' % (len(train_dataset), len(val_dataset)))

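    # TSN-style multi-segment samples come back as tuples of arrays, so the
    # loader needs a custom batchify function (see the sketch after this
    # example); single-segment samples use Gluon's default batchify.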
    if opt.num_segments > 1:
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, batchify_fn=tsn_mp_batchify_fn)
        val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, batchify_fn=tsn_mp_batchify_fn)
    else:
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_data, val_data, batch_fn
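
Example #1 uses `tsn_mp_batchify_fn` without defining it. Below is a minimal
sketch of such a batchify function, modeled on the helper in GluonCV's
action-recognition training script; treat the exact name and behavior as an
assumption rather than the library's guaranteed API.

import numpy as np
from mxnet import nd

def tsn_mp_batchify_fn(data):
    """Collate multi-segment samples into a batch (sketch of assumed behavior)."""
    if isinstance(data[0], nd.NDArray):
        # Samples are already NDArrays: stack along a new batch axis.
        return nd.stack(*data)
    elif isinstance(data[0], tuple):
        # Samples are tuples (e.g. one array per field): transpose the list of
        # tuples and collate each field recursively.
        data = zip(*data)
        return [tsn_mp_batchify_fn(i) for i in data]
    else:
        # Plain Python numbers/arrays (e.g. integer labels).
        data = np.asarray(data)
        return nd.array(data, dtype=data.dtype)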
Example #2
################################################################
# Data Augmentation and Data Loader
# ---------------------------------
#
# Data augmentation for video differs from that for images. For example, if
# you want to randomly crop a video sequence, you need to make sure that all
# the frames in the sequence undergo the same cropping process. We provide a
# new set of transformation functions that operate on multiple images at once.
# Please check out `video.py <../../../gluoncv/data/transforms/video.py>`_ for
# more details. Most of the video data augmentation strategies used here are
# introduced in [Wang15]_.

transform_train = transforms.Compose([
    # Fix the size of the input video frames to 256×340, randomly sample the
    # crop width and height from {256, 224, 192, 168}, then resize the cropped
    # region to 224×224.
    video.VideoMultiScaleCrop(size=(224, 224),
                              scale_ratios=[1.0, 0.875, 0.75, 0.66]),
    # Randomly flip the video frames horizontally
    video.VideoRandomHorizontalFlip(),
    # Transpose the video frames from (height, width, num_channels) to
    # (num_channels, height, width) and map values from [0, 255] to [0, 1]
    video.VideoToTensor(),
    # Normalize the video frames with the per-channel mean and standard
    # deviation (the ImageNet statistics)
    video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
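
##################################################################
# The candidate crop sizes quoted above follow directly from applying the
# scale ratios to the short side of the frame (256); a quick check:

crop_sizes = [int(256 * r) for r in [1.0, 0.875, 0.75, 0.66]]
print(crop_sizes)  # [256, 224, 192, 168]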

##################################################################
# With the transform functions, we can define data loaders for our
# training datasets.

# Batch size for each GPU
per_device_batch_size = 25
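
##################################################################
# The global batch size then scales with the number of devices, and the
# loader can be built as in Example #1. A minimal sketch, assuming a single
# GPU and that ``gluon`` and a ``train_dataset`` like the one above are in
# scope (both are assumptions for illustration):

num_gpus = 1
batch_size = per_device_batch_size * num_gpus
train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size,
                                   shuffle=True, num_workers=4)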