Example 1
    def test_random_crop_video(self):
        numFrames = random.randint(4, 128)
        height = random.randint(10, 32) * 2
        width = random.randint(10, 32) * 2
        oheight = random.randint(5, (height - 2) // 2) * 2
        owidth = random.randint(5, (width - 2) // 2) * 2
        # random uint8 clip in (T, H, W, C) layout
        clip = torch.randint(0,
                             256, (numFrames, height, width, 3),
                             dtype=torch.uint8)
        result = Compose([
            transforms.ToTensorVideo(),
            transforms.RandomCropVideo((oheight, owidth)),
        ])(clip)
        # after ToTensorVideo the clip is (C, T, H, W), so dims 2 and 3 hold the crop size
        self.assertEqual(result.size(2), oheight)
        self.assertEqual(result.size(3), owidth)

        # exercise __repr__ as well
        transforms.RandomCropVideo((oheight, owidth)).__repr__()
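For reference, the same pipeline can be run outside the test class. A minimal standalone sketch (the private torchvision.transforms._transforms_video import path and the example shapes are assumptions and may differ across torchvision versions):

import torch
from torchvision.transforms import Compose
import torchvision.transforms._transforms_video as transforms

# random uint8 clip in (T, H, W, C) layout, as expected by ToTensorVideo
clip = torch.randint(0, 256, (8, 64, 64, 3), dtype=torch.uint8)
pipeline = Compose([
    transforms.ToTensorVideo(),            # -> (C, T, H, W) float tensor in [0, 1]
    transforms.RandomCropVideo((32, 48)),  # random 32x48 crop of the (H, W) dims
])
out = pipeline(clip)
print(out.shape)  # expected: torch.Size([3, 8, 32, 48])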
Example 2
    def __init__(self, size, consistent=True):
        # accept either a single int or an explicit (h, w) tuple for the crop size
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size
        self.consistent = consistent

        self.operation_torch = transforms_video.RandomCropVideo(size)
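Note that RandomCropVideo itself performs the same int-to-tuple normalization in its constructor, so the wrapper can pass either form. A minimal check (assuming the private torchvision.transforms._transforms_video module, where the constructor sets a .size attribute):

import torchvision.transforms._transforms_video as transforms_video

crop_from_int = transforms_video.RandomCropVideo(112)
crop_from_tuple = transforms_video.RandomCropVideo((112, 112))
print(crop_from_int.size, crop_from_tuple.size)  # both report (112, 112)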
Example 3
def train_transform(s):
    # training-time augmentation pipeline; s is the final crop size
    return transforms.Compose([
        transforms_video.ToTensorVideo(),
        transforms_video.RandomHorizontalFlipVideo(),
        transforms_video.RandomResizeVideo((s, round(s * 1.5))),
        transforms_video.NormalizeVideo(mean=[0.43216, 0.394666, 0.37645],
                                        std=[0.22803, 0.22145, 0.216989]),
        transforms_video.RandomCropVideo(s)
    ])
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.transforms._transforms_video as v_transform
import torch

TRAIN_BATCH_SIZE = 128
TEST_BATCH_SIZE = 128
FRAME_LENGTH = 16

transform = transforms.Compose([
    v_transform.ToTensorVideo(),  # (T, H, W, C) uint8 -> (C, T, H, W) float in [0, 1]
    v_transform.NormalizeVideo(mean=[0.43216, 0.394666, 0.37645],
                               std=[0.22803, 0.22145, 0.216989]),
    v_transform.RandomHorizontalFlipVideo(),
    v_transform.RandomCropVideo(112),
])


def custom_collate(batch):
    # UCF101 samples are (video, audio, label); drop the audio track before collating
    filtered_batch = []
    for video, _, label in batch:
        filtered_batch.append((video, label))
    return torch.utils.data.dataloader.default_collate(filtered_batch)


trainset = datasets.UCF101(
    root='data/UCF101/UCF-101',
    annotation_path='data/UCF101TrainTestSplits-RecognitionTask/ucfTrainTestlist',
    frames_per_clip=FRAME_LENGTH,