Example 1
def main(local_rank, c10d_backend, rdzv_init_url, max_world_size, classy_args):
    torch.manual_seed(0)
    set_video_backend(classy_args.video_backend)

    # Loads config, sets up task
    config = load_json(classy_args.config_file)

    task = build_task(config)

    # Load checkpoint, if available
    checkpoint = load_checkpoint(classy_args.checkpoint_folder)
    task.set_checkpoint(checkpoint)

    pretrained_checkpoint = load_checkpoint(classy_args.pretrained_checkpoint_folder)
    if pretrained_checkpoint is not None:
        assert isinstance(
            task, FineTuningTask
        ), "Can only use a pretrained checkpoint for fine tuning tasks"
        task.set_pretrained_checkpoint(pretrained_checkpoint)

    hooks = [
        LossLrMeterLoggingHook(classy_args.log_freq),
        ModelComplexityHook(),
        TimeMetricsHook(),
    ]

    if classy_args.checkpoint_folder != "":
        args_dict = vars(classy_args)
        args_dict["config"] = config
        hooks.append(
            CheckpointHook(
                classy_args.checkpoint_folder,
                args_dict,
                checkpoint_period=classy_args.checkpoint_period,
            )
        )
    if classy_args.profiler:
        hooks.append(ProfilerHook())

    task.set_hooks(hooks)

    assert c10d_backend == Backend.NCCL or c10d_backend == Backend.GLOO
    if c10d_backend == torch.distributed.Backend.NCCL:
        # needed to enable NCCL error handling
        os.environ["NCCL_BLOCKING_WAIT"] = "1"

    coordinator = CoordinatorP2P(
        c10d_backend=c10d_backend,
        init_method=rdzv_init_url,
        max_num_trainers=max_world_size,
        process_group_timeout=60000,
    )
    trainer = ElasticTrainer(
        use_gpu=classy_args.device == "gpu",
        num_dataloader_workers=classy_args.num_workers,
        local_rank=local_rank,
        elastic_coordinator=coordinator,
        input_args={},
    )
    trainer.train(task)
Example 2
    def test_video_dataset_from_folder(self):

        self.create_dataset()

        # iterate through different backends
        for backend in ['pyav', 'video_reader']:
            torchvision.set_video_backend(backend)

            # create dataset
            dataset = VideoDataset(self.input_dir, extensions=self.extensions)

            # __len__
            self.assertEqual(len(dataset),
                             self.n_frames_per_video * self.n_videos)

            # __getitem__
            for i in range(len(dataset)):
                frame, label = dataset[i]
                self.assertIsInstance(frame, PIL.Image.Image)
                self.assertEqual(label, i // self.n_frames_per_video)

            # get_filename
            for i in range(len(dataset)):
                frame, label = dataset[i]
                filename = dataset.get_filename(i)
                print(filename)
                self.assertTrue(
                    filename.endswith(
                        f"-{(i % self.n_frames_per_video):02d}-avi.png"))

        shutil.rmtree(self.input_dir)
Example 3
    def test_video_similar_timestamps_for_different_backends(self):
        if not VIDEO_DATASET_AVAILABLE:
            warnings.warn(
                'Did not test video dataset because of missing requirements')
            return

        self.create_dataset()

        timestamps = []
        offsets = []
        backends = []

        # iterate through different backends
        for backend in ['pyav', 'video_reader']:
            torchvision.set_video_backend(backend)

            _, video_timestamps, video_offsets, _ = \
                _make_dataset(self.input_dir, extensions=self.extensions)
            timestamps.append(video_timestamps)
            offsets.append(video_offsets)
            backends.append(backend)

        # make sure backends don't match (sanity check)
        self.assertNotEqual(backends[0], backends[1])

        # we expect the same timestamps and offsets
        self.assertEqual(timestamps[0], timestamps[1])
        self.assertEqual(offsets[0], offsets[1])

        shutil.rmtree(self.input_dir)
Example 4
    def test_video_similar_timestamps_for_different_backends(self):

        self.create_dataset()

        timestamps = []
        offsets = []
        backends = []

        # iterate through different backends
        for backend in ['pyav', 'video_reader']:
            torchvision.set_video_backend(backend)

            _, video_timestamps, video_offsets, _ = \
                _make_dataset(self.input_dir, extensions=self.extensions)
            timestamps.append(video_timestamps)
            offsets.append(video_offsets)
            backends.append(backend)

        # make sure backends don't match (sanity check)
        self.assertNotEqual(backends[0], backends[1])

        # we expect the same timestamps and offsets
        self.assertEqual(timestamps[0], timestamps[1])
        self.assertEqual(offsets[0], offsets[1])

        shutil.rmtree(self.input_dir)
Example 5
    def test_invalid_file(self):
        set_video_backend("video_reader")
        with pytest.raises(RuntimeError):
            io.read_video("foo.mp4")

        set_video_backend("pyav")
        with pytest.raises(RuntimeError):
            io.read_video("foo.mp4")
Example 6
    def test_invalid_file(self):
        set_video_backend('video_reader')
        with self.assertRaises(RuntimeError):
            io.read_video('foo.mp4')

        set_video_backend('pyav')
        with self.assertRaises(RuntimeError):
            io.read_video('foo.mp4')
Example 7
    def _worker_init_fn(self, worker_id):
        # We need to set the video backend explicitly in the worker process:
        # TorchVision's global `_video_backend` always starts at the default
        # value (`pyav`) when a multiprocessing context other than `fork` is
        # used, so the worker does not inherit the backend chosen in the main
        # process.

        from torchvision import set_video_backend

        set_video_backend(self.video_backend)
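
Example 7 only shows the worker-side hook. As a minimal, self-contained sketch of how such a hook can be wired into a DataLoader (the dummy dataset and the hard-coded backend choice are assumptions, not part of the example above):

from torch.utils.data import DataLoader, Dataset
from torchvision import set_video_backend


class _DummyVideoDataset(Dataset):
    """Stand-in for a real video dataset; only returns indices."""

    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return idx


def _init_worker(worker_id):
    # Spawned workers start with torchvision's default backend ("pyav"),
    # so the backend chosen in the main process has to be set again here.
    set_video_backend("pyav")


loader = DataLoader(_DummyVideoDataset(), batch_size=2, num_workers=2,
                    worker_init_fn=_init_worker)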
Example 8
 def test_audio_present(self):
     """Test if audio frames are returned with video_reader backend."""
     set_video_backend('video_reader')
     for test_video, _ in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         container = av.open(full_path)
         if container.streams.audio:
             _, audio, _ = io.read_video(full_path)
             self.assertGreaterEqual(audio.shape[0], 1)
             self.assertGreaterEqual(audio.shape[1], 1)
Example 9
 def test_audio_present_sec(self, test_video, backend, start_offset,
                            end_offset):
     """Test if audio frames are returned with sec unit."""
     full_path = os.path.join(VIDEO_DIR, test_video)
     container = av.open(full_path)
     if container.streams.audio:
         set_video_backend(backend)
         _, audio, _ = io.read_video(full_path,
                                     start_offset,
                                     end_offset,
                                     pts_unit="sec")
         assert all([dimension > 0 for dimension in audio.shape[:2]])
Example 10
def main(args, config):
    # Global flags
    torch.manual_seed(0)
    set_image_backend(args.image_backend)
    set_video_backend(args.video_backend)

    task = build_task(config)

    # Load checkpoint, if available.
    checkpoint = load_checkpoint(args.checkpoint_load_path)
    task.set_checkpoint(checkpoint)

    # Load a checkpoint containing a pre-trained model. This is how we
    # implement fine-tuning of existing models.
    pretrained_checkpoint = load_checkpoint(args.pretrained_checkpoint_path)
    if pretrained_checkpoint is not None:
        assert isinstance(
            task, FineTuningTask
        ), "Can only use a pretrained checkpoint for fine tuning tasks"
        task.set_pretrained_checkpoint(pretrained_checkpoint)

    # Configure hooks to do tensorboard logging, checkpoints and so on
    task.set_hooks(configure_hooks(args, config))

    use_gpu = None
    if args.device is not None:
        use_gpu = args.device == "gpu"
        assert torch.cuda.is_available() or not use_gpu, "CUDA is unavailable"

    # LocalTrainer is used for a single node. DistributedTrainer will set up
    # training to use PyTorch's DistributedDataParallel.
    trainer_class = {
        "none": LocalTrainer,
        "ddp": DistributedTrainer
    }[args.distributed_backend]

    trainer = trainer_class(use_gpu=use_gpu,
                            num_dataloader_workers=args.num_workers)

    logging.info(f"Starting training on rank {get_rank()} worker. "
                 f"World size is {get_world_size()}")
    # That's it! When this call returns, training is done.
    trainer.train(task)

    output_folder = Path(args.checkpoint_folder).resolve()
    logging.info("Training successful!")
    logging.info(
        f'Results of this training run are available at: "{output_folder}"')
Example 11
 def test_metadata(self):
     """
     Test that the metadata returned via pyav corresponds to the one returned
     by the new video decoder API
     """
     torchvision.set_video_backend("pyav")
     for test_video, config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         reader = Video(full_path, "video")
         reader_md = reader.get_metadata()
         self.assertAlmostEqual(
             config.video_fps, reader_md["video"]["fps"][0], delta=0.0001
         )
         self.assertAlmostEqual(
             config.duration, reader_md["video"]["duration"][0], delta=0.5
         )
Example 12
 def test_audio_present_sec(self):
     """Test if audio frames are returned with sec unit."""
     backends = ["video_reader", "pyav"]
     start_offsets = [0, 0.1]
     end_offsets = [0.3, None]
     for test_video, _ in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         container = av.open(full_path)
         if container.streams.audio:
             for backend, start_offset, end_offset in itertools.product(
                     backends, start_offsets, end_offsets):
                 set_video_backend(backend)
                 _, audio, _ = io.read_video(full_path,
                                             start_offset,
                                             end_offset,
                                             pts_unit="sec")
                 assert all(
                     [dimension > 0 for dimension in audio.shape[:2]])
Example 13
def main(args, config):
    # Global flags
    torch.manual_seed(0)
    set_image_backend(args.image_backend)
    set_video_backend(args.video_backend)

    task = build_task(config)

    # Load checkpoint, if available.
    if args.checkpoint_load_path:
        task.set_checkpoint(args.checkpoint_load_path)

    # Load a checkpoint containing a pre-trained model. This is how we
    # implement fine-tuning of existing models.
    if args.pretrained_checkpoint_path:
        assert isinstance(
            task, FineTuningTask
        ), "Can only use a pretrained checkpoint for fine tuning tasks"
        task.set_pretrained_checkpoint(args.pretrained_checkpoint_path)

    # Configure hooks to do tensorboard logging, checkpoints and so on.
    # `configure_hooks` adds default hooks, while extra hooks can be specified
    # in config file and stored in `task.hooks`. Here, we merge them when we
    # set the final hooks of the task.
    task.set_hooks(configure_hooks(args, config) + task.hooks)

    # LocalTrainer is used for a single replica. DistributedTrainer will set up
    # training to use PyTorch's DistributedDataParallel.
    trainer_class = {
        "none": LocalTrainer,
        "ddp": DistributedTrainer
    }[args.distributed_backend]

    trainer = trainer_class()

    logging.info(f"Starting training on rank {get_rank()} worker. "
                 f"World size is {get_world_size()}")
    # That's it! When this call returns, training is done.
    trainer.train(task)

    output_folder = Path(args.checkpoint_folder).resolve()
    logging.info("Training successful!")
    logging.info(
        f'Results of this training run are available at: "{output_folder}"')
Example 14
 def test_read_video_tensor(self):
     """
     Check if reading the video using the `next` based API yields the
     same sized tensors as the pyav alternative.
     """
     torchvision.set_video_backend("pyav")
     for test_video, config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         # pass 1: decode all frames using existing TV decoder
         tv_result, _, _ = torchvision.io.read_video(full_path,
                                                     pts_unit="sec")
         tv_result = tv_result.permute(0, 3, 1, 2)
         # pass 2: decode all frames using new api
         reader = VideoReader(full_path, "video")
         frames = []
         for frame in reader:
             frames.append(frame['data'])
         new_api = torch.stack(frames, 0)
         self.assertEqual(tv_result.size(), new_api.size())
Example 15
from fractions import Fraction

from PIL import Image

import torchvision
from torchvision import datasets
from torchvision import io

try:
    import av
    AV_AVAILABLE = True
except ImportError:
    AV_AVAILABLE = False

if io._HAS_VIDEO_OPT:
    torchvision.set_video_backend('video_reader')


class VideoLoader():
    """Implementation of VideoLoader.

    The VideoLoader is a wrapper around the torchvision video interface. With
    the VideoLoader you can read specific frames or the next frames of a video.
    It automatically switches to the `video_reader` backend if available.
    Reading sequential frames is significantly faster since it uses the
    VideoReader class from torchvision.

    The video loader automatically detects whether you are reading subsequent
    frames and uses the fast read method if possible.

    Attributes:
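
The docstring above contrasts sequential decoding (fast, via torchvision's VideoReader) with reading specific frames. Below is a minimal sketch of both access patterns using the plain torchvision API; the file path is a placeholder, and the explicit backend switch assumes torchvision was built with the video_reader extension:

import torchvision
from torchvision.io import VideoReader, read_video

torchvision.set_video_backend("video_reader")  # assumes the extension is built

path = "example.mp4"  # placeholder path

# Fast path: decode frames in order with the VideoReader API.
reader = VideoReader(path, "video")
for frame in reader:
    tensor = frame["data"]  # C x H x W uint8 tensor for each frame

# Random access: decode only the frames between two timestamps.
frames, _, _ = read_video(path, start_pts=1.0, end_pts=2.0, pts_unit="sec")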
Example 16
import argparse
import timeit
import os
import pandas as pd

import itertools
import torchvision

parser = argparse.ArgumentParser(description="Process some integers.")
parser.add_argument("n", type=int, help="Number of trials to run")
args = parser.parse_args()

setup_tvvr = """\
import torch
import torchvision
torchvision.set_video_backend("video_reader")
"""


def measure_reading_video(path):
    vframes, _, _ = torchvision.io.read_video(path)


loaders = []
times_per_video = []
times_random_seek = []
video = []
num_frames = []
lib_version = []

for i in range(args.n):
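
The benchmark above defines `setup_tvvr` for use with `timeit`. As a self-contained sketch of how such a setup string can be timed (the statement, the video path, and the trial count are assumptions):

import timeit

setup = """\
import torchvision
torchvision.set_video_backend("video_reader")
"""
stmt = 'torchvision.io.read_video("example.mp4")'  # placeholder video path

elapsed = timeit.timeit(stmt, setup=setup, number=5)
print(f"read_video (video_reader backend): {elapsed / 5:.4f} s per call")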
Example 17
import unittest
from torchvision import set_video_backend
import test_datasets_video_utils


set_video_backend('video_reader')


if __name__ == '__main__':
    suite = unittest.TestLoader().loadTestsFromModule(test_datasets_video_utils)
    unittest.TextTestRunner(verbosity=1).run(suite)
Example 18
import torch.utils.data as data
from PIL import Image
import traceback
import logging
import torchvision
from utils.sample_speedup import *
from transforms import *

# from memory_profiler import profile
import gc
## For Dataset
WIDTH = 256
HEIGHT = 340

from torchvision.io.video import read_video
torchvision.set_video_backend('pyav')


class FullDecodeDataSet(data.Dataset):
    def __init__(self, data_root, video_list, num_segments, is_train):

        self._data_root = data_root
        self._num_segments = num_segments
        self._is_train = is_train
        self._iframe_scales = [1, .875, .75]
        self._mv_scales = [1, .875, .75, .66]
        self._input_size = 224
        self._scale_size = self._input_size * 256 // 224
        self._iframe_transform = torchvision.transforms.Compose([
            GroupMultiScaleCrop(self._input_size, self._iframe_scales),
            GroupRandomHorizontalFlip(is_mv=False)
Example 19
def train_main(args):
    torchvision.set_video_backend("video_reader")
    if args.apex:
        if sys.version_info < (3, 0):
            raise RuntimeError(
                "Apex currently only supports Python 3. Aborting.")
        if amp is None:
            raise RuntimeError("Failed to import apex. Please install apex "
                               "from https://www.github.com/nvidia/apex "
                               "to enable mixed-precision training.")

    if args.output_dir:
        utils.mkdir(args.output_dir)

    utils.init_distributed_mode(args)
    print(args)
    print("torch version: ", torch.__version__)
    print("torchvision version: ", torchvision.__version__)

    device = torch.device(args.device)

    torch.backends.cudnn.benchmark = True
    writer = setup_tbx(args.output_dir)

    # Data loading code
    print("Loading data")

    print("\t Loading datasets")
    st = time.time()

    if not args.eval_only:
        print("\t Loading train data")
        transform_train = torchvision.transforms.Compose([
            T.ToTensorVideo(),
            T.Resize((args.scale_h, args.scale_w)),
            T.RandomHorizontalFlipVideo(),
            T.NormalizeVideo(mean=(0.43216, 0.394666, 0.37645),
                             std=(0.22803, 0.22145, 0.216989)),
            T.RandomCropVideo((args.crop_size, args.crop_size)),
        ])
        dataset = get_dataset(args, transform_train)
        dataset.video_clips.compute_clips(args.num_frames, 1, frame_rate=15)
        train_sampler = RandomClipSampler(dataset.video_clips,
                                          args.train_bs_multiplier)
        if args.distributed:
            train_sampler = DistributedSampler(train_sampler)
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=args.batch_size,
            sampler=train_sampler,
            num_workers=args.workers,
        )

    print("\t Loading validation data")
    transform_test = torchvision.transforms.Compose([
        T.ToTensorVideo(),
        T.Resize((args.scale_h, args.scale_w)),
        T.NormalizeVideo(mean=(0.43216, 0.394666, 0.37645),
                         std=(0.22803, 0.22145, 0.216989)),
        T.CenterCropVideo((args.crop_size, args.crop_size)),
    ])
    dataset_test = get_dataset(args, transform_test, split="val")
    dataset_test.video_clips.compute_clips(args.num_frames, 1, frame_rate=15)
    test_sampler = UniformClipSampler(dataset_test.video_clips,
                                      args.val_clips_per_video)
    if args.distributed:
        test_sampler = DistributedSampler(test_sampler)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test,
        batch_size=args.batch_size,
        sampler=test_sampler,
        num_workers=args.workers,
    )

    criterion = nn.CrossEntropyLoss()

    print("Creating model")
    # TODO: model only from our models
    available_models = {**models.__dict__}
    model = available_models[args.model](pretraining=args.pretrained)
    model.to(device)
    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    if args.resume_from_model and not args.resume:
        checkpoint = torch.load(args.resume_from_model, map_location="cpu")
        if "model" in checkpoint.keys():
            model.load_state_dict(checkpoint["model"])
        else:
            model.load_state_dict(checkpoint)

    if args.finetune:
        assert args.resume_from_model is not None or args.pretrained
        model.fc = nn.Linear(model.fc.in_features, args.num_finetune_classes)

    lr = args.lr * args.world_size
    if args.finetune:
        params = [
            {
                "params": model.stem.parameters(),
                "lr": 0
            },
            {
                "params": model.layer1.parameters(),
                "lr": args.l1_lr * args.world_size
            },
            {
                "params": model.layer2.parameters(),
                "lr": args.l2_lr * args.world_size
            },
            {
                "params": model.layer3.parameters(),
                "lr": args.l3_lr * args.world_size
            },
            {
                "params": model.layer4.parameters(),
                "lr": args.l4_lr * args.world_size
            },
            {
                "params": model.fc.parameters(),
                "lr": args.fc_lr * args.world_size
            },
        ]
    else:
        params = model.parameters()

    print(params)

    optimizer = torch.optim.SGD(
        params,
        lr=lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    if args.apex:
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.apex_opt_level)

    # convert scheduler to be per iteration,
    # not per epoch, for warmup that lasts
    # between different epochs
    if not args.eval_only:
        warmup_iters = args.lr_warmup_epochs * len(data_loader)
        lr_milestones = [len(data_loader) * m for m in args.lr_milestones]
        lr_scheduler = WarmupMultiStepLR(
            optimizer,
            milestones=lr_milestones,
            gamma=args.lr_gamma,
            warmup_iters=warmup_iters,
            warmup_factor=1e-5,
        )

    if os.path.isfile(os.path.join(args.output_dir, "checkpoint.pth")):
        args.resume = os.path.join(args.output_dir, "checkpoint.pth")

    if args.resume:
        checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
        args.start_epoch = checkpoint["epoch"] + 1

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])
        model_without_ddp = model.module

    if args.eval_only:
        print("Starting test_only")
        metric_logger = MetricLogger(delimiter="  ",
                                     writer=writer,
                                     stat_set="val")
        evaluate(model, criterion, data_loader_test, device, metric_logger)
        return

    # Get training metric logger
    stat_loggers = get_default_loggers(writer, args.start_epoch)

    print("Start training")
    start_time = time.time()

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(
            model,
            criterion,
            optimizer,
            lr_scheduler,
            data_loader,
            device,
            epoch,
            args.print_freq,
            stat_loggers["train"],
            args.apex,
        )
        evaluate(model, criterion, data_loader_test, device,
                 stat_loggers["val"])
        if args.output_dir:
            checkpoint = {
                "model": model_without_ddp.state_dict(),
                "optimizer": optimizer.state_dict(),
                "lr_scheduler": lr_scheduler.state_dict(),
                "epoch": epoch,
                "args": args,
            }
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, "model_{}.pth".format(epoch)))
            utils.save_on_master(
                checkpoint, os.path.join(args.output_dir, "checkpoint.pth"))

        # reset all meters in the metric logger
        for log in stat_loggers:
            stat_loggers[log].reset_meters()

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))
Example 20
            checkpoint = {
                "model": model_without_ddp.state_dict(),
                "optimizer": optimizer.state_dict(),
                "lr_scheduler": lr_scheduler.state_dict(),
                "epoch": epoch,
                "args": args,
            }
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, "model_{}.pth".format(epoch)))
            utils.save_on_master(
                checkpoint, os.path.join(args.output_dir, "checkpoint.pth"))

        # reset all meters in the metric logger
        for log in stat_loggers:
            stat_loggers[log].reset_meters()

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))


if __name__ == "__main__":
    from vmz.func.opts import parse_args
    import torchvision

    torchvision.set_video_backend("video_reader")
    args = parse_args()
    train_main(args)
    exit()