Code example #1
    def test_multiple_labels_per_frame(self):
        """Frames whose CSV row carries two comma-separated labels should
        surface both labels for every frame of the sampled clip."""
        frame_names = ["{}.png".format(idx) for idx in range(3)]

        # Build a Charades-style frame-list CSV for one test frame video.
        # NOTE: "original_vido_id" (sic) is the header the real Charades
        # frame list uses, so the typo is intentional.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as f:
            f.write(
                "original_vido_id video_id frame_id path labels\n".encode())
            with temp_frame_video(frame_names) as (video_dir, data_1):
                for idx, name in enumerate(frame_names):
                    row = "{} {} {} {} {}\n".format(
                        str(video_dir),
                        "1",
                        str(idx),
                        pathlib.Path(video_dir) / name,
                        "0,100",  # two labels on every frame
                    )
                    f.write(row.encode())

                f.close()

                # Total duration of 3 frames at 30fps is 0.1 seconds.
                clip_sampler = make_clip_sampler("random", 0.1)
                dataset = Charades(f.name,
                                   clip_sampler=clip_sampler,
                                   video_sampler=SequentialSampler)

                sample = next(dataset)
                # Each of the 3 frames should report both labels.
                self.assertEqual(sample["label"],
                                 [[0, 100], [0, 100], [0, 100]])
                self.assertTrue(sample["video"].equal(data_1))
Code example #2
 def test_single_clip_per_video_works(self):
     """With a uniform sampler spanning the full duration, every video
     should yield exactly one clip containing all of its frames."""
     with temp_charades_dataset() as (filename, video_1, video_2):
         # Total duration of 3 frames at 30fps is 0.1 seconds.
         sampler = make_clip_sampler("uniform", 0.1)
         dataset = Charades(filename,
                            clip_sampler=sampler,
                            video_sampler=SequentialSampler)
         expected = [([[0], [0], [0]], video_1), ([[1], [1], [1]], video_2)]
         for actual, (labels, video) in zip(dataset, expected):
             self.assertEqual(actual["label"], labels)
             self.assertTrue(actual["video"].equal(video))
Code example #3
    def test_multiple_clips_per_video_works(self):
        """A clip duration of roughly one frame should split each 3-frame
        video into three single-frame clips, emitted in order."""
        with temp_charades_dataset() as (filename, video_1, video_2):
            # 0.033s covers a single frame at 30fps.
            sampler = make_clip_sampler("uniform", 0.033)
            dataset = Charades(filename,
                               clip_sampler=sampler,
                               video_sampler=SequentialSampler)

            # Three one-frame clips per video, labels per source video.
            expected = []
            for label, video in ((0, video_1), (1, video_2)):
                for start in range(3):
                    expected.append(([[label]], video[:, start:start + 1]))

            for actual, (labels, clip) in zip(dataset, expected):
                self.assertEqual(actual["label"], labels)
                self.assertTrue(actual["video"].equal(clip))
Code example #4
File: ptv_datasets.py  Project: zgsxwsdxg/SlowFast
def Ptvcharades(cfg, mode):
    """
    Construct PyTorchVideo Charades video loader.
    Load Charades data (frame paths, labels, etc.) into a Charades Dataset
    object. The dataset can be downloaded from the Charades official website
    (https://allenai.org/plato/charades/).
    Please see datasets/DATASET.md for more information about the data format.
    For `train` and `val` mode, a single clip is randomly sampled from every
    video with random cropping, scaling, and flipping. For `test` mode,
    multiple clips are uniformly sampled from every video with center cropping.
    Args:
        cfg (CfgNode): configs.
        mode (string): Options includes `train`, `val`, or `test` mode.
            For the train and val mode, the data loader will take data
            from the train or val set, and sample one clip per video.
            For the test mode, the data loader will take data from test set,
            and sample multiple clips per video.
    Returns:
        PTVDatasetWrapper: wrapper exposing the dataset plus its
            videos/clips/crops bookkeeping.
    """
    # Only support train, val, and test mode.
    assert mode in [
        "train",
        "val",
        "test",
    ], "Split '{}' not supported".format(mode)

    logger.info("Constructing Ptvcharades {}...".format(mode))

    # Clip length in seconds: NUM_FRAMES samples spaced SAMPLING_RATE frames
    # apart span (NUM_FRAMES - 1) * SAMPLING_RATE + 1 frames at TARGET_FPS.
    clip_duration = ((cfg.DATA.NUM_FRAMES - 1) * cfg.DATA.SAMPLING_RATE +
                     1) / cfg.DATA.TARGET_FPS

    if mode in ["train", "val"]:
        # One randomly-placed clip, one crop per video for train/val.
        num_clips = 1
        num_crops = 1

        transform = Compose([
            ApplyTransformToKey(
                key="video",
                transform=Compose([
                    Lambda(div255),
                    NormalizeVideo(cfg.DATA.MEAN, cfg.DATA.STD),
                    RandomShortSideScale(
                        min_size=cfg.DATA.TRAIN_JITTER_SCALES[0],
                        max_size=cfg.DATA.TRAIN_JITTER_SCALES[1],
                    ),
                    RandomCropVideo(cfg.DATA.TRAIN_CROP_SIZE),
                    # SlowFast models consume BGR channel order.
                    Lambda(rgb2bgr),
                ] + ([RandomHorizontalFlipVideo(
                    p=0.5)] if cfg.DATA.RANDOM_FLIP else []) +
                                  [PackPathway(cfg)]),
            ),
            # Convert the per-frame Charades labels into model targets.
            Lambda(
                functools.partial(
                    process_charades_label,
                    mode=mode,
                    num_classes=cfg.MODEL.NUM_CLASSES,
                )),
            DictToTuple(num_clips, num_crops),
        ])
        clip_sampler = make_clip_sampler("random", clip_duration)
        if cfg.NUM_GPUS > 1:
            video_sampler = DistributedSampler
        else:
            # Shuffle only during training; keep val deterministic.
            video_sampler = (RandomSampler
                             if mode == "train" else SequentialSampler)
    else:
        # Test mode: evaluate multiple temporal views x spatial crops.
        num_clips = cfg.TEST.NUM_ENSEMBLE_VIEWS
        num_crops = cfg.TEST.NUM_SPATIAL_CROPS

        transform = Compose([
            ApplyTransformToKey(
                key="video",
                transform=Compose([
                    Lambda(div255),
                    NormalizeVideo(cfg.DATA.MEAN, cfg.DATA.STD),
                    ShortSideScale(size=cfg.DATA.TEST_CROP_SIZE),
                ]),
            ),
            # Deterministic spatial crops (left/center/right etc.).
            UniformCropVideo(size=cfg.DATA.TEST_CROP_SIZE),
            Lambda(
                functools.partial(
                    process_charades_label,
                    mode=mode,
                    num_classes=cfg.MODEL.NUM_CLASSES,
                )),
            # BGR conversion and pathway packing happen after cropping here,
            # unlike train/val where they are fused into the first Compose.
            ApplyTransformToKey(
                key="video",
                transform=Compose(
                    [Lambda(rgb2bgr), PackPathway(cfg)], ),
            ),
            DictToTuple(num_clips, num_crops),
        ])
        clip_sampler = make_clip_sampler(
            "constant_clips_per_video",
            clip_duration,
            num_clips,
            num_crops,
        )
        video_sampler = (DistributedSampler
                         if cfg.NUM_GPUS > 1 else SequentialSampler)

    # CSV frame list named after the split, e.g. train.csv / val.csv / test.csv.
    data_path = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, "{}.csv".format(mode))
    dataset = Charades(
        data_path=data_path,
        clip_sampler=clip_sampler,
        video_sampler=video_sampler,
        transform=transform,
        video_path_prefix=cfg.DATA.PATH_PREFIX,
        frames_per_clip=cfg.DATA.NUM_FRAMES,
    )

    logger.info("Constructing charades dataloader (size: {}) from {}".format(
        len(dataset._path_to_videos), data_path))

    return PTVDatasetWrapper(
        num_videos=len(dataset._path_to_videos),
        clips_per_video=num_clips,
        crops_per_clip=num_crops,
        dataset=dataset,
    )