Example #1
@contextlib.contextmanager
def temp_encoded_video(num_frames: int,
                       fps: int,
                       height=10,
                       width=10,
                       prefix=None,
                       directory=None):
    """Creates a temporary lossless, mp4 video with synthetic content.

    Uses a context which deletes the video after exit.
    """
    # Lossless options.
    video_codec = "libx264rgb"
    options = {"crf": "0"}
    data = create_dummy_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(prefix=prefix,
                                     suffix=".mp4",
                                     dir=directory) as f:
        f.close()
        io.write_video(f.name,
                       data,
                       fps=fps,
                       video_codec=video_codec,
                       options=options)
        yield f.name, thwc_to_cthw(data).to(torch.float32)
    os.unlink(f.name)
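
A minimal usage sketch for the helper above (assuming it is importable together with torchvision.io as io; the decorated generator behaves as a context manager):

# The temporary file exists only inside the with-block.
with temp_encoded_video(num_frames=10, fps=5) as (video_path, expected_cthw):
    frames, _, info = io.read_video(video_path, pts_unit="sec")
    assert frames.shape == (10, 10, 10, 3)  # (T, H, W, C) at the 10x10 defaults
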
Example #2
@contextlib.contextmanager
def temp_video(num_frames,
               height,
               width,
               fps,
               lossless=False,
               video_codec=None,
               options=None):
    if lossless:
        if video_codec is not None:
            raise ValueError(
                "video_codec can't be specified together with lossless")
        if options is not None:
            raise ValueError(
                "options can't be specified together with lossless")
        video_codec = "libx264rgb"
        options = {"crf": "0"}
    if video_codec is None:
        video_codec = "libx264"
    if options is None:
        options = {}
    data = _create_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
        f.close()
        io.write_video(f.name,
                       data,
                       fps=fps,
                       video_codec=video_codec,
                       options=options)
        yield f.name, data
    os.unlink(f.name)
Example #3
@contextlib.contextmanager
def temp_video(num_frames,
               height,
               width,
               fps,
               lossless=False,
               video_codec=None,
               options=None):
    if lossless:
        assert video_codec is None, "video_codec can't be specified together with lossless"
        assert options is None, "options can't be specified together with lossless"
        video_codec = 'libx264rgb'
        options = {'crf': '0'}

    if video_codec is None:
        video_codec = 'libx264'
    if options is None:
        options = {}

    data = _create_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
        io.write_video(f.name,
                       data,
                       fps=fps,
                       video_codec=video_codec,
                       options=options)
        yield f.name, data
Example #4
@contextlib.contextmanager
def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, options=None):
    if lossless:
        if video_codec is not None:
            raise ValueError("video_codec can't be specified together with lossless")
        if options is not None:
            raise ValueError("options can't be specified together with lossless")
        video_codec = 'libx264rgb'
        options = {'crf': '0'}

    if video_codec is None:
        if get_video_backend() == "pyav":
            video_codec = 'libx264'
        else:
            # when video_codec is not set, we assume it is libx264rgb which accepts
            # RGB pixel formats as input instead of YUV
            video_codec = 'libx264rgb'
    if options is None:
        options = {}

    data = _create_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
        f.close()
        io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options)
        yield f.name, data
    os.unlink(f.name)
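
These fixtures feed exact-equality assertions later on, which only works because the lossless branch (libx264rgb with crf=0) round-trips pixels bit-for-bit. A minimal sketch of that round trip, assuming the decorated helper above:

# Lossless round trip: decoded frames match the synthetic input exactly.
with temp_video(10, 300, 300, fps=5, lossless=True) as (f_name, data):
    decoded, _, _ = io.read_video(f_name)
    assert decoded.equal(data)  # bit-exact thanks to crf=0 + libx264rgb
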
Example #5
def blur_background(video_path,
                    respth='./res/test_res',
                    cp='model_final_diss.pth'):
    # Read only the clip between 61 s and 65 s.
    frames, audio, info = read_video(video_path, start_pts=61, end_pts=65,
                                     pts_unit="sec")

    scale_labels = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((frames.shape[1], frames.shape[2]),
                          interpolation=Image.NEAREST),
        transforms.ToTensor()
    ])

    labels = label_images(frames, cp)
    new_frames = []
    for frame_inx in tqdm.tqdm(
            list(range(frames.shape[0])),
            desc="generating segmented frames with background"):
        scaled_label = scale_labels(labels[frame_inx].type(
            torch.uint8)).squeeze(0)
        scaled_label = torch.stack([scaled_label, scaled_label, scaled_label],
                                   dim=2)

        blurred_img = torch.from_numpy(
            cv2.blur(frames[frame_inx].numpy(), (15, 15)))

        new_frames.append(
            torch.where(scaled_label > 0, frames[frame_inx], blurred_img))

    new_frames = torch.stack(new_frames)

    write_video(
        os.path.join(respth, "blurred_background" +
                     os.path.basename(video_path)) + ".mp4", new_frames,
        round(info["video_fps"]))
Example #6
    def test_reading_from_directory_structure(self, decoder):
        # For an unknown reason this import has to be here for `buck test` to work.
        import torchvision.io as io

        with tempfile.TemporaryDirectory() as root_dir:

            # Create test directory structure with two classes and a video in each.
            root_dir_name = pathlib.Path(root_dir)
            test_class_1 = root_dir_name / "running"
            test_class_1.mkdir()
            data_1 = create_dummy_video_frames(15, 10, 10)
            test_class_2 = root_dir_name / "cleaning windows"
            test_class_2.mkdir()
            data_2 = create_dummy_video_frames(20, 15, 15)
            with tempfile.NamedTemporaryFile(
                    suffix=".mp4",
                    dir=test_class_1) as f_1, tempfile.NamedTemporaryFile(
                        suffix=".mp4", dir=test_class_2) as f_2:
                f_1.close()
                f_2.close()

                # Write lossless video for each class.
                io.write_video(
                    f_1.name,
                    data_1,
                    fps=30,
                    video_codec="libx264rgb",
                    options={"crf": "0"},
                )
                io.write_video(
                    f_2.name,
                    data_2,
                    fps=30,
                    video_codec="libx264rgb",
                    options={"crf": "0"},
                )

                clip_sampler = make_clip_sampler("uniform", 3)
                labeled_video_paths = LabeledVideoPaths.from_path(root_dir)
                dataset = LabeledVideoDataset(
                    labeled_video_paths,
                    clip_sampler=clip_sampler,
                    video_sampler=SequentialSampler,
                    decode_audio=False,
                    decoder=decoder,
                )

                # Videos are sorted alphabetically so "cleaning windows" (i.e. data_2)
                # will be first.
                sample_1 = next(dataset)
                self.assertEqual(sample_1["label"], 0)
                self.assertTrue(sample_1["video"].equal(
                    thwc_to_cthw(data_2).to(torch.float32)))

                sample_2 = next(dataset)
                self.assertEqual(sample_2["label"], 1)
                self.assertTrue(sample_2["video"].equal(
                    thwc_to_cthw(data_1).to(torch.float32)))
Example #7
    def test_write_read_video(self):
        with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
            data = self._create_video_frames(10, 300, 300)
            io.write_video(f.name, data, fps=5)

            lv, _, info = io.read_video(f.name)

            self.assertTrue(
                (data.float() - lv.float()).abs().max() < self.TOLERANCE)
            self.assertEqual(info["video_fps"], 5)
Example #8
def label_and_render_video(video_path,
                           respth='./res/test_res',
                           cp='model_final_diss.pth'):
    frames, audio, info = read_video(video_path, pts_unit="sec")

    labels = label_images(frames, cp)
    segmented_frames = []
    for frame_inx in tqdm.tqdm(list(range(frames.shape[0])),
                               desc="generating segmented frames"):
        segmented_frames.append(
            render_segmented_image(frames[frame_inx], labels[frame_inx]))

    segmented_frames = torch.stack(segmented_frames)

    write_video(
        os.path.join(respth, "segment" + os.path.basename(video_path)) +
        ".mp4", segmented_frames, info["video_fps"])
Example #9
def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
    names = []
    for i in range(num_videos):
        if sizes is None:
            size = 5 * (i + 1)
        else:
            size = sizes[i]
        if fps is None:
            f = 5
        else:
            f = fps[i]
        data = torch.randint(0, 256, (size, 300, 400, 3), dtype=torch.uint8)
        name = os.path.join(tmpdir, "{}.mp4".format(i))
        names.append(name)
        io.write_video(name, data, fps=f)

    return names
Example #10
    def test_read_partial_video(self):
        with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
            data = self._create_video_frames(10, 300, 300)
            io.write_video(f.name, data, fps=5)

            pts = io.read_video_timestamps(f.name)

            for start in range(5):
            for length in range(1, 4):
                lv, _, _ = io.read_video(f.name, pts[start],
                                         pts[start + length - 1])
                s_data = data[start:(start + length)]
                self.assertEqual(len(lv), length)
                    self.assertTrue((s_data.float() -
                                     lv.float()).abs().max() < self.TOLERANCE)

            lv, _, _ = io.read_video(f.name, pts[4] + 1, pts[7])
            self.assertEqual(len(lv), 4)
            self.assertTrue(
                (data[4:8].float() - lv.float()).abs().max() < self.TOLERANCE)
Example #11
    def test_read_timestamps(self):
        with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
            data = self._create_video_frames(10, 300, 300)
            io.write_video(f.name, data, fps=5)

            pts = io.read_video_timestamps(f.name)

            # note: not all formats/codecs provide accurate information for computing the
            # timestamps. For the format that we use here, this information is available,
            # so we use it as a baseline
            container = av.open(f.name)
            stream = container.streams[0]
            pts_step = int(
                round(float(1 / (stream.average_rate * stream.time_base))))
            num_frames = int(
                round(
                    float(stream.average_rate * stream.time_base *
                          stream.duration)))
            expected_pts = [i * pts_step for i in range(num_frames)]

            self.assertEqual(pts, expected_pts)
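
To make the expected_pts arithmetic concrete, a hypothetical worked example (the real time_base depends on the muxer):

# Suppose average_rate = 5 fps and time_base = 1/12800. Then each frame
# advances pts_step = 1 / (5 * (1/12800)) = 2560 ticks, so a 10-frame
# file yields pts = [0, 2560, 5120, ..., 23040].
pts_step = int(round(1 / (5 * (1 / 12800))))
expected_pts = [i * pts_step for i in range(10)]
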
Example #12
def read_and_reshape(inpt):
    video_dir, new_name = inpt

    width = height = 320
    sample = 10

    vframes, aframes, info = read_video(video_dir)
    # read_video returns frames as (T, H, W, C).
    t, h, w, c = vframes.shape
    # Center-crop a (2 * height) x (2 * width) window.
    h_start = int(h / 2) - height
    h_end = int(h / 2) + height
    w_start = int(w / 2) - width
    w_end = int(w / 2) + width

    vframes = vframes[:, h_start:h_end, w_start:w_end, :]
    # Keep every `sample`-th frame.
    sub_sampled = vframes[::sample]

    write_video(new_name, sub_sampled, fps=sub_sampled.shape[0])
Example #13
def replace_background(video_path,
                       bk_img_path,
                       respth='./res/test_res',
                       cp='model_final_diss.pth'):
    frames, audio, info = read_video(video_path, pts_unit="sec")
    bkg = transforms.Compose([
        transforms.Resize((frames[0].shape[0], frames[0].shape[1])),
        transforms.ToTensor()
    ])(Image.open(bk_img_path)).transpose(0, 2).transpose(0, 1)

    # scaling to [0,255] and casting to uint8
    bkg = (bkg * 255).type(torch.uint8)

    scale_labels = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((frames.shape[1], frames.shape[2]),
                          interpolation=Image.NEAREST),
        transforms.ToTensor()
    ])

    labels = label_images(frames, cp)
    new_frames = []
    for frame_inx in tqdm.tqdm(
            list(range(frames.shape[0])),
            desc="generating segmented frames with background"):
        scaled_label = scale_labels(labels[frame_inx].type(
            torch.uint8)).squeeze(0)
        scaled_label = torch.stack([scaled_label, scaled_label, scaled_label],
                                   dim=2)

        new_frames.append(torch.where(scaled_label > 0, frames[frame_inx],
                                      bkg))

    new_frames = torch.stack(new_frames)

    write_video(
        os.path.join(respth, "background" + os.path.basename(video_path)) +
        ".mp4", new_frames, info["video_fps"])
Example #14
    def render_video(self,
                     src: str,
                     dst: str,
                     batch_size: int = 1,
                     num_workers: int = 1) -> None:
        video, _, info = self.fetch_video(src)

        dataset = TensorDataset(video)
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers)

        styled_video = torch.zeros_like(video)
        styled_name = os.path.join(dst, os.path.basename(src))
        with torch.no_grad():
            for i, batch in enumerate(loader):
                styled_video[i * batch_size:(i + 1) *
                             batch_size] = self.render(batch[0]).cpu()
                print('%d/%d' % (i, len(loader)))

        styled_video = styled_video.permute((0, 2, 3, 1)) * 127.5 + 127.5
        write_video(styled_name, styled_video, float(info['video_fps']))
Example #15
        SPL_onehot = SPL_onehot.permute(2, 0, 1)
        return SPL_onehot
 
 
if __name__ == '__main__':
    with open('test_data/test.lst') as f:
        persons = [line.strip() for line in f][:4]
    opt = InferOptions().parse()
    pipeline = InferencePipeline.from_opts(opt)
    videos = [io.read_video('test_data/seq.mp4', pts_unit='sec')[0]]
    segs = [torch.zeros_like(videos[0], dtype=torch.uint8)]
    images = [torch.zeros_like(videos[0], dtype=torch.uint8)]
    for person in persons:
        source_image = Image.open(f'test_data/test/{person}.jpg')
        pipeline.segmentator.path = f'test_data/testSPL2/{person}.png'
        frames, segmentations = zip(*pipeline.render_video(source_image, 'test_data/seq/'))
        frames = torch.cat(frames)
        frames = frames.float()
        frames = torch.movedim(frames, 1, 3)
        frames = (frames + 1) / 2.0 * 255.0
        videos.append(frames.byte())
        segmentations = torch.cat(segmentations)
        segmentations = torch.stack([torch.from_numpy(util.tensor2im(torch.argmax(sf, axis=0, keepdim=True).data, True)) for sf in segmentations])
        segs.append(segmentations.byte())
        source_image_tensor = torch.from_numpy(np.array(source_image)).unsqueeze(0).expand(frames.size())
        images.append(source_image_tensor)
 
    comp_video = torch.cat([torch.cat(part, dim=2) for part in (images, segs, videos)], dim=1)
    io.write_video('test_data/out.mp4', comp_video, fps=30)
 
    # output_image.save(OUPUT_PATH)
Example #16
    def test_reading_from_directory_structure_hmdb51(self, decoder):
        # For an unknown reason this import has to be here for `buck test` to work.
        import torchvision.io as io

        with tempfile.TemporaryDirectory() as root_dir:

            # Create test directory structure with two classes and a video in each.
            root_dir_name = pathlib.Path(root_dir)
            action_1 = "running"
            action_2 = "cleaning_windows"

            videos_root_dir = root_dir_name / "videos"
            videos_root_dir.mkdir()

            test_class_1 = videos_root_dir / action_1
            test_class_1.mkdir()
            data_1 = create_dummy_video_frames(15, 10, 10)
            test_class_2 = videos_root_dir / action_2
            test_class_2.mkdir()
            data_2 = create_dummy_video_frames(20, 15, 15)

            test_splits = root_dir_name / "folds"
            test_splits.mkdir()

            with tempfile.NamedTemporaryFile(
                    suffix="_u_nm_np1_ba_goo_19.avi",
                    dir=test_class_1) as f_1, tempfile.NamedTemporaryFile(
                        suffix="_u_nm_np1_fr_med_1.avi",
                        dir=test_class_2) as f_2:
                f_1.close()
                f_2.close()

                # Write lossless video for each class.
                io.write_video(
                    f_1.name,
                    data_1,
                    fps=30,
                    video_codec="libx264rgb",
                    options={"crf": "0"},
                )
                io.write_video(
                    f_2.name,
                    data_2,
                    fps=30,
                    video_codec="libx264rgb",
                    options={"crf": "0"},
                )

                _, video_name_1 = os.path.split(f_1.name)
                _, video_name_2 = os.path.split(f_2.name)

                with open(
                        os.path.join(test_splits,
                                     action_1 + "_test_split1.txt"), "w") as f:
                    f.write(f"{video_name_1} 1\n")

                with open(
                        os.path.join(test_splits,
                                     action_2 + "_test_split1.txt"), "w") as f:
                    f.write(f"{video_name_2} 1\n")

                clip_sampler = make_clip_sampler("uniform", 3)
                dataset = Hmdb51(
                    data_path=test_splits,
                    video_path_prefix=root_dir_name / "videos",
                    clip_sampler=clip_sampler,
                    video_sampler=SequentialSampler,
                    split_id=1,
                    split_type="train",
                    decode_audio=False,
                    decoder=decoder,
                )

                # Sample order may vary across decoders, so normalize the
                # order by label before asserting.
                sample_1 = next(dataset)
                sample_2 = next(dataset)

                self.assertTrue(sample_1["label"] in [action_1, action_2])
                if sample_1["label"] == action_2:
                    sample_1, sample_2 = sample_2, sample_1

                self.assertEqual(sample_1["label"], action_1)
                self.assertEqual(5, len(sample_1["meta_tags"]))
                self.assertTrue(sample_1["video"].equal(
                    thwc_to_cthw(data_1).to(torch.float32)))

                self.assertEqual(sample_2["label"], action_2)
                self.assertEqual(5, len(sample_2["meta_tags"]))
                self.assertTrue(sample_2["video"].equal(
                    thwc_to_cthw(data_2).to(torch.float32)))
Example #17
def plot_video(orig: Tensor,
               recons: Tensor,
               model_name: str,
               epoch: int,
               out_path: str,
               rows: int,
               cols: int,
               fps: int,
               thumbnail_width: Optional[int] = None,
               thumbnail_height: Optional[int] = None):
    # Only resize when explicit thumbnail dimensions are given.
    if (thumbnail_height is not None and thumbnail_width is not None
            and orig.shape[-2:] != (thumbnail_height, thumbnail_width)):
        # Resize each frame
        to_pil = ToPILImage()
        to_tensor = ToTensor()
        resize = Resize((thumbnail_height, thumbnail_width))

        def transform(x):
            return to_tensor(resize(to_pil(x)))

        def resize_batch(batch):
            return torch.cat([
                torch.cat(
                    [transform(frame).unsqueeze(dim=0)
                     for frame in video]).unsqueeze(dim=0) for video in batch
            ])

        recons = resize_batch(recons)
        orig = resize_batch(orig)

    # Distribute the batch dimension of [B, T, C, H, W] into a grid of
    # [T, C, H, W] tiles.
    n = min(rows * cols, orig.shape[0])
    i = 0
    video_rows = []
    for _ in range(rows):
        done = False
        # Build each row, one column at a time
        video_cols = []
        for _ in range(cols):
            if i >= n:
                done = True
                break
            # Original on left, recons on right
            video = torch.cat([orig[i], recons[i]], dim=-1)
            video *= 255.0
            video = video.byte()
            video_cols.append(video)
            i += 1
        while len(video_cols) < cols:
            # Append black videos to the empty spaces
            video_cols.append(torch.zeros_like(video))
        # Concatenate all columns into a row
        video_row = torch.cat(video_cols, dim=-1)
        video_rows.append(video_row)
        if done:
            break

    # Concatenate all rows into a single video
    video_array = torch.cat(video_rows, dim=-2)
    # [T, C, H, W] -> [T, W, H, C] -> [T, H, W, C]
    video_array = torch.transpose(video_array, 1, -1)
    video_array = torch.transpose(video_array, 1, 2)
    # Monochrome to RGB
    video_array = video_array.repeat(1, 1, 1, 3)

    # Export the tensor as a video
    # TODO: improve video quality
    write_video(out_path + '.mp4', video_array, fps)
Example #18
def save_video(xseq, path):
    video = xseq.data.cpu().clamp(-1, 1)
    video = ((video + 1.) / 2. * 255).type(torch.uint8).permute(0, 2, 3, 1)
    write_video(path, video, fps=15)
Example #19
    def done(self):
        Path(self.directory).mkdir(parents=True, exist_ok=True)
        stream = torch.from_numpy(np.stack(self.t))
        write_video(f'{self.directory}/capture_{self.cap_id}.mp4', stream,
                    24.0)
        self.cap_id += 1
Example #20
                                             source_dir,
                                             root_dir='%s/' % source_dir)

one = torch.tensor([source_dataset[0]['image']])
images = torch.tensor([source_dataset[0]['image']])
print(images.shape)

last_choice = 0
last_diff = 0
total_diff = 0
hash_hit_count = 0
count = 0
start = time.time()
for i in range(1, len(source_dataset)):
    print('Source example count = %d' % count)
    count += 1
    t = torch.tensor(source_dataset[i]['image'])
    print(t.shape)
    images = torch.cat((images, one), 0)
    images[i] = t

end = time.time()
print(
    'DONE checking hashes. Processed count = %d in %d seconds, at %d seconds per example'
    % (count, (end - start), (end - start) / len(source_dataset)))

print(images.shape)

filename = '%s/%s.mp4' % (output_dir, output_filename)
write_video(filename, images, fps=30, video_codec='libx264', options=None)
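
A common thread through all of the examples: write_video consumes a (T, H, W, C) uint8 tensor. A minimal self-contained sketch (the output filename is illustrative):

import torch
from torchvision.io import write_video

# 30 random RGB frames in the (T, H, W, C) uint8 layout write_video expects.
frames = torch.randint(0, 256, (30, 64, 64, 3), dtype=torch.uint8)
write_video("demo.mp4", frames, fps=15)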