def __getitem__(self, index):
        os.chdir(self._data_root)
        # siamese label, '0' means same, '1' means diff
        video_pairs = self._videos_list[index]
        pairs_data = []  # ( img1,img2 )

        for video in video_pairs:
            # process I-frames; shape: (num_frames, height, width, channels)
            frames, _, _ = read_video(video)
            if len(frames) == 0:
                print("decode frame failed")
                frames = np.zeros((self._num_segments, 256, 340, 3),
                                  dtype=np.float32)

            # sample frames: random positions for training, fixed positions for inference
            if self._is_train:
                frames = random_sample(frames, self._num_segments)
                frames = self._iframe_transform(np.asarray(frames, dtype=np.float32))
            else:
                frames = fix_sample(frames, self._num_segments)
                frames = self._infer_transform(np.asarray(frames, dtype=np.float32))

            # scale to [0, 1], reorder to (channels, frames, height, width), then normalize
            frames = np.asarray(frames, dtype=np.float32) / 255.0
            frames = np.transpose(frames, (3, 0, 1, 2))
            frames = (frames - self._input_mean) / self._input_std

            pairs_data.append(frames)

        return pairs_data, self._labels_list[index]
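A hedged usage sketch for a pair dataset like this one: the class name SiameseVideoDataset and its constructor arguments are assumptions for illustration, not part of the snippet above.

# Hypothetical usage sketch -- the dataset class name and constructor
# arguments below are assumptions, not taken from the example above.
from torch.utils.data import DataLoader

dataset = SiameseVideoDataset(data_root="videos/", pairs_file="pairs.txt",
                              num_segments=8, is_train=True)
loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)

for pairs, labels in loader:
    # default collate turns the two-element pairs_data list into two batched
    # tensors of shape (batch, channels, num_segments, height, width)
    clip_a, clip_b = pairs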
Example #2
def decode_video_with_av(
    encoded_video: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any]]:
    with unittest.mock.patch("torchvision.io.video.os.path.exists",
                             return_value=True):
        return read_video(
            ReadOnlyTensorBuffer(encoded_video))  # type: ignore[arg-type]
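One way to feed decode_video_with_av an in-memory video is to load the raw bytes of an encoded file into a uint8 tensor first; a minimal sketch with a placeholder path:

# Sketch: read the raw bytes of an encoded video into a uint8 tensor and
# decode it with the helper above. The file path is a placeholder.
import torch

with open("clip.mp4", "rb") as f:
    encoded = torch.frombuffer(bytearray(f.read()), dtype=torch.uint8)

vframes, aframes, info = decode_video_with_av(encoded)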
Example #3
    def __getitem__(self, index):

        # cut each video into K segments, then extract one random snippet
        # from each segment and return the snippets together with the label

        video_name = self.videos[index]

        video_frames, _, _ = read_video(video_name)
        length_video = video_frames.shape[0]
        length_of_segment = length_video // self.no_segments
        snippets = torch.zeros((self.no_segments, video_frames.shape[3],
                                video_frames.shape[1], video_frames.shape[2]))
        for i in range(self.no_segments):
            start = i * length_of_segment
            finish = min(start + length_of_segment, length_video)
            if self.training:
                # for training, the index is a random frame inside the segment
                idx = np.random.randint(start, finish)
            else:
                # for testing, the index is the middle of the segment
                idx = int(start + length_of_segment // 2)
            snippets[i] = video_frames[idx].permute((2, 0, 1))

        label = self.labels[index]
        # repeat the label no_segments times, one per segment, for training purposes
        returned_labels = [label] * self.no_segments

        return snippets, torch.LongTensor(returned_labels)
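A brief sketch of the TSN-style consensus such a loader is usually paired with: run the model once per snippet and average the per-segment logits. The names dataset and model here are assumptions, not defined in the example.

# Sketch of segmental consensus; `dataset` and `model` are assumptions.
import torch

snippets, labels = dataset[0]          # snippets: (no_segments, C, H, W)
with torch.no_grad():
    logits = model(snippets)           # (no_segments, num_classes)
    video_logits = logits.mean(dim=0)  # average over the segments
    pred = video_logits.argmax().item()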
Example #4
    def extract(self,
                device: torch.device,
                model: torch.nn.Module,
                classifier: torch.nn.Module,
                video_path: Union[str, None] = None) -> Dict[str, np.ndarray]:
        '''The extraction call. Made to clean the forward call a bit.

        Arguments:
            device {torch.device}
            model {torch.nn.Module}
            classifier {torch.nn.Module} -- pre-trained classification layer, will be used if
                                            show_pred is True

        Keyword Arguments:
            video_path {Union[str, None]} -- if provided, the video at this path is read and
                                             used in a "path -> model features" fashion (default: {None})

        Returns:
            Dict[str, np.ndarray] -- a dict with the extracted features as numpy arrays
        '''
        # take the video, change fps and save to the tmp folder
        if self.extraction_fps is not None:
            video_path = reencode_video_with_diff_fps(video_path,
                                                      self.tmp_path,
                                                      self.extraction_fps)

        # read a video
        rgb, audio, info = read_video(video_path, pts_unit='sec')
        # prepare data (first -- transform, then -- unsqueeze)
        rgb = self.transforms(rgb)
        rgb = rgb.unsqueeze(0)
        # slice the video into temporal stacks
        slices = form_slices(rgb.size(2), self.stack_size, self.step_size)

        vid_feats = []

        for stack_idx, (start_idx, end_idx) in enumerate(slices):
            # inference
            with torch.no_grad():
                output = model(rgb[:, :, start_idx:end_idx, :, :].to(device))
                vid_feats.extend(output.tolist())

                # show predictions on the Kinetics dataset (might be useful for debugging)
                if self.show_pred:
                    logits = classifier(output)
                    print(f'{video_path} @ frames ({start_idx}, {end_idx})')
                    show_predictions_on_dataset(logits, 'kinetics')

        feats_dict = {
            self.feature_type: np.array(vid_feats),
        }

        return feats_dict
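form_slices is not shown in this example; a plausible minimal implementation, assuming it returns (start, end) index pairs over the temporal dimension using the configured stack and step sizes:

# Assumed behavior of form_slices: sliding temporal windows of `stack_size`
# frames taken every `step_size` frames. This is a guess at the helper's
# logic, not the original implementation.
def form_slices(num_frames, stack_size, step_size):
    slices = []
    for start in range(0, num_frames - stack_size + 1, step_size):
        slices.append((start, start + stack_size))
    return slices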
Example #5
def example_load_frame():
    v, a, info = read_video("/home/tt/Videos/VID_20201202_133703_090.mp4",
                            pts_unit='sec')
    print(v.shape)  # torch.Size([467, 1080, 1920, 3])
    # write a frame
    single_frame = v[100]
    print(single_frame.shape)  # torch.Size([1080, 1920, 3])
    single_frame = single_frame.permute(2, 0, 1)  # to CHW
    print(single_frame.shape)
    file_out = os.path.join(out_path, "single_frame.png")
    write_png(single_frame, file_out)
    print("done write to ", file_out)
Example #6
def video_test():
    """Test video read/write functions."""
    transformer = transforms.ToPILImage()

    vframes, aframes, info = video.read_video("/tmp/a.mp4")
    # vframes format: [T, H, W, C], data range is [0, 255]

    # H, W, C ==> C, H, W
    image = transformer(vframes[0].permute(2, 0, 1))
    # image data range: [0, 255]

    # tensor = transforms.ToTensor()(image)
    # ==> tensor data range: [0.0, 1.0]

    image.show()
Example #7
def predict_sr(model, device, input_video, output_dir):
    """Predict SR model."""
    vframes, aframes, info = read_video(input_video, pts_unit='sec')
    # vframes format: [T, H, W, C], data range: [0, 255] (convenient for HDF5 storage)

    for i in tqdm(range(len(vframes))):
        input_tensor = vframes[i].permute(2, 0, 1).float()
        # input_tensor format: CxHxW float, data range [0.0, 255.0]
        # (divide by 255.0 here if the model expects inputs in [0.0, 1.0])
        input_tensor.unsqueeze_(0)
        input_tensor = input_tensor.to(device)
        with torch.no_grad():
            output_tensor = model(input_tensor)

        output_tensor.squeeze_(0)
        output_image = tensor_to_image(output_tensor.cpu())
        output_image.save("{}/{:03d}.png".format(output_dir, i))
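If a single video file is preferred over per-frame PNGs, the loop above could collect the outputs and write them with torchvision's write_video, which expects a uint8 [T, H, W, C] tensor. A sketch reusing the names from predict_sr, assuming the model output is roughly in the 0..255 range:

# Sketch: variant of the predict_sr loop that writes one video file.
# Assumes `model`, `device`, `vframes`, and `info` as in predict_sr above,
# and clamps the output to 0..255 before casting to uint8.
import torch
from torchvision.io import write_video

out_frames = []
for i in range(len(vframes)):
    input_tensor = vframes[i].permute(2, 0, 1).float().unsqueeze(0).to(device)
    with torch.no_grad():
        output_tensor = model(input_tensor).squeeze(0)
    out_frames.append(output_tensor.clamp(0, 255).byte().permute(1, 2, 0).cpu())

write_video("sr_output.mp4", torch.stack(out_frames), fps=info["video_fps"])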
Example #8
from torchvision.io.video import read_video, write_video
from torchvision.io.image import write_jpeg
import torch
from args_util import meow_parse
from data_flow import get_predict_video_dataloader
from models import create_model
import os
from visualize_util import save_density_map_normalize, save_density_map

VIDEO_PATH = "/home/tt/Videos/VID_20201204_133931_404.mp4"
OUTPUT_PATH = "/data/my_crowd_image/video_bike_q100"
v, a, info = read_video(VIDEO_PATH, pts_unit='sec')
print(info)
print(v.shape)
length = v.shape[0]
print(length)

count = 0
for i in range(length):
    # optionally keep only every 20th frame: if i % 20 == 0:
    frame = v[i]
    frame = frame.permute(2, 0, 1)
    file_out_path = os.path.join(OUTPUT_PATH, "IMG_" + str(i) + ".jpg")
    write_jpeg(frame, file_out_path, quality=100)
    print(file_out_path)
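write_video is imported above but unused in this excerpt; for completeness, a minimal sketch that writes part of the decoded tensor back out, taking the frame rate from the info dict:

# Sketch: write the first 100 decoded frames to a new file. read_video
# returns a uint8 [T, H, W, C] tensor, which is what write_video expects.
write_video(os.path.join(OUTPUT_PATH, "clip_first_100.mp4"),
            v[:100], fps=info["video_fps"])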
Example #9
    def __getitem__(self, index):

        # ============= EXTRACT RGB SNIPPETS ================

        video_name = self.videos_rgb[index]

        video_frames, _, _ = read_video(video_name)
        length_video = video_frames.shape[0]
        length_of_segment = length_video // self.no_segments
        snippets_rgb = torch.zeros(
            (self.no_segments, video_frames.shape[3], video_frames.shape[1],
             video_frames.shape[2]))
        for i in range(self.no_segments):

            start = i * length_of_segment
            # deterministic sampling: index is the middle of the segment
            idx = int(start + length_of_segment // 2)
            snippet = video_frames[idx].permute((2, 0, 1))
            snippets_rgb[i] = snippet

        # ============= EXTRACT FLOW SNIPPETS ================

        video_name_flow = self.videos_flow[index]
        # get a sorted list of all files in the flow folder; os.listdir order is
        # arbitrary, and the indexing below assumes the x-flow images come before
        # the y-flow images
        flow_images = sorted(f for f in listdir(video_name_flow)
                             if isfile(join(video_name_flow, f)))

        # read first flow image to get dimensions
        first_image_flow = tf.to_tensor(
            Image.open(video_name_flow + '/' + flow_images[0]))

        length_video = len(flow_images) // 2
        length_of_segment = length_video // self.no_segments
        snippets_flow = torch.zeros(
            (self.no_segments, 5 * 2, first_image_flow.shape[1],
             first_image_flow.shape[2]))
        for i in range(self.no_segments):

            start = i * length_of_segment
            for flow_idx in range(5):
                idx = start + flow_idx
                # x-direction flow image, followed below by the matching y-direction image
                x_flow_image = Image.open(video_name_flow + '/' +
                                          flow_images[idx])
                x_flow_image = tf.to_tensor(x_flow_image)

                y_flow_image = Image.open(video_name_flow + '/' +
                                          flow_images[idx + length_video])
                y_flow_image = tf.to_tensor(y_flow_image)

                snippets_flow[i][2 * flow_idx] = x_flow_image
                snippets_flow[i][2 * flow_idx + 1] = y_flow_image

        label = self.labels[index]
        # repeat the label no_segments times, one per segment, for training purposes
        returned_labels = [label] * self.no_segments

        return (snippets_rgb, snippets_flow), torch.LongTensor(returned_labels)
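A hedged sketch of the two-stream fusion this dataset feeds: average the RGB and flow predictions, then take the consensus over segments. rgb_model and flow_model are assumptions.

# Sketch of two-stream late fusion; the models and equal weighting are assumptions.
import torch

(snippets_rgb, snippets_flow), labels = dataset[0]
with torch.no_grad():
    rgb_logits = rgb_model(snippets_rgb)      # (no_segments, num_classes)
    flow_logits = flow_model(snippets_flow)   # (no_segments, num_classes)
    fused = (rgb_logits + flow_logits).mean(dim=0)  # consensus over segments
    pred = fused.argmax().item()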