def __init__(
        self,
        filename: str,
        trim: Tuple[int, int],
        crop: Tuple[int, int, int, int],
        frame_rate: float = 15,
    ) -> None:
        """Load one trimmed, cropped video plus the STFT magnitude of its audio.

        Args:
            filename: Path without extension. ``filename + ".mp4"`` is read
                as the video and ``filename + ".wav"`` as the audio.
            trim: ``(start, stop)`` frame indices; frames with
                ``start <= index < stop`` are kept.
            crop: Slice bounds applied to every kept frame before resizing:
                columns ``crop[0]:crop[2]`` and rows ``crop[1]:crop[3]``.
            frame_rate: Forwarded to FFmpeg as ``-r`` for both the input and
                the output side.
        """
        super().__init__()

        # Get video frames with scikit-video
        reader = FFmpegReader(
            filename + ".mp4",
            inputdict={"-r": str(frame_rate)},
            outputdict={"-r": str(frame_rate)},
        )
        frames = []
        try:
            for frame_idx, frame in enumerate(reader.nextFrame()):
                # Trim video (time)
                if frame_idx < trim[0]:
                    continue
                if frame_idx >= trim[1]:
                    break

                # Crop frames (space), then bring every frame to 140x140
                frame = frame[crop[1] : crop[3], crop[0] : crop[2], :]
                frames.append(cv2.resize(frame, (140, 140)))
        finally:
            # The loop usually stops early at trim[1]; close the reader
            # explicitly instead of leaving the FFmpeg process to the GC.
            reader.close()

        # Change to NumPy array with PyTorch dimension format: the last axis
        # of the stacked frames is moved to position 1.
        self.frames: np.ndarray = np.transpose(
            np.array(frames, dtype=float), axes=(0, 3, 1, 2)
        )

        # Magnitude of the STFT of the audio, resampled to 2 kHz
        y, _ = librosa.load(filename + ".wav", sr=2000)
        D = librosa.core.stft(y, n_fft=510)
        self.samples = np.abs(D)
コード例 #2
0
    def read_video(self, path):
        """Read a video file into a NumPy array with a leading batch axis.

        Side effect: ``self.fps`` is set to the integer input frame rate
        reported by the reader.

        Frame selection: when ``self.required_fps`` is not None, a frame is
        kept only if ``index % self.fps`` lies in ``range(self.required_fps)``,
        i.e. the first ``required_fps`` frames of every ``fps``-sized group.

        Args:
            path: Path of the video to read.

        Returns:
            numpy.ndarray: the stacked frames with an axis of length 1
            prepended. The original comment describes the shape as
            (1, <No. of frames>, <height>, <width>, <channels>); the exact
            shape after ``rgb2gray`` / ``self.process_video`` is not visible
            here.
        """
        capt = FFmpegReader(filename=path)
        list_of_frames = []
        try:
            self.fps = int(capt.inputfps)

            # Loop-invariant: offsets (within each group of `fps` frames)
            # that are kept when subsampling is requested.
            kept_offsets = None
            if self.required_fps is not None:
                kept_offsets = range(self.required_fps)

            for index, frame in enumerate(capt.nextFrame()):
                # frame -> (<height>, <width>, 3)
                if kept_offsets is not None and (index % self.fps) not in kept_offsets:
                    continue

                if self.target_size is not None:
                    # NOTE(review): Image.ANTIALIAS was removed in Pillow 10
                    # (Image.LANCZOS is the replacement) - confirm which
                    # Pillow version this project pins.
                    temp_image = image.array_to_img(frame)
                    frame = image.img_to_array(
                        temp_image.resize(self.target_size,
                                          Image.ANTIALIAS)).astype('uint8')
                list_of_frames.append(frame)
        finally:
            # Release the FFmpeg process even if decoding/resizing fails.
            capt.close()

        temp_video = np.stack(list_of_frames)
        if self.to_gray:
            temp_video = rgb2gray(temp_video)
        if self.max_frames is not None:
            temp_video = self.process_video(video=temp_video)
        return np.expand_dims(temp_video, axis=0)
コード例 #3
0
    def _get_frame(self, seek, video_idx, last):
        """Return the frame at position ``seek`` of video ``video_idx``.

        ``self.opened_videos[video_idx]`` holds items of the form
        ``[next_seek, frame_iterator, reader]``. An item whose ``next_seek``
        equals ``seek`` is reused; otherwise the video is opened again and
        its frame iterator is advanced to ``seek`` with ``islice``.

        Args:
            seek: Index of the frame to fetch.
            video_idx: Index into ``self.videos`` / ``self.opened_videos``.
            last: When true, the reader is closed and its item is dropped
                from the cache after the frame has been read.

        Returns:
            The next frame produced by the selected iterator.
        """
        handles = self.opened_videos[video_idx]

        # Look for an already opened handle positioned exactly at `seek`.
        entry = None
        if handles:
            for candidate in handles:
                if candidate[0] == seek:
                    entry = candidate
                    break

        if entry is None:
            # Nothing reusable: open the file and skip ahead to `seek`.
            path = join(self.root, self.videos[video_idx][1][0])
            source = FFmpegReader(path)
            entry = [seek, islice(source.nextFrame(), seek, None), source]
            self.opened_videos[video_idx].append(entry)

        # The handle will point at the following frame once this one is read.
        entry[0] = seek + 1
        result = next(entry[1])

        if last:
            # Uses the reader's private _close(), exactly as the original did.
            entry[2]._close()
            self.opened_videos[video_idx].remove(entry)

        return result
コード例 #4
0
ファイル: sample.py プロジェクト: jiqiujia/l3embedding
def read_video(video_path):
    """
    Read all frames of a video file

    Frames are rescaled by FFmpeg so that the shorter side is 256 pixels
    (the other side is scaled by the same factor and rounded up).

    Args:
        video_path: Path to video file

    Returns:
        frames: List of frames in decoding order, as yielded by
            ``FFmpegReader.nextFrame()``

    """
    vinfo = ffprobe(video_path)['video']
    width = int(vinfo['@width'])
    height = int(vinfo['@height'])

    scaling = 256.0 / min(width, height)
    new_width = int(math.ceil(scaling * width))
    new_height = int(math.ceil(scaling * height))

    # Let FFmpeg do the resizing via the output size option
    reader = FFmpegReader(video_path,
                          outputdict={'-s': "{}x{}".format(new_width,
                                                           new_height)})
    try:
        return list(reader.nextFrame())
    finally:
        # Always release the FFmpeg process, even if decoding fails midway.
        reader.close()
コード例 #5
0
def PreProcessVideo(fmt, filename, output, start=250, n_frames=5):
    """Flatten a window of video frames into per-pixel rows and save them.

    For each frame index ``t`` with ``start <= t < start + n_frames`` and
    each pixel, one row ``[t_scaled, x_scaled, y_scaled, c1, c2, c3]`` is
    written, where the three coordinates are divided by the video width and
    multiplied by 255. The resulting array is stored with ``np.save``.

    Args:
        fmt: Pixel format passed to FFmpeg as ``-pix_fmt``; each pixel must
            unpack into exactly three components.
        filename: Path of the input video.
        output: Destination passed to ``np.save``.
        start: Index of the first frame to encode.
        n_frames: Number of consecutive frames to encode.

    Note:
        The output has ``height * width * n_frames`` rows. Rows that are
        never filled (video shorter than ``start + n_frames``) stay zero.
    """
    info = ffprobe(filename)
    vinfo = info['video']
    height = int(vinfo['@height'])
    width = int(vinfo['@width'])
    # NOTE(review): time, x and y are all normalised by the *width*, as in
    # the original code - confirm this isotropic scaling is intended.
    norm = float(vinfo['@width'])

    # Sized from n_frames (previously a hard-coded 5) and zero-initialised
    # so unfilled rows are deterministic rather than uninitialised memory.
    X = np.zeros((height * width * n_frames, 6))

    v = FFmpegReader(filename, outputdict={'-pix_fmt': fmt})
    n = 0
    try:
        for t, frame in enumerate(v.nextFrame()):
            if t < start:
                continue
            if t >= start + n_frames:
                break
            print(t)
            sys.stdout.flush()
            t_scaled = (float(t - start) / norm) * 255.0
            for row_n, line in enumerate(frame):
                y_scaled = (float(row_n) / norm) * 255.0
                for col_n, pixel in enumerate(line):
                    c1, c2, c3 = pixel
                    x_scaled = (float(col_n) / norm) * 255.0
                    X[n] = [t_scaled, x_scaled, y_scaled, c1, c2, c3]
                    n += 1
    finally:
        # The loop normally exits early via `break`; release FFmpeg.
        v.close()

    print("Done with the encode part")
    np.save(output, X, allow_pickle=False, fix_imports=False)
コード例 #6
0
def iterate_video(filename, x1, y1, x2, y2, x3, y3, x4, y4, down_scale=True):
    """
    Iterate over all frames of the video.

    Count the cars that drive through one of the two rectangles (the
    per-frame work is delegated to ``traffic``, which receives three frames
    spaced ``stepsize`` apart). The visualisation is shown in a window and
    saved as a video in traffic.avi. Press Q to stop.

    :param filename: file name of the video
    :param x1, y1, x2, y2: two corner points of the first rectangle
    :param x3, y3, x4, y4: two corner points of the second rectangle
    :param down_scale: boolean: if True the video resolution is halved
    :return: None
    :raises FileNotFoundError: if ``filename`` is not an existing file
    """
    if not os.path.isfile(filename):
        # FileNotFoundError is still an Exception, so existing handlers work.
        raise FileNotFoundError("file not found")

    queue = collections.deque()
    stepsize = 5
    reader = FFmpegReader(filename)
    try:
        # getShape()[1:3] is used as (height, width) below.
        shape = reader.getShape()[1:3]
        if down_scale:
            shape = [shape[0] // 2, shape[1] // 2]
        video_writer = cv2.VideoWriter('traffic.avi', cv2.VideoWriter_fourcc(*'XVID'), 30.0, (shape[1], shape[0]))
        try:
            for frame in reader.nextFrame():
                if down_scale:
                    frame = cv2.resize(frame, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
                # Reverse the channel axis before handing frames to OpenCV.
                queue.append(frame[:, :, ::-1])
                if len(queue) > 2 * stepsize:
                    # Sliding window of 2 * stepsize + 1 frames.
                    res = traffic(queue[0], queue[stepsize], queue[stepsize * 2], x1, y1, x2, y2, x3, y3, x4, y4)
                    cv2.imshow("Traffic", res)
                    k = cv2.waitKey(1)
                    queue.popleft()
                    video_writer.write(res)
                    # Mask to the low byte so the check also works where
                    # waitKey returns extra high bits.
                    if (k & 0xFF) == ord('q'):
                        # press Q to break
                        break
        finally:
            # Finalise the output file even if processing fails midway.
            video_writer.release()
    finally:
        reader.close()
コード例 #7
0
    def get_frames(self, filename, wanted):
        """Decode a video and return the processed frames listed in ``wanted``.

        Args:
            filename: Path of the video to decode.
            wanted: Container of frame indices to keep (tested with ``in``).

        Returns:
            list: ``self.process_frame(n, filename, frame)`` for each of the
            ``self.grab_n_frames`` collected frames, after
            ``self._crop_bars`` has been applied to the stacked frames.

        Raises:
            RuntimeError: if fewer than ``self.grab_n_frames`` wanted frames
                were found in the video.
        """
        v = FFmpegReader(filename)

        frames = None
        n_frames = 0
        try:
            for n, frame in enumerate(v.nextFrame()):
                # the FFmpegReader API actually renders every frame; so it's
                # rather slow; but it ensures that every frame is rendered,
                # not just i-frames... getting i-frames would be faster, but
                # might increase false-negative rate due to picking out
                # different frames from different encodings
                if n not in wanted:
                    continue
                if frames is None:
                    # Allocate lazily: the frame shape is only known once the
                    # first wanted frame has been decoded.
                    frames = np.ndarray(
                        shape=(self.grab_n_frames, ) + frame.shape,
                        dtype=np.float64)

                frames[n_frames] = frame
                n_frames += 1
                if n_frames == self.grab_n_frames:
                    break
        finally:
            # Release FFmpeg even if decoding raises.
            v.close()

        if n_frames != self.grab_n_frames:
            # Report the number of frames actually found. The previous
            # len(frames) crashed with TypeError when no frame matched and
            # otherwise always printed the buffer size.
            raise RuntimeError(
                'Video has invalid number of frames: {}: {}'.format(
                    filename, n_frames))
        frames = self._crop_bars(frames)
        return [
            self.process_frame(n, filename, frame)
            for n, frame in enumerate(frames)
        ]
コード例 #8
0
    def __getitem__(self, index):
        """Load, augment and temporally sample the clip at ``index``.

        The video is decoded at a jittered frame rate, passed through
        ``transform_pre``, the augmentor and ``transform_post``, then cut to
        ``clip_size * nclips * step_size`` frames (all frames when
        ``nclips == -1``) and sub-sampled with stride ``step_size``. Short
        clips are padded by repeating the last frame.

        Returns:
            tuple: ``(data, target_idx)``, or ``(data, target_idx, item.id)``
            when ``self.get_item_id`` is set. ``data`` is the stacked frames
            with the first two axes swapped (presumably (C, T, H, W) -
            confirm against the transforms).
        """
        item = self.json_data[index]

        framerate_sampled = self.augmentor.jitter_fps(FRAMERATE)

        optional_args = {"-r": "%d" % framerate_sampled}
        duration = self.get_duration(item.path)

        if duration is not None:
            # Cap the number of decoded frames to the known duration
            nframes = int(duration * framerate_sampled)
            optional_args["-vframes"] = "%d" % nframes

        # Open video file
        reader = FFmpegReader(item.path,
                              inputdict={},
                              outputdict=optional_args)

        imgs = []
        try:
            for img in reader.nextFrame():
                imgs.append(img)
        except (RuntimeError, ZeroDivisionError) as exception:
            # Best effort: keep whatever was decoded before the failure.
            print('{}: WEBM reader cannot open {}. Empty '
                  'list returned.'.format(type(exception).__name__, item.path))
        finally:
            # One FFmpeg subprocess is spawned per item; release it here
            # instead of leaking it until garbage collection.
            reader.close()

        imgs = self.transform_pre(imgs)
        imgs, label = self.augmentor(imgs, item.label)
        imgs = self.transform_post(imgs)

        num_frames = len(imgs)
        target_idx = self.classes_dict[label]

        if self.nclips > -1:
            num_frames_necessary = self.clip_size * self.nclips * self.step_size
        else:
            num_frames_necessary = num_frames
        offset = 0
        if num_frames_necessary < num_frames:
            # If there are more frames, then sample starting offset.
            diff = (num_frames - num_frames_necessary)
            # temporal augmentation
            # NOTE(review): randint's upper bound is exclusive, so the last
            # valid offset (== diff) is never drawn - confirm if intended.
            if not self.is_val:
                offset = np.random.randint(0, diff)

        imgs = imgs[offset:num_frames_necessary + offset:self.step_size]

        if len(imgs) < (self.clip_size * self.nclips):
            # Pad short clips by repeating the final frame
            imgs.extend([imgs[-1]] *
                        ((self.clip_size * self.nclips) - len(imgs)))

        # format data to torch
        data = torch.stack(imgs)
        data = data.permute(1, 0, 2, 3)
        if self.get_item_id:
            return (data, target_idx, item.id)
        else:
            return (data, target_idx)
    def __init__(
        self,
        filenames: List[str],
        trims: List[Tuple[int, int]],
        crops: List[Tuple[int, int, int, int]],
        frame_rate: float = 15,
    ):
        """Preprocess the video frames and audio spectrogram of every source.

        TDCCMCDataset is an unconventional dataset, where each data is
        dynamically sampled whenever needed instead of a static dataset.
        Therefore no static dataset is defined here; the video and audio are
        simply preprocessed for faster `__getitem__`.

        Args:
            filenames: Paths without extension; ``name + ".mp4"`` and
                ``name + ".wav"`` are read for each entry.
            trims: Per-file ``(start, stop)`` frame indices; frames with
                ``start <= index < stop`` are kept.
            crops: Per-file slice bounds applied before resizing: columns
                ``crop[0]:crop[2]`` and rows ``crop[1]:crop[3]``.
            frame_rate: Forwarded to FFmpeg as ``-r`` for input and output.
        """
        super().__init__()

        # One (frames, spectrogram) pair per input file
        self.sources: List[Tuple[np.ndarray, np.ndarray]] = []
        for filename, trim, crop in zip(filenames, trims, crops):
            # Get video frames with scikit-video
            reader = FFmpegReader(
                filename + ".mp4",
                inputdict={"-r": str(frame_rate)},
                outputdict={"-r": str(frame_rate)},
            )
            frames = []
            try:
                for frame_idx, frame in enumerate(reader.nextFrame()):
                    # Trim video (time)
                    if frame_idx < trim[0]:
                        continue
                    if frame_idx >= trim[1]:
                        break

                    # Crop frames (space)
                    frame = frame[crop[1]:crop[3], crop[0]:crop[2], :]
                    frames.append(cv2.resize(frame, (140, 140)))
            finally:
                # The loop usually exits early at trim[1]; close the reader
                # so one FFmpeg process per file is not left running.
                reader.close()

            # Change to NumPy array with PyTorch dimension format
            frames = np.array(frames, dtype=float)
            frames = np.transpose(frames, axes=(0, 3, 1, 2))

            # STFT audio
            # TODO Magic number sr=2000, n_fft=510
            y, _ = librosa.load(filename + ".wav", sr=2000)
            D = librosa.core.stft(y, n_fft=510)
            D = np.abs(D)

            # Save video frames and audio
            self.sources.append((frames, D))
コード例 #10
0
    def _read_video(self, path):
        """
        Read a video file into a stacked NumPy array of frames.

        Side effect: ``self.fps`` is set to the integer input frame rate.
        When ``self.required_fps`` is not None, a frame is kept only if
        ``index % self.fps`` lies in ``range(self.required_fps)``.

        Parameters:
            path (str): Required
                Path of the video to be read

        Returns:
            Numpy.ndarray
                The stacked frames, (<No. of frames>, <height>, <width>,
                <channels>) before the optional ``rgb2gray`` /
                ``self._process_video`` steps. No leading batch axis is
                added here.
        """

        cap = FFmpegReader(filename=path)
        list_of_frames = []
        try:
            self.fps = int(cap.inputfps)                  # Frame Rate

            # Loop-invariant set of offsets kept within each group of
            # `fps` frames when subsampling is requested.
            kept_offsets = None
            if self.required_fps is not None:
                kept_offsets = range(self.required_fps)

            for index, frame in enumerate(cap.nextFrame()):
                if kept_offsets is not None and (index % self.fps) not in kept_offsets:
                    continue

                if self.target_size is not None:
                    # NOTE(review): Image.ANTIALIAS was removed in Pillow 10
                    # (Image.LANCZOS is the replacement) - confirm which
                    # Pillow version this project pins.
                    temp_image = image.array_to_img(frame)
                    frame = image.img_to_array(
                        temp_image.resize(
                            self.target_size,
                            Image.ANTIALIAS)).astype('uint8')

                # Shape of each frame -> (<height>, <width>, 3)
                list_of_frames.append(frame)
        finally:
            # Release the FFmpeg process even if decoding/resizing fails.
            cap.close()

        temp_video = np.stack(list_of_frames)

        if self.to_gray:
            temp_video = rgb2gray(temp_video)

        if self.max_frames is not None:
            temp_video = self._process_video(video=temp_video)

        return temp_video
コード例 #11
0
    vid = FFmpegReader(fnm)
    # vid = cv2.VideoCapture(fnm)

    # success, images = vid.read()
    # print(success)
    # images = [images]
    # images = []

    # split_num = 30
    # frame_num = 2950
    # split_size = frame_num // split_num

    val_split = 2200

    count = 0
    for frame_num, frame in enumerate(vid.nextFrame()):
        # success, next_image = vid.read()
        if frame_num % 100 == 0:
            print(frame_num)
        if frame_num < val_split:
            imsave('/mnt/data/pigs/imgs/{}/{}.png'.format(num, frame_num),
                   frame)
        else:
            imsave(
                '/mnt/data/pigs/val_imgs/{}/{}.png'.format(
                    num, frame_num - val_split), frame)

        # if len(images) == split_size:
        # print('Saving data/%d-%d.npy' % (num, count))
        # np.save('/mnt/data/pigs/train/%d-%d.npy' % (num, count),
        #         np.array(images))
コード例 #12
0
def get_mp4_frames(mp4_path, skip_frames, num_frames_per_event,
                   do_flip, brighten_val, is_high_res, do_aug):
    """Extract the temporally centred frames of an mp4 into a padded array.

    ``num_frames_per_event`` frames are taken, one every ``skip_frames``
    frames, starting at ``(inputframenum - num_frames_per_event *
    skip_frames) // 2``. Each frame is centred inside a zero-filled canvas
    of the expected shape (``EXPECTED_HIGH_RES`` or ``EXPECTED_LOW_RES``).

    Args:
        mp4_path: Path of the video file.
        skip_frames: Temporal stride between sampled frames.
        num_frames_per_event: Number of frames to return.
        do_flip: If true, each stored frame is passed through ``hflip_img``.
        brighten_val: Values below 1.0 trigger ``adj_brightness`` on each
            stored frame.
        is_high_res: Selects which expected frame shape is used.
        do_aug: Unused in this function.

    Returns:
        numpy.ndarray of dtype uint8 and shape
        ``(num_frames_per_event,) + frame_shape``, or None when the video
        cannot be opened, the computed start frame is not positive, or the
        video is larger than the expected shape along any axis.
    """
    # Get mp4 reader
    try:
        reader = FFmpegReader(mp4_path)
    except Exception as e:
        # Deliberate best effort: an unreadable file yields None.
        if PRINT_ERRORS:
            print(e)

        return None

    try:
        # Get starting frame and offsets
        frame_shape = EXPECTED_HIGH_RES if is_high_res else EXPECTED_LOW_RES
        start_frame = (reader.inputframenum
                       - (num_frames_per_event * skip_frames)) // 2
        if start_frame <= 0:
            return None

        # Offsets that centre the decoded frame inside the canvas; a
        # negative offset means the video exceeds the expected size.
        start_x = int((frame_shape[0] - reader.outputheight) // 2)
        if start_x < 0:
            return None

        start_y = int((frame_shape[1] - reader.outputwidth) // 2)
        if start_y < 0:
            return None

        start_z = int((frame_shape[2] - reader.outputdepth) // 2)
        if start_z < 0:
            return None

        # Put middle (num_frames_per_event * skip_frames) input frames in
        # numpy array
        frame_array = np.zeros(shape=((num_frames_per_event, ) + frame_shape),
                               dtype=np.uint8)

        cur_i = 0
        for cur_frame, frame in enumerate(reader.nextFrame()):
            if cur_i >= num_frames_per_event:
                # Clip is complete; no need to decode the rest of the file.
                break
            if cur_frame < start_frame:
                continue
            if (cur_frame - start_frame) % skip_frames != 0:
                continue

            frame_array[cur_i,
                        start_x:start_x + reader.outputheight,
                        start_y:start_y + reader.outputwidth,
                        start_z:start_z + reader.outputdepth] = frame

            if brighten_val < 1.0:
                frame_array[cur_i, :, :, :] = adj_brightness(
                    frame_array[cur_i, :, :, :], brighten_val)

            if do_flip:
                frame_array[cur_i, :, :, :] = hflip_img(
                    frame_array[cur_i, :, :, :])

            cur_i += 1

        # Return array with frames
        return frame_array
    finally:
        # Single exit point for releasing the FFmpeg process.
        reader.close()
コード例 #13
0
                    nClasses, subDir, nFrames, nVideos))

                # Create class directories if they do not exist
                classDir = os.path.join(rootDirSave, subDir)
                if not os.path.exists(classDir):
                    os.makedirs(classDir)

                pbar3 = trange(nFrames,
                               ncols=100,
                               position=4,
                               desc='Video progress        ')
                frameCount = 0
                # Tensor to save all the frames of a video
                frameCollection = torch.FloatTensor(nFrames // args.skip, 3,
                                                    iHeight, iWidth)
                for frame in reader.nextFrame():  # Garb each frame
                    frameCount += 1
                    if (frameCount % args.skip) == 0:
                        # Original resolution -> desired resolution
                        tempImg = resize(frame, (iHeight, iWidth))

                        if getImgs:
                            imgName = '{:02}_{:04}.png'.format(
                                nVideos, frameCount)
                            # Ignore warning regarding float64 being converted into uint8
                            with warnings.catch_warnings():
                                warnings.simplefilter("ignore")
                                imsave(
                                    os.path.join(rootDirSave, subDir, imgName),
                                    tempImg)