def read_video(self, path):
        """Read a video file and return its (optionally sub-sampled) frames.

        Side effect: sets ``self.fps`` to the integer input frame rate
        reported by the reader.

        Parameters:
            path (str): Path of the video to be read.

        Returns:
            numpy.ndarray: The stacked frames with an extra leading axis of
            length 1 added by ``np.expand_dims``.  For colour input with no
            grey conversion this is presumably
            (1, <No. of frames>, <height>, <width>, <channels>) -- the exact
            shape depends on ``rgb2gray`` and ``self.process_video``.

        Raises:
            ValueError: from ``np.stack`` when no frame was selected.
        """
        capt = FFmpegReader(filename=path)
        list_of_frames = []
        try:
            self.fps = int(capt.inputfps)

            # When a target rate is requested, keep only the first
            # `required_fps` frames out of every window of `fps` frames.
            keep_slots = None
            if self.required_fps is not None:
                keep_slots = range(self.required_fps)

            for index, frame in enumerate(capt.nextFrame()):
                # frame -> (<height>, <width>, 3)
                if keep_slots is not None and (index % self.fps) not in keep_slots:
                    continue

                if self.target_size is not None:
                    temp_image = image.array_to_img(frame)
                    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is
                    # the same filter under its current name.
                    frame = image.img_to_array(
                        temp_image.resize(self.target_size,
                                          Image.LANCZOS)).astype('uint8')
                list_of_frames.append(frame)
        finally:
            # Always release the ffmpeg subprocess, even if decoding fails.
            capt.close()

        temp_video = np.stack(list_of_frames)
        if self.to_gray:
            temp_video = rgb2gray(temp_video)
        if self.max_frames is not None:
            temp_video = self.process_video(video=temp_video)
        return np.expand_dims(temp_video, axis=0)
Esempio n. 2
0
    def get_frames(self, filename, wanted):
        """Decode a video and return the processed frames at the wanted indices.

        Parameters:
            filename: Path of the video to decode.
            wanted: Container of frame indices to keep (tested with ``in``).

        Returns:
            list: ``self.process_frame(n, filename, frame)`` for each of the
            ``self.grab_n_frames`` collected frames, after ``self._crop_bars``.

        Raises:
            RuntimeError: if fewer than ``self.grab_n_frames`` wanted frames
                could be read from the video.
        """
        v = FFmpegReader(filename)  # , outputdict={'-pix_fmt': 'yuv444p'})

        frames = None
        n_frames = 0
        try:
            for n, frame in enumerate(v.nextFrame()):
                # the FFmpegReader API actually renders every frame; so it's
                # rather slow; but it ensures that every frame is rendered,
                # not just i-frames... getting i-frames would be faster, but
                # might increase false-negative rate due to picking out
                # different frames from different encodings
                if n not in wanted:
                    continue
                if frames is None:
                    # Allocate lazily: the frame shape is only known once the
                    # first wanted frame has been decoded.
                    frames = np.ndarray(
                        shape=(self.grab_n_frames, ) + frame.shape,
                        dtype=np.float64)

                frames[n_frames] = frame
                n_frames += 1
                if n_frames == self.grab_n_frames:
                    break
        finally:
            # Release the ffmpeg subprocess even if decoding fails.
            v.close()

        if n_frames != self.grab_n_frames:
            # Report the number actually grabbed; `frames` may still be None
            # here and its length is the buffer size, not the frame count.
            raise RuntimeError(
                'Video has invalid number of frames: {}: {}'.format(
                    filename, n_frames))
        frames = self._crop_bars(frames)
        return [
            self.process_frame(n, filename, frame)
            for n, frame in enumerate(frames)
        ]
def iterate_video(filename, x1, y1, x2, y2, x3, y3, x4, y4, down_scale=True):
    """
    Iterate over all frames of the video.

    Each time more than ``2 * stepsize`` frames are buffered, three of them
    (spaced ``stepsize`` apart) are passed to ``traffic`` together with the
    two rectangles; per the original author this counts the cars driving
    through either rectangle.  The returned visualisation is displayed and
    written to ``traffic.avi``.  Press Q to stop.

    :param filename: file name of the video
    :param x1, y1, x2, y2: two corner points of the first rectangle
    :param x3, y3, x4, y4: two corner points of the second rectangle
    :param down_scale: boolean: if True the resolution of the video is halved
    :return: None
    :raises FileNotFoundError: if ``filename`` is not an existing file
    """
    if not os.path.isfile(filename):
        # FileNotFoundError is still an Exception, so existing handlers work.
        raise FileNotFoundError("file not found")

    queue = collections.deque()
    reader = FFmpegReader(filename)
    video_writer = None
    try:
        shape = reader.getShape()[1:3]  # (height, width)
        if down_scale:
            shape = [shape[0] // 2, shape[1] // 2]
        stepsize = 5
        # VideoWriter expects the frame size as (width, height).
        video_writer = cv2.VideoWriter('traffic.avi',
                                       cv2.VideoWriter_fourcc(*'XVID'), 30.0,
                                       (shape[1], shape[0]))
        for frame in reader.nextFrame():
            if down_scale:
                frame = cv2.resize(frame, None, fx=0.5, fy=0.5,
                                   interpolation=cv2.INTER_AREA)
            # Reverse the channel axis (RGB from the reader -> BGR for OpenCV).
            queue.append(frame[:, :, ::-1])
            if len(queue) > 2 * stepsize:
                res = traffic(queue[0], queue[stepsize], queue[stepsize * 2],
                              x1, y1, x2, y2, x3, y3, x4, y4)
                cv2.imshow("Traffic", res)
                k = cv2.waitKey(1)
                queue.popleft()
                video_writer.write(res)
                if k == 113:
                    # press Q to break
                    break
    finally:
        # Release the output file and the ffmpeg subprocess on every exit
        # path, including exceptions raised while processing.
        if video_writer is not None:
            video_writer.release()
        reader.close()
Esempio n. 4
0
def PreProcessVideo(fmt, filename, output, start=250, n_frames=5):
    """Flatten a window of video frames into a per-pixel feature table.

    Frames ``start`` .. ``start + n_frames - 1`` are decoded with pixel
    format ``fmt`` and every pixel becomes one row
    ``[t_scaled, x_scaled, y_scaled, c1, c2, c3]``.  The table is saved to
    ``output`` with ``np.save``.

    Parameters:
        fmt: ffmpeg ``-pix_fmt`` value; each decoded pixel must unpack into
            exactly three components.
        filename: Path of the input video.
        output: Destination passed to ``np.save``.
        start: Index of the first frame to encode.
        n_frames: Number of consecutive frames to encode.

    Returns:
        None
    """
    info = ffprobe(filename)
    vinfo = info['video']
    height = int(vinfo['@height'])
    width = int(vinfo['@width'])
    # NOTE(review): time, x and y are all normalised by the frame *width*,
    # exactly as in the original code -- confirm this is intended (y would
    # normally be divided by the height).
    norm = float(vinfo['@width'])

    v = FFmpegReader(filename, outputdict={'-pix_fmt': fmt})

    # One row per pixel per encoded frame.  Sized from `n_frames` (it was
    # hard-coded to 5) and zero-filled so that rows stay well defined when
    # the video ends before `start + n_frames`.
    X = np.zeros((height * width * n_frames, 6))
    n = 0
    try:
        for t, frame in enumerate(v.nextFrame()):
            if t < start:
                continue
            if t >= start + n_frames:
                break
            print(t)
            sys.stdout.flush()
            t_scaled = (float(t - start) / norm) * 255.0
            for row_n, line in enumerate(frame):
                y_scaled = (float(row_n) / norm) * 255.0
                for col_n, pixel in enumerate(line):
                    c1, c2, c3 = pixel
                    x_scaled = (float(col_n) / norm) * 255.0
                    X[n] = np.array(
                        [t_scaled, x_scaled, y_scaled, c1, c2, c3])
                    n += 1
    finally:
        # Release the ffmpeg subprocess (we usually stop before end of file).
        v.close()

    print("Done with the encode part")
    np.save(output, X, allow_pickle=False, fix_imports=False)
Esempio n. 5
0
def read_video(video_path):
    """
    Read all frames of a video file

    Frames are rescaled by ffmpeg (via the ``-s`` output option) so that the
    smaller side is 256 pixels; the other side is scaled by the same factor
    and rounded up.

    Args:
        video_path: Path to video file

    Returns:
        frames: List of the frames yielded by the reader, in order (a plain
            Python list, not a stacked array)

    """
    vinfo = ffprobe(video_path)['video']
    width = int(vinfo['@width'])
    height = int(vinfo['@height'])

    scaling = 256.0 / min(width, height)
    new_width = int(math.ceil(scaling * width))
    new_height = int(math.ceil(scaling * height))

    # Resize frames
    reader = FFmpegReader(video_path,
                          outputdict={'-s': "{}x{}".format(new_width,
                                                           new_height)})
    try:
        return list(reader.nextFrame())
    finally:
        # Release the ffmpeg subprocess even if decoding fails part-way.
        reader.close()
    def __init__(
        self,
        filename: str,
        trim: Tuple[int, int],
        crop: Tuple[int, int, int, int],
        frame_rate: float = 15,
    ) -> None:
        """Load and preprocess one video/audio pair.

        Args:
            filename: Path without extension; ``filename + ".mp4"`` is read
                as video and ``filename + ".wav"`` as audio.
            trim: ``(first, end)`` frame indices; frames with
                ``first <= index < end`` are kept.
            crop: ``(x0, y0, x1, y1)`` pixel window applied to every kept
                frame before it is resized to 140x140.
            frame_rate: Value passed to ffmpeg as ``-r`` for both input and
                output.
        """
        super().__init__()

        # Get video frames with scikit-video
        reader = FFmpegReader(
            filename + ".mp4",
            inputdict={"-r": str(frame_rate)},
            outputdict={"-r": str(frame_rate)},
        )
        frames = []
        try:
            for frame_idx, frame in enumerate(reader.nextFrame()):
                # Trim video (time)
                if frame_idx < trim[0]:
                    continue
                if frame_idx >= trim[1]:
                    break

                # Crop frames (space)
                frame = frame[crop[1]:crop[3], crop[0]:crop[2], :]
                frames.append(cv2.resize(frame, (140, 140)))
        finally:
            # We normally stop before end of file, so release ffmpeg here.
            reader.close()

        # Change to NumPy array with PyTorch dimension format:
        # (frame, row, col, channel) -> (frame, channel, row, col)
        self.frames: np.ndarray = np.transpose(
            np.array(frames, dtype=float), axes=(0, 3, 1, 2))

        # Magnitude of the short-time Fourier transform of the audio track
        y, _ = librosa.load(filename + ".wav", sr=2000)
        D = librosa.core.stft(y, n_fft=510)
        self.samples = np.abs(D)
Esempio n. 7
0
    def _get_frame(self, seek, video_idx, last):
        """Return frame number ``seek`` of video ``video_idx``.

        Open readers are cached per video in ``self.opened_videos`` as
        ``[next_frame_index, frame_iterator, reader]`` entries, so that
        consecutive requests continue on the same iterator instead of
        reopening the file.  When ``last`` is true the reader used for this
        request is closed and dropped from the cache.
        """
        handles = self.opened_videos[video_idx]
        entry = None
        if handles:
            # Reuse a reader whose iterator is positioned exactly at `seek`.
            entry = next(
                (candidate for candidate in handles if candidate[0] == seek),
                None)

        if entry is None:
            # Nothing suitable is open: start a new reader and skip ahead.
            path = join(self.root, self.videos[video_idx][1][0])
            reader = FFmpegReader(path)
            frame_iter = reader.nextFrame()
            entry = [seek, islice(frame_iter, seek, None), reader]
            self.opened_videos[video_idx].append(entry)

        entry[0] = seek + 1  # the iterator will now yield the following frame
        frame = next(entry[1])

        if last:
            entry[2]._close()  # close video file (private method?!)
            self.opened_videos[video_idx].remove(entry)

        return frame
Esempio n. 8
0
def read_mj2_frames(fname):
    """Read every frame of ``fname`` as 16-bit grey and return them stacked.

    The reader is asked for the ``gray16le`` pixel format; the frames are
    stacked along a new first axis and all length-1 axes are squeezed out.
    """
    from skvideo.io import FFmpegReader
    reader = FFmpegReader(fname, outputdict={'-pix_fmt': 'gray16le'})
    frames = [frame for frame in reader]
    reader.close()
    stacked = np.stack(frames)
    return stacked.squeeze()
Esempio n. 9
0
    def __getitem__(self, index):
        """Load, augment and clip the video of dataset entry ``index``.

        The video is decoded at a jittered frame rate, passed through
        ``transform_pre``, the augmentor and ``transform_post``, then cut to
        the number of frames required by ``clip_size``, ``nclips`` and
        ``step_size`` (padding by repeating the last frame when too short).

        Returns:
            ``(data, target_idx)`` or, when ``self.get_item_id`` is set,
            ``(data, target_idx, item.id)``.  ``data`` is the stacked frame
            tensor with its first two axes swapped.
        """
        item = self.json_data[index]

        framerate_sampled = self.augmentor.jitter_fps(FRAMERATE)

        optional_args = {"-r": "%d" % framerate_sampled}
        duration = self.get_duration(item.path)

        if duration is not None:
            nframes = int(duration * framerate_sampled)
            optional_args["-vframes"] = "%d" % nframes

        # Open video file
        reader = FFmpegReader(item.path,
                              inputdict={},
                              outputdict=optional_args)

        imgs = []
        try:
            for img in reader.nextFrame():
                imgs.append(img)
        except (RuntimeError, ZeroDivisionError) as exception:
            # Best effort: keep whatever was decoded before the failure.
            print('{}: WEBM reader cannot open {}. Empty '
                  'list returned.'.format(type(exception).__name__, item.path))
        finally:
            # __getitem__ runs once per sample; without this every call
            # would leave an ffmpeg subprocess and its pipes open.
            reader.close()

        imgs = self.transform_pre(imgs)
        imgs, label = self.augmentor(imgs, item.label)
        imgs = self.transform_post(imgs)

        num_frames = len(imgs)
        target_idx = self.classes_dict[label]

        if self.nclips > -1:
            num_frames_necessary = self.clip_size * self.nclips * self.step_size
        else:
            num_frames_necessary = num_frames
        offset = 0
        if num_frames_necessary < num_frames:
            # If there are more frames, then sample starting offset.
            diff = (num_frames - num_frames_necessary)
            # temporal augmentation
            if not self.is_val:
                offset = np.random.randint(0, diff)

        imgs = imgs[offset:num_frames_necessary + offset:self.step_size]

        if len(imgs) < (self.clip_size * self.nclips):
            # Too few frames: pad by repeating the last one.
            imgs.extend([imgs[-1]] *
                        ((self.clip_size * self.nclips) - len(imgs)))

        # format data to torch
        data = torch.stack(imgs)
        data = data.permute(1, 0, 2, 3)
        if self.get_item_id:
            return (data, target_idx, item.id)
        else:
            return (data, target_idx)
Esempio n. 10
0
def image_streamer(sources, start=0, remap_func=None):
    """Yield ``(path, frame_no, frame)`` triples from a list of sources.

    Videos, images and COCO json files are supported, as are glob patterns
    of these.  Sources of any other type are skipped with a warning.

        sources: list of str; The file paths to the image sources.
                 Can be an image, video or COCO json, globs accepted.
        start: int (optional); Start from this position in the (glob
               expanded) list.
        remap_func: lambda or function; A function that accepts a filename
                    parameter and outputs the path to the file. Used to
                    change relative directories of COCO datasets.

    """
    from warnings import warn
    from glob import glob
    from skvideo.io import FFmpegReader
    from contextlib import closing, redirect_stdout

    image_suffixes = ('.png', '.jpg', '.jpeg', '.tiff')
    video_suffixes = ('.avi', '.mpg', '.mp4')

    def is_image(path):
        return path.lower().endswith(image_suffixes)

    def is_video(path):
        return path.lower().endswith(video_suffixes)

    def identity(x):
        return x

    if not remap_func:
        remap_func = identity

    # Image patterns are kept as a single entry (and expanded later) so the
    # frames of one sequence are numbered together; other globs expand now.
    expanded = []
    for entry in sources:
        if '*' not in entry or is_image(entry):
            expanded.append(entry)
        else:
            expanded.extend(glob(entry, recursive=True))

    for entry in expanded[start:]:
        if is_video(entry):
            with closing(FFmpegReader(entry)) as reader:
                for frame_no, frame in enumerate(reader.nextFrame()):
                    yield entry, frame_no, frame
            continue

        if is_image(entry):
            matches = glob(entry, recursive=True)
            for frame_no, image_path in enumerate(matches):
                yield image_path, frame_no, imread(remap_func(image_path))
            continue

        if entry.endswith('.json'):
            # COCO database; silence whatever the loader prints
            with redirect_stdout(None):
                coco = COCO(entry)
            records = coco.loadImgs(coco.getImgIds())
            for frame_no, record in enumerate(records):
                # TODO: It's not clear how to address relative paths
                if 'path' in record:
                    image_path = record['path']
                else:
                    image_path = remap_func(record['file_name'])
                yield image_path, frame_no, imread(image_path)
            del coco
            continue

        warn("Skipped an unknown source type {}.".format(entry))
Esempio n. 11
0
    def __init__(
            self,
            filenames,
            extensions=[
                '.avi', '.mov', '.mj2'
            ],  # tried in this order when `filenames` is a folder
            extension=None,
            nchannels=None):
        '''
        Select a stack from a sequence of mov stack files

        filenames: list of video paths, or the path of a folder to search.
        extensions: extensions tried in order when a folder is given; the
                    first one that matches any file is used.
        extension: extension handed to the base class when explicit
                   filenames are given.
        nchannels: number of interleaved channels (defaults to 1); frame
                   offsets are divided by it.

        Raises OSError if no files are found.
        '''
        self.extension = extension
        # check if it is a folder
        if isinstance(filenames, str) and os.path.isdir(filenames):
            dirname = filenames
            filenames = []
            # A separate loop name keeps the `extension` argument intact.
            for candidate in extensions:
                self.extension = candidate
                filenames = natsorted(
                    glob(pjoin(dirname, '*' + candidate)))
                if len(filenames):
                    break
        if not len(filenames):
            raise OSError('Could not find files.')
        # Pass the extension that was actually matched; the loop variable
        # used to shadow `extension`, so the last candidate was always sent.
        super(VideoStack, self).__init__(filenames, self.extension)
        from skvideo.io import FFmpegReader
        self.reader = FFmpegReader
        offsets = [0]
        for fname in self.filenames:
            # Parse all files in the stack
            with FFmpegReader(fname) as f:
                dims = f.getShape()[:-1]
                if f.pix_fmt == 'gray16le':
                    dtype = 'uint16'
                else:
                    dtype = 'uint8'
                self.pix_fmt = f.pix_fmt
                offsets.append(dims[0])
                self.framerate = f.inputfps
        # offset for each file
        self.frames_offset = np.cumsum(offsets)
        if nchannels is None:
            nchannels = 1
        self.frames_offset = (self.frames_offset / nchannels).astype(int)
        # dims, dtype, pix_fmt and framerate come from the last file parsed
        self.dims = dims[1:]
        self.dims = [nchannels, *self.dims]
        self.dtype = dtype
        self.nframes = self.frames_offset[-1]
        self.shape = tuple([self.nframes, *self.dims])
        self.current_fileidx = -1
        self.current_frameidx = 0
    def __init__(
        self,
        filenames: List[str],
        trims: List[Tuple[int, int]],
        crops: List[Tuple[int, int, int, int]],
        frame_rate: float = 15,
    ):
        """Preprocess every video/audio pair into ``self.sources``.

        Args:
            filenames: Paths without extension; ``name + ".mp4"`` is read as
                video and ``name + ".wav"`` as audio.
            trims: Per-file ``(first, end)`` frame indices; frames with
                ``first <= index < end`` are kept.
            crops: Per-file ``(x0, y0, x1, y1)`` pixel window applied to each
                kept frame before it is resized to 140x140.
            frame_rate: Value passed to ffmpeg as ``-r`` for both input and
                output.
        """
        # TDCCMCDataset is an unconventional dataset, where each data is
        # dynamically sampled whenever needed instead of a static dataset.
        # Therefore, in `__init__`, we do not define a static dataset. Instead,
        # we simply preprocess the video and audio for faster `__getitem__`.

        super().__init__()

        self.sources: List[Tuple[np.ndarray, np.ndarray]] = []
        for filename, trim, crop in zip(filenames, trims, crops):
            # Get video frames with scikit-video
            reader = FFmpegReader(
                filename + ".mp4",
                inputdict={"-r": str(frame_rate)},
                outputdict={"-r": str(frame_rate)},
            )
            frames = []
            try:
                for frame_idx, frame in enumerate(reader.nextFrame()):
                    # Trim video (time)
                    if frame_idx < trim[0]:
                        continue
                    if frame_idx >= trim[1]:
                        break

                    # Crop frames (space)
                    frame = frame[crop[1]:crop[3], crop[0]:crop[2], :]
                    frames.append(cv2.resize(frame, (140, 140)))
            finally:
                # One reader per file: release its ffmpeg subprocess before
                # moving on, otherwise they pile up across the dataset.
                reader.close()

            # Change to NumPy array with PyTorch dimension format
            frames = np.array(frames, dtype=float)
            frames = np.transpose(frames, axes=(0, 3, 1, 2))

            # STFT audio
            # TODO Magic number sr=2000, n_fft=510
            y, _ = librosa.load(filename + ".wav", sr=2000)
            D = librosa.core.stft(y, n_fft=510)
            D = np.abs(D)

            # Save video frames and audio
            self.sources.append((frames, D))
Esempio n. 13
0
    def _read_video(self, path):
        """
        Read a video file and return its (optionally sub-sampled) frames.

        Side effect: sets ``self.fps`` to the integer input frame rate
        reported by the reader.

        Parameters:
            path (str): Required
                Path of the video to be read

        Returns:
            Numpy.ndarray
                The selected frames stacked along a new first axis. No batch
                axis is added here; for colour input with no grey conversion
                this is presumably
                (<No. of frames>, <height>, <width>, <channels>) -- the exact
                shape depends on ``rgb2gray`` and ``self._process_video``.

        Raises:
            ValueError: from ``np.stack`` when no frame was selected.
        """

        cap = FFmpegReader(filename=path)
        list_of_frames = []
        try:
            self.fps = int(cap.inputfps)                  # Frame Rate

            # When a target rate is requested, keep only the first
            # `required_fps` frames out of every window of `fps` frames.
            keep_slots = None
            if self.required_fps is not None:
                keep_slots = range(self.required_fps)

            for index, frame in enumerate(cap.nextFrame()):
                if keep_slots is not None and (index % self.fps) not in keep_slots:
                    continue

                if self.target_size is not None:
                    temp_image = image.array_to_img(frame)
                    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is
                    # the same filter under its current name.
                    frame = image.img_to_array(
                        temp_image.resize(
                            self.target_size,
                            Image.LANCZOS)).astype('uint8')

                # Shape of each frame -> (<height>, <width>, 3)
                list_of_frames.append(frame)
        finally:
            # Always release the ffmpeg subprocess, even if decoding fails.
            cap.close()

        temp_video = np.stack(list_of_frames)

        if self.to_gray:
            temp_video = rgb2gray(temp_video)

        if self.max_frames is not None:
            temp_video = self._process_video(video=temp_video)

        return temp_video
Esempio n. 14
0
    def get_frame_count(self, paths):
        """
        Report how many frames each video has; handy for choosing
        `max_frames`.

        Parameters:
            paths (list): Required
                 A list of paths of the videos to be read

        Returns:
            dict (python dictionary)
                Maps every path to the frame count reported by its reader
                (``inputframenum``).
        """

        counts = {}
        for video_path in paths:
            reader = FFmpegReader(filename=video_path)
            total = reader.inputframenum
            counts[video_path] = total
            reader.close()

        return counts
Esempio n. 15
0
    def __init__(
        self,
        filenames: List[str],
        trims: List[Tuple[int, int]],
        crops: List[Tuple[int, int, int, int]],
        frame_rate: float = 15,
    ):
        """Open one video reader and precompute one spectrogram per file.

        ``filenames`` hold paths without extension: ``name + ".mp4"`` is
        opened as video and ``name + ".wav"`` is loaded as audio.  ``trims``
        and ``crops`` are only stored here.  ``frame_rate`` is passed to
        ffmpeg as ``-r`` for both input and output.
        """
        # TDCCMCDataset is an unconventional dataset: samples are drawn
        # dynamically on demand rather than from a static table, so
        # `__init__` only prepares the video readers and the audio to make
        # `__getitem__` faster.

        super().__init__()
        self.filenames = filenames
        self.trims = trims
        self.crops = crops

        self.audios: List[np.ndarray] = []
        self.readers: List[Any] = []
        rate = str(frame_rate)
        for stem in filenames:
            # Video: keep the scikit-video reader open for later use
            self.readers.append(
                FFmpegReader(
                    stem + ".mp4",
                    inputdict={"-r": rate},
                    outputdict={"-r": rate},
                ))

            # Audio: magnitude of the STFT
            # TODO Magic number sr=2000, n_fft=510
            waveform, _ = librosa.load(stem + ".wav", sr=2000)
            spectrogram = np.abs(librosa.core.stft(waveform, n_fft=510))
            self.audios.append(spectrogram)
Esempio n. 16
0
        type=float,
        default=0,
        help="seconds to skip at the begining of the video. Default 0.")
    parser.add_argument("--ouput",
                        type=str,
                        default='output.csv',
                        help="Name of the output file. Default output.csv.")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG)
    file_path = args.file_path
    output_file_name = args.ouput
    with open(output_file_name, 'w') as csvfile:
        fieldnames = ['frame', 'x', 'y']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

    metadata = ffprobe(file_path)['video']
    fr = float(metadata['@r_frame_rate'].split('/')[0]) / float(
        metadata['@r_frame_rate'].split('/')[1])
    nframes = float(metadata['@duration_ts'])
    time_length = float(metadata['@duration'])
    frame_shape = (int(metadata['@height']), int(metadata['@width']), 3)
    skip = args.skip
    SIZE = frame_shape[:-1][::-1]

    secs = skip / fr
    video = enumerate(FFmpegReader(file_path, inputdict={'-ss': str(secs)}),
                      skip)

    main(video)
Esempio n. 17
0
 def infer_video(paths):
     """Return a 1-tuple holding the shape reported for a video.

     ``paths`` is an ``(item_path, item_ann_path)`` pair; only the video
     path is used.  The result is ``(reader.getShape(), )``.
     """
     item_path, _item_ann_path = paths
     vreader = FFmpegReader(item_path)
     try:
         return (vreader.getShape(), )
     finally:
         # Only metadata is needed, so release the reader straight away.
         vreader.close()
Esempio n. 18
0
import os
# import numpy as np

for num in range(1, 31):
    fnm = '/home/alex/Downloads/train/%d.mp4' % num
    print(fnm)
    try:
        shutil.rmtree('/mnt/data/pigs/imgs/{}'.format(num))
        shutil.rmtree('/mnt/data/pigs/val_imgs/{}'.format(num))
    except:
        pass

    os.mkdir('/mnt/data/pigs/imgs/{}'.format(num))
    os.mkdir('/mnt/data/pigs/val_imgs/{}'.format(num))

    vid = FFmpegReader(fnm)
    # vid = cv2.VideoCapture(fnm)

    # success, images = vid.read()
    # print(success)
    # images = [images]
    # images = []

    # split_num = 30
    # frame_num = 2950
    # split_size = frame_num // split_num

    val_split = 2200

    count = 0
    for frame_num, frame in enumerate(vid.nextFrame()):
Esempio n. 19
0
def get_mp4_frames(mp4_path, skip_frames, num_frames_per_event,
                   do_flip, brighten_val, is_high_res, do_aug):
    """Extract evenly spaced frames from the middle of an mp4 file.

    Every ``skip_frames``-th frame, starting at the frame that centres the
    sampled window in the video, is copied into the centre of a zero-filled
    canvas of the expected resolution.

    Parameters:
        mp4_path: Path of the video to read.
        skip_frames: Stride, in input frames, between sampled frames.
        num_frames_per_event: Number of frames to sample.
        do_flip: If true, each sampled frame is passed through ``hflip_img``.
        brighten_val: Values below 1.0 trigger ``adj_brightness`` on each
            sampled frame.
        is_high_res: Selects ``EXPECTED_HIGH_RES`` over ``EXPECTED_LOW_RES``
            as the canvas shape.
        do_aug: Unused in this function.

    Returns:
        numpy.ndarray of dtype uint8 with shape
        ``(num_frames_per_event,) + frame_shape``, or ``None`` when the video
        cannot be opened, is too short, or is larger than the canvas.
    """
    # Get mp4 reader
    try:
        reader = FFmpegReader(mp4_path)
    except Exception as e:
        if PRINT_ERRORS:
            print(e)
        return None

    # A single try/finally closes the reader on every exit path.
    try:
        # Get starting frame and offsets
        frame_shape = EXPECTED_HIGH_RES if is_high_res else EXPECTED_LOW_RES
        start_frame = (reader.inputframenum -
                       (num_frames_per_event * skip_frames)) // 2
        if start_frame <= 0:
            return None

        start_x = int((frame_shape[0] - reader.outputheight) // 2)
        if start_x < 0:
            return None

        start_y = int((frame_shape[1] - reader.outputwidth) // 2)
        if start_y < 0:
            return None

        start_z = int((frame_shape[2] - reader.outputdepth) // 2)
        if start_z < 0:
            return None

        # Put middle (num_frames_per_event * skip_frames) input frames in
        # numpy array
        cur_i = 0
        frame_array = np.zeros(shape=((num_frames_per_event, ) + frame_shape),
                               dtype=np.uint8)

        for cur_frame, frame in enumerate(reader.nextFrame()):
            if cur_i >= num_frames_per_event:
                # All requested frames collected; no need to keep decoding.
                break
            if cur_frame < start_frame:
                continue
            if (cur_frame - start_frame) % skip_frames != 0:
                continue

            frame_array[cur_i,
                        start_x:start_x + reader.outputheight,
                        start_y:start_y + reader.outputwidth,
                        start_z:start_z + reader.outputdepth] = frame

            if brighten_val < 1.0:
                frame_array[cur_i, :, :, :] = adj_brightness(
                    frame_array[cur_i, :, :, :], brighten_val)

            if do_flip:
                frame_array[cur_i, :, :, :] = hflip_img(
                    frame_array[cur_i, :, :, :])

            cur_i += 1

        # Return array with frames
        return frame_array
    finally:
        reader.close()
Esempio n. 20
0
    if os.path.isdir(os.path.join(rootDirLoad,
                                  subDir)):  # Check if it is a folder
        classes.append(subDir)  # Create a path
        files = os.listdir(os.path.join(rootDirLoad, subDir))
        nVideos = 0  # Videos in class X

        pbar2 = trange(len(files),
                       ncols=100,
                       position=2,
                       desc='Within-class progress ')

        for file in files:  # Get all the videos
            if file.lower().endswith('.avi') or file.lower().endswith('.mp4'):
                filename = os.path.join(rootDirLoad, subDir, file)
                reader = FFmpegReader(filename)
                nFrames = reader.getShape()[0]

                nVideos += 1
                dataList.write('\n{:<8} {:12} {:<12} {:02}.pyt'.format(
                    nClasses, subDir, nFrames, nVideos))

                # Create class directories if they do not exist
                classDir = os.path.join(rootDirSave, subDir)
                if not os.path.exists(classDir):
                    os.makedirs(classDir)

                pbar3 = trange(nFrames,
                               ncols=100,
                               position=4,
                               desc='Video progress        ')