Example #1
    def test_rgb_rotate(self):

        path = self.sandboxed('rgb_rotate.mov')
        output = av.open(path, 'w')

        write_rgb_rotate(output)
        assert_rgb_rotate(self, av.open(path))
Example #2
    def test_error_attributes(self):
        try:
            av.open('does not exist')
        except AVError as e:
            self.assertEqual(e.errno, 2)
            if is_windows:
                self.assertTrue(e.strerror in ['Error number -2 occurred',
                                               'No such file or directory'])
            else:
                self.assertEqual(e.strerror, 'No such file or directory')
            self.assertEqual(e.filename, 'does not exist')
        else:
            self.fail('no exception raised')
Example #3
    def test_buffer_read_write(self):

        buffer_ = StringIO()
        wrapped = MethodLogger(buffer_)
        write_rgb_rotate(av.open(wrapped, 'w', 'mp4'))

        # Make sure it did actually write.
        writes = wrapped._filter('write')
        self.assertTrue(writes)

        self.assertTrue(buffer_.tell())

        # Standard assertions.
        buffer_.seek(0)
        assert_rgb_rotate(self, av.open(buffer_))
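On Python 3 the same round trip needs a binary buffer, since PyAV reads and writes bytes; a minimal sketch using io.BytesIO in place of StringIO (write_rgb_rotate and assert_rgb_rotate are the test helpers used above):

import io
import av

buffer_ = io.BytesIO()
with av.open(buffer_, 'w', 'mp4') as output:
    write_rgb_rotate(output)
buffer_.seek(0)
assert_rgb_rotate(self, av.open(buffer_))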
Example #4
def main():
    inp = av.open('piano2.wav', 'r')
    out = av.open('piano2.mp3', 'w')
    ostream = out.add_stream("mp3")

    for frame in inp.decode(audio=0):
        frame.pts = None

        for p in ostream.encode(frame):
            out.mux(p)

    for p in ostream.encode(None):
        out.mux(p)

    out.close()
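The trailing encode(None) loop drains packets the encoder is still buffering. A hedged sketch of the same transcode using containers as context managers (same file names assumed; mux() also accepts the packet list that encode() returns):

import av

with av.open('piano2.wav') as inp, av.open('piano2.mp3', 'w') as out:
    ostream = out.add_stream('mp3')
    for frame in inp.decode(audio=0):
        frame.pts = None  # let the muxer re-time the frames
        out.mux(ostream.encode(frame))
    out.mux(ostream.encode(None))  # flush buffered packets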
Example #5
    def __init__(self, file, toc=None, format=None):
        if not hasattr(file, 'read'):
            file = str(file)
        self.file = file
        self.format = format
        self._container = None

        with av.open(self.file, format=self.format) as container:
            # Build a toc
            if toc is None:
                self._toc = np.cumsum([len(packet.decode())
                                       for packet in container.demux()
                                       if packet.stream.type == 'video'])
            else:
                if isinstance(toc, list):
                    self._toc = np.array(toc, dtype=np.int64)
                else:
                    self._toc = toc
            self._len = self._toc[-1]

            video_stream = [s for s in container.streams if s.type == 'video'][0]
            # PyAV always returns frames in color, and we make that
            # assumption in get_frame() later below, so 3 is hardcoded here:
            self._im_sz = video_stream.height, video_stream.width, 3

        self._load_fresh_file()
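Building the toc this way decodes every packet just to count frames. When the container happens to record a frame count, stream.frames is a far cheaper (though not always populated) alternative; a hedged sketch using the same arguments as above:

import av

with av.open(file, format=format) as container:
    stream = container.streams.video[0]
    n_frames = stream.frames  # 0 when the format does not store a count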
Example #6
def read_video(v_name):
    """A workaround function for reading video.

    Apparently precompiled OpenCV cannot read AVI videos on Mac OS X
    and Linux, so this function uses PyAV, an FFmpeg binding, to
    extract video frames.

    Parameters
    ----------
    v_name : string
        absolute path to video

    Returns
    -------
    frames : list
        An ordered list for storing frames
    num_frames : int
        number of frames in the video
    """
    container = av.open(v_name)
    video = next(s for s in container.streams if s.type == 'video')

    frames = []
    for packet in container.demux(video):
        for frame in packet.decode():
            frame_t = np.array(frame.to_image())
            frames.append(cv2.cvtColor(frame_t, cv2.COLOR_RGB2BGR))

    return frames, len(frames)
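The demux/decode nesting above predates the one-call decode API; a hedged sketch of the same extraction using container.decode (same cv2/numpy imports):

import av
import cv2
import numpy as np

def read_video_simple(v_name):
    with av.open(v_name) as container:
        frames = [cv2.cvtColor(np.array(f.to_image()), cv2.COLOR_RGB2BGR)
                  for f in container.decode(video=0)]
    return frames, len(frames)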
Example #7
    def write_thumbnails(self, m3u8_obj, url):
        for p in m3u8_obj.playlists:
            quality = p.media[0].name
            if quality == 'Source':
                uri = p.uri

                r = requests.get(uri)
                last_line = r.text.split('\n')[-2]
                parsed_uri = urllib.parse.urlparse(uri)
                short_path = '/'.join(parsed_uri.path.split('/')[:-1])
                ts_uri = '{uri.scheme}://{uri.netloc}{short_path}/{ts_path}'.format(uri=parsed_uri, short_path=short_path, ts_path=last_line)

                ts_r = requests.get(ts_uri, stream=True)
                if ts_r.status_code == 200:
                    with open(self.ts_path, 'wb') as f:
                        for chunk in ts_r:
                            f.write(chunk)

                import av
                container = av.open(self.ts_path)
                video = next(s for s in container.streams if s.type == 'video')

                for packet in container.demux(video):
                    for frame in packet.decode():
                        im = frame.to_image()
                        im_tn = frame.to_image()
                        im_tn.thumbnail((240, 135))
                        for id in self.ids:
                            im.save(self.original_path.format(streamer=self.streamer, id=id))
                            im_tn.save(self.thumbnail_path.format(streamer=self.streamer, id=id))
                        return
Example #8
    def __init__(self, filename, cache_size=16, fast_forward_thresh=32,
                 stream_index=0):
        self.filename = str(filename)
        self._container = av.open(self.filename)

        if len(self._container.streams.video) == 0:
            raise IOError("No valid video stream found in {}".format(filename))

        self._stream = self._container.streams.video[stream_index]

        try:
            self._duration = self._stream.duration * self._stream.time_base
        except TypeError:
            self._duration = self._container.duration / av.time_base

        self._frame_rate = self._stream.average_rate
        if self.duration <= 0 or len(self) <= 0:
            raise IOError("Video stream {} in {} has zero length.".format(stream_index, filename))

        self._cache = [None] * cache_size
        self._fast_forward_thresh = fast_forward_thresh

        demuxer = self._container.demux(streams=self._stream)

        # obtain first frame to get first time point
        # also tests for the presence of timestamps
        frame = next(_gen_frames(demuxer, self._stream.time_base))
        self._first_pts = frame.metadata['timestamp']

        frame = WrapPyAvFrame(frame.frame, 0, frame.metadata)
        self._cache[0] = frame
        self._frame_shape = (self._stream.height, self._stream.width, 3)
        self._last_frame = 0

        self._reset_demuxer()
Example #9
def writeFrames():
    global frameCount
    output = av.open(file='temp4.flv', mode='w')
    stream = output.add_stream("libx264", 15)
    stream.bit_rate = 8000000
    stream.pix_fmt = "yuv420p"
    stream.width = 640
    stream.height = 480
    print "stream is a: %s" % type(stream)
    while not exitFlag:
        temp=q.get()
        temp.save('test/%04d.jpg' % frameCount)
        frame=av.VideoFrame.from_image(temp)
        print "%d" % frame.index
        #temp=temp.reformat(format="yuv420p")
        #print "Frame: %s" % temp
        packet = stream.encode(frame)
        #print "packet: %s" % packet
        if (type(packet) == "av.packet.Packet"):
            print "Found a packet, Muxing."
            output.mux(packet) 
        else:
            print "Not a packet. Not counting"
        frameCount +=1
        
    packet = stream.encode() 
    output.mux(packet)
    output.close()
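In current PyAV, Stream.encode() returns a (possibly empty) list of packets instead of a single packet or None, which removes the need for the packet check above; a hedged sketch of the same write loop in that style:

import av

def write_frames(output, stream, images):
    # images: an iterable of PIL images
    for im in images:
        frame = av.VideoFrame.from_image(im)
        for packet in stream.encode(frame):  # empty while the encoder buffers
            output.mux(packet)
    for packet in stream.encode():  # flush remaining packets
        output.mux(packet)
    output.close()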
Example #10
def read_video(fn, start_frame=None, duration=None):
    """Takes a path to a video file. Return a generator which will generator a
    PIL image for each video frame.

    If start_frame is not None, an attempt is made to seek to that (0-indexed)
    frame.

    If duration is not None, yield at most that many frames.

    """
    LOG.info('Opening video file: {0}'.format(fn))
    container = av.open(fn)
    n_frames_yielded = 0
    frame_idx = 0 if start_frame is None else start_frame
    container.seek(frame_idx, 'frame')
    stream = next(s for s in container.streams if s.type == 'video')
    for packet in container.demux(stream):
        for frame in packet.decode():
            if duration is not None and n_frames_yielded >= duration:
                return

            # Re-format frame
            frame = frame.reformat(frame.width, frame.height, 'rgb24')

            # Yield frame and frame index
            yield frame_idx, frame.to_image()

            frame_idx += 1
            n_frames_yielded += 1
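container.seek(frame_idx, 'frame') relies on an old seek signature; later PyAV releases seek by presentation timestamp instead. A hedged sketch of jumping near a 0-indexed frame, assuming a constant frame rate:

import av

def seek_to_frame(container, stream, frame_idx):
    # convert the frame number to a pts in the stream's time base
    pts = int(frame_idx / (stream.average_rate * stream.time_base))
    container.seek(pts, stream=stream, backward=True)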
Example #11
    def __init__(self, file_loc,fps=30, video_stream={'codec':'mpeg4','bit_rate': 15000*10e3}, use_timestamps=False):
        super().__init__()
        self.use_timestamps = use_timestamps
        self.timestamps = []
        # the approximate capture rate.
        self.fps = int(fps)
        directory, video_file = os.path.split(file_loc)
        name, ext = os.path.splitext(video_file)

        if ext not in ('.mp4', '.mov', '.mkv'):
            logger.warning("media file container should be mp4 or mov. Using a different container is risky.")

        self.file_loc = file_loc
        self.container = av.open(self.file_loc, 'w')
        logger.debug("Opened '{}' for writing.".format(self.file_loc))

        if self.use_timestamps:
            self.time_base = Fraction(1, 65535)  # highest resolution for mp4
        else:
            self.time_base = Fraction(1000, self.fps*1000)  # timebase is fps

        self.video_stream = self.container.add_stream(video_stream['codec'], 1/self.time_base)
        self.video_stream.bit_rate = video_stream['bit_rate']
        self.video_stream.bit_rate_tolerance = video_stream['bit_rate']/20
        self.video_stream.thread_count = 1
        # self.video_stream.pix_fmt = "yuv420p"
        self.configured = False
        self.start_time = None

        self.current_frame_idx = 0
Example #12
    def __init__(self, file_loc, fps=30):
        super().__init__()
        # the approximate capture rate.
        self.fps = int(fps)
        self.time_base = Fraction(1000, self.fps * 1000)
        self.timestamps = []
        directory, video_file = os.path.split(file_loc)
        name, ext = os.path.splitext(video_file)

        if ext not in (".mp4"):
            logger.warning(
                "media file container should be mp4. Using a different container is risky."
            )

        self.file_loc = file_loc
        self.container = av.open(self.file_loc, "w")
        logger.debug("Opened '{}' for writing.".format(self.file_loc))

        self.video_stream = self.container.add_stream("mjpeg", 1 / self.time_base)
        self.video_stream.pix_fmt = "yuvj422p"
        self.video_stream.time_base = self.time_base
        self.configured = False
        self.frame_count = 0

        self.write_video_frame_compressed = self.write_video_frame
Example #13
    def set_file(self, path):
        self.file = av.open(path)
        self.stream = next(s for s in self.file.streams if s.type == 'video')
        self.rate = get_frame_rate(self.stream)
        self.time_base = float(self.stream.time_base)

        index, first_frame = next(self.next_frame())
        self.stream.seek(self.stream.start_time)

        # find the pts of the first frame
        index, first_frame = next(self.next_frame())

        if self.pts_seen:
            pts = first_frame.pts
        else:
            pts = first_frame.dts

        self.start_time = pts or first_frame.dts

        print("First pts", pts, self.stream.start_time, first_frame)

        #self.nb_frames = get_frame_count(self.file, self.stream)
        self.nb_frames = self.get_frame_count()

        if self.stream.duration:
            dur = self.stream.duration * self.time_base
        else:
            dur = self.file.duration * 1.0 / float(AV_TIME_BASE)

        self.update_frame_range.emit(dur, self.rate)
Example #14
def decode_using_pyav():

    print('Decoding using PyAV.')
    fh = av.open('ffv1_level3.nut', 'r', options={'refcounted_frames': '1'})
    for s in fh.streams:
        #print s, s.thread_type, s.thread_count
        #pass
        print('Thread count:', s.thread_count)
        #s.thread_count = 1
        #s.thread_type = 'frame'


    count = 0

    packet_iter = fh.demux()
    while True:

        #av.utils._debug_enter('__main__.demux')
        packet = next(packet_iter)
        #av.utils._debug_exit()

        #av.utils._debug_enter('__main__.decode')
        frames = packet.decode()
        #av.utils._debug_exit()

        for frame in frames:
            count += 1
            print(count, end=' ')
            tick()
            if not count % 100:
                gc.collect()
                #print 'GARBAGE:', gc.get_count(), len(gc.garbage)
            if count >= 10000:
                return
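The commented-out lines hint at the decoder threading knobs; a hedged sketch of switching threading on (stream attributes proxy the codec context, as Example #44 below also relies on):

import av

fh = av.open('ffv1_level3.nut')
for s in fh.streams.video:
    s.thread_type = 'AUTO'  # allow frame- and slice-level threading
    s.thread_count = 0      # 0 lets FFmpeg pick a thread count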
Example #15
    def test_stream_index(self):
        output = av.open(self.sandboxed('output.mov'), 'w')

        vstream = output.add_stream('mpeg4', 24)
        vstream.pix_fmt = 'yuv420p'
        vstream.width = 320
        vstream.height = 240

        astream = output.add_stream('mp2', 48000)
        astream.channels = 2
        astream.format = 's16'

        self.assertEqual(vstream.index, 0)
        self.assertEqual(astream.index, 1)

        vframe = VideoFrame(320, 240, 'yuv420p')
        vpacket = vstream.encode(vframe)[0]

        self.assertIs(vpacket.stream, vstream)
        self.assertEqual(vpacket.stream_index, 0)

        for i in range(10):
            aframe = AudioFrame('s16', 'stereo', samples=astream.frame_size)
            aframe.rate = 48000
            apackets = astream.encode(aframe)
            if apackets:
                apacket = apackets[0]
                break

        self.assertIs(apacket.stream, astream)
        self.assertEqual(apacket.stream_index, 1)
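The loop has to feed several frames before the first packet appears because audio encoders buffer input. In a real writer (not needed for this assertion-only test) the buffer would be drained at the end; a hedged sketch:

for packet in astream.encode(None):  # flush whatever the encoder still holds
    output.mux(packet)
for packet in vstream.encode(None):
    output.mux(packet)
output.close()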
Example #16
def convert():
    global args
    container = av.open(args.video)
    video = next(s for s in container.streams if s.type == 'video')

    pc = 0
    is_last = False
    for packet in container.demux(video):
        if is_last:
            break
        for frame in packet.decode():
            export_frame = True

            # is this frame smaller than the start frame (if defined)?
            if args.sf > 0 and frame.index < args.sf:
                export_frame = False

            # is this frame bigger than the end frame (if defined)?
            if args.ef > 0 and frame.index > args.ef:
                export_frame = False
                is_last = True
                break

            # does this frame land on the frame-step interval?
            if args.fs > 0 and not pc % args.fs == 0:
                export_frame = False

            filename = '%s/%s%06d.jpg' % (args.outputdir, args.fileprefix, frame.index)
            #print("export %s file=%s, index=%s, pc=%s %s" % (export_frame, filename, frame.index, pc, (pc % args.fs)))
            pc += 1
            if not export_frame:
                continue
            print("export file=%s, frame index=%s" % (filename, frame.index))
            frame.to_image().save(filename)
Example #17
    def __init__(self, file_loc,fps=30):
        super(JPEG_Writer, self).__init__()
        # the approximate capture rate.
        self.fps = int(fps)
        self.time_base = Fraction(1000,self.fps*1000)
        file_loc = str(file_loc) #force str over unicode.
        try:
            file_path,ext = file_loc.rsplit('.', 1)
        except:
            logger.error("'%s' is not a valid media file name."%file_loc)
            raise Exception("Error")

        if ext not in ('mp4',):
            logger.warning("media file container should be mp4. Using a different container is risky.")

        self.file_loc = file_loc
        self.container = av.open(self.file_loc,'w')
        logger.debug("Opended '%s' for writing."%self.file_loc)

        self.video_stream = self.container.add_stream('mjpeg',1/self.time_base)
        self.video_stream.pix_fmt = "yuvj422p"
        self.configured = False
        self.frame_count = 0

        self.write_video_frame_compressed = self.write_video_frame
Example #18
    def __init__(self, file_loc, video_stream={'codec':'mpeg4','bit_rate': 8000*10e3}, audio_stream=None):
        super(AV_Writer, self).__init__()

        try:
            file_path,ext = file_loc.rsplit('.', 1)
        except:
            logger.error("'%s' is not a valid media file name."%file_loc)
            raise Exception("Error")

        if ext not in ('mp4', 'mov', 'mkv'):
            logger.warning("media file container should be mp4 or mov. Using a different container is risky.")

        self.ts_file_loc = file_path+'_timestamps_pts.npy'
        self.file_loc = file_loc
        self.container = av.open(self.file_loc,'w')
        logger.debug("Opended '%s' for writing."%self.file_loc)

        self.time_resolution = 1000  # time_base in milliseconds
        self.time_base = Fraction(1,self.time_resolution)


        self.video_stream = self.container.add_stream(video_stream['codec'],self.time_resolution)
        self.video_stream.bit_rate = video_stream['bit_rate']
        self.video_stream.thread_count = 1
        # self.video_stream.pix_fmt = "yuv420p"#video_stream['format']
        self.configured = False
        self.start_time = None

        self.timestamps_list = []
Example #19
    def __init__(self, file_loc,fps=30, video_stream={'codec':'mpeg4','bit_rate': 15000*10e3}, audio_stream=None,use_timestamps=False):
        super(AV_Writer, self).__init__()
        self.use_timestamps = use_timestamps
        # the approximate capture rate.
        self.fps = int(fps)
        file_loc = str(file_loc) #force str over unicode.
        try:
            file_path,ext = file_loc.rsplit('.', 1)
        except:
            logger.error("'%s' is not a valid media file name."%file_loc)
            raise Exception("Error")

        if ext not in ('mp4', 'mov', 'mkv'):
            logger.warning("media file container should be mp4 or mov. Using a different container is risky.")

        self.ts_file_loc = file_path+'_timestamps_pts.npy'
        self.file_loc = file_loc
        self.container = av.open(self.file_loc,'w')
        logger.debug("Opended '%s' for writing."%self.file_loc)

        if self.use_timestamps:
            self.time_base = Fraction(1,65535) #highest resolution for mp4
        else:
            self.time_base = Fraction(1000,self.fps*1000) #timebase is fps

        self.video_stream = self.container.add_stream(video_stream['codec'],1/self.time_base)
        self.video_stream.bit_rate = video_stream['bit_rate']
        self.video_stream.bit_rate_tolerance = video_stream['bit_rate']/20
        self.video_stream.thread_count = 1
        # self.video_stream.pix_fmt = "yuv420p"
        self.configured = False
        self.start_time = None

        self.current_frame_idx = 0
Example #20
def readFrames():
    container = av.open(format="video4linux2",file='/dev/video0')
    video = next(s for s in container.streams if s.type == 'video')

    for packet in container.demux(video):
        for frame in packet.decode():
            #frame.to_image().save('frame-%04d.jpg' % frame.index)
            q.put(frame.to_image())
Example #21
    def _configure(self, camera_num=0, **options):
        self.camera_num = camera_num
        self.options = options

        container = av.open('/dev/video{}'.format(self.camera_num), 'r', 'video4linux2', self.options)
        stream = next(s for s in container.streams if s.type == 'video')
        self.output.spec['shape'] = (stream.format.height, stream.format.width, 3)
        self.output.spec['sample_rate'] = float(stream.average_rate)
Example #22
    def test_writing(self):

        path = self.sandboxed('writing.mov')
        with open(path, 'wb') as fh:
            wrapped = MethodLogger(fh)

            output = av.open(wrapped, 'w', 'mov')
            write_rgb_rotate(output)
            output.close()
            fh.close()

            # Make sure it did actually write.
            writes = wrapped._filter('write')
            self.assertTrue(writes)

            # Standard assertions.
            assert_rgb_rotate(self, av.open(path))
Example #23
    def __init__(self, vidqueue, filename):
        super(VideoDecoder, self).__init__()
        self.appId = None
        self.running = False
        self.queue = vidqueue
        self.filename = filename
        self.container = av.open(filename)
        self.video = self.container.streams[1]
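streams[1] assumes the video stream sits at index 1, which varies by file; a hedged alternative using the typed accessor:

self.video = self.container.streams.video[0]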
Example #24
    def start(self, file_loc, audio_src):
        self.should_close.clear()
        if platform.system() == "Darwin":
            in_container = av.open('none:%s' % audio_src, format="avfoundation")
        else:
            in_container = None
        self.thread = Thread(target=rec_thread, args=(file_loc, in_container, audio_src, self.should_close))
        self.thread.start()
Example #25
def convert_pupil_mobile_recording_to_v094(rec_dir):
    logger.info("Converting Pupil Mobile recording to v0.9.4 format")
    # convert time files and rename corresponding videos
    time_pattern = os.path.join(rec_dir, '*.time')
    for time_loc in glob.glob(time_pattern):
        time_file_name = os.path.split(time_loc)[1]
        time_name, time_ext = os.path.splitext(time_file_name)

        potential_locs = [os.path.join(rec_dir, time_name+ext) for ext in ('.mjpeg', '.mp4','.m4a')]
        existing_locs = [loc for loc in potential_locs if os.path.exists(loc)]
        if not existing_locs:
            continue
        else:
            video_loc = existing_locs[0]

        if time_name in ('Pupil Cam1 ID0', 'Pupil Cam1 ID1'):
            time_name = 'eye'+time_name[-1]  # rename eye files
        elif time_name in ('Pupil Cam1 ID2', 'Logitech Webcam C930e'):
            cam_calib_loc = os.path.join(rec_dir, 'camera_calibration')
            try:
                camera_calibration = load_object(cam_calib_loc)
            except:
                # no camera calibration found
                video = av.open(video_loc, 'r')
                frame_size = video.streams.video[0].format.width, video.streams.video[0].format.height
                del video
                try:
                    camera_calibration = pre_recorded_calibrations[time_name][frame_size]
                except KeyError:

                    camera_calibration = idealized_camera_calibration(frame_size)
                    logger.warning('Camera calibration not found. Will assume idealized camera.')
                save_object(camera_calibration, cam_calib_loc)

            time_name = 'world'  # assume world file
        elif time_name.startswith('audio_'):
            time_name = 'audio'

        timestamps = np.fromfile(time_loc, dtype='>f8')
        timestamp_loc = os.path.join(rec_dir, '{}_timestamps.npy'.format(time_name))
        logger.info('Creating "{}"'.format(os.path.split(timestamp_loc)[1]))
        np.save(timestamp_loc, timestamps)

        if time_name == 'audio':
            video_dst = os.path.join(rec_dir, time_name) + '.mp4'
            logger.info('Renaming "{}" to "{}"'.format(os.path.split(video_loc)[1], os.path.split(video_dst)[1]))
            os.rename(video_loc, video_dst)
        else:
            video_dst = os.path.join(rec_dir, time_name) + os.path.splitext(video_loc)[1]
            logger.info('Renaming "{}" to "{}"'.format(os.path.split(video_loc)[1], os.path.split(video_dst)[1]))
            os.rename(video_loc, video_dst)

    pupil_data_loc = os.path.join(rec_dir, 'pupil_data')
    if not os.path.exists(pupil_data_loc):
        logger.info('Creating "pupil_data"')
        save_object({'pupil_positions': [],
                     'gaze_positions': [],
                     'notifications': []}, pupil_data_loc)
Example #26
    def __init__(self, file_loc,fps=30, video_stream={'codec':'mpeg4','bit_rate': 15000*10e3}, audio_loc=None, use_timestamps=False):
        super().__init__()
        self.use_timestamps = use_timestamps
        self.timestamps = []
        # the approximate capture rate.
        self.fps = int(fps)
        directory, video_file = os.path.split(file_loc)
        name, ext = os.path.splitext(video_file)

        if ext not in ('.mp4', '.mov', '.mkv'):
            logger.warning("media file container should be mp4 or mov. Using a different container is risky.")

        self.file_loc = file_loc
        self.container = av.open(self.file_loc, 'w')
        logger.debug("Opened '{}' for writing.".format(self.file_loc))

        if self.use_timestamps:
            self.time_base = Fraction(1, 65535)  # highest resolution for mp4
        else:
            self.time_base = Fraction(1000, self.fps*1000)  # timebase is fps

        if audio_loc:
            audio_dir = os.path.split(audio_loc)[0]
            audio_ts_loc = os.path.join(audio_dir, 'audio_timestamps.npy')
            audio_exists = os.path.exists(audio_loc) and os.path.exists(audio_ts_loc)
            if audio_exists:
                self.audio_rec = av.open(audio_loc)
                self.audio_ts = np.load(audio_ts_loc)
                self.audio_export = self.container.add_stream(template=self.audio_rec.streams.audio[0])
            else:
                logger.warning('Could not mux audio. File not found.')
                self.audio_export = False
        else:
            self.audio_export = False

        self.video_stream = self.container.add_stream(video_stream['codec'], 1/self.time_base)
        self.video_stream.bit_rate = video_stream['bit_rate']
        self.video_stream.bit_rate_tolerance = video_stream['bit_rate']/20
        self.video_stream.thread_count = 1
        # self.video_stream.pix_fmt = "yuv420p"
        self.configured = False
        self.start_time = None

        self.current_frame_idx = 0
        self.audio_packets_decoded = 0
Example #27
def rec_thread(file_loc, in_container, audio_src,should_close):
    # print sys.modules['av']
    # import av
    if not in_container:
        #create in container
        if platform.system() == "Darwin":
            in_container = av.open('none:%s'%audio_src,format="avfoundation")
        elif platform.system() == "Linux":
            in_container = av.open('hw:%s'%audio_src,format="alsa")

    in_stream = None

    # print len(in_container.streams), 'stream(s):'
    for i, stream in enumerate(in_container.streams):

        if stream.type == 'audio':
            # print '\t\taudio:'
            # print '\t\t\tformat:', stream.format
            # print '\t\t\tchannels: %s' % stream.channels
            in_stream = stream
            break

    if in_stream is None:
        # logger.error("No input audio stream found.")
        return

    #create out container
    out_container = av.open(file_loc, 'w')
    # logger.debug("Opened '%s' for writing." % file_loc)
    out_stream = out_container.add_stream(template=in_stream)


    for packet in in_container.demux(in_stream):
        # for frame in packet.decode():
        #     packet = out_stream.encode(frame)
        #     if packet:
        # print '%r' %packet
        # print '\tduration: %s' % format_time(packet.duration, packet.stream.time_base)
        # print '\tpts: %s' % format_time(packet.pts, packet.stream.time_base)
        # print '\tdts: %s' % format_time(packet.dts, packet.stream.time_base)
        out_container.mux(packet)
        if should_close.is_set():
            break

    out_container.close()
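When remuxing packets between containers, newer PyAV additionally expects each packet to be re-pointed at the output stream and the demuxer's flush packets to be skipped; a hedged sketch of the copy loop:

for packet in in_container.demux(in_stream):
    if packet.dts is None:
        continue  # skip the flush packet the demuxer emits at EOF
    packet.stream = out_stream
    out_container.mux(packet)
    if should_close.is_set():
        break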
Example #28
    def test_audio_default_options(self):
        output = av.open(self.sandboxed('output.mov'), 'w')

        stream = output.add_stream('mp2')
        self.assertEqual(stream.bit_rate, 128000)
        self.assertEqual(stream.format.name, 's16')
        self.assertEqual(stream.rate, 48000)
        self.assertEqual(stream.ticks_per_frame, 1)
        self.assertEqual(stream.time_base, None)
Example #29
    def test_selection(self):

        container = av.open(fate_suite('h264/interlaced_crop.mp4'))
        video = container.streams.video[0]
        # audio_stream = container.streams.audio[0]
        # audio_streams = list(container.streams.audio[0:2])

        self.assertEqual([video], container.streams.get(video=0))
        self.assertEqual([video], container.streams.get(video=(0, )))
Example #30
    def image_sequence_encode(self, codec_name):

        try:
            codec = Codec(codec_name, 'w')
        except UnknownCodecError:
            raise SkipTest()

        container = av.open(fate_suite('h264/interlaced_crop.mp4'))
        video_stream = container.streams.video[0]

        width = 640
        height = 480

        ctx = codec.create()

        pix_fmt = ctx.codec.video_formats[0].name

        ctx.width = width
        ctx.height = height
        ctx.time_base = video_stream.codec_context.time_base
        ctx.pix_fmt = pix_fmt
        ctx.open()

        frame_count = 1
        path_list = []
        for frame in iter_frames(container, video_stream):

            new_frame = frame.reformat(width, height, pix_fmt)
            new_packets = ctx.encode(new_frame)

            self.assertEqual(len(new_packets), 1)
            new_packet = new_packets[0]

            path = self.sandboxed('%s/encoder.%04d.%s' % (
                codec_name,
                frame_count,
                codec_name if codec_name != 'mjpeg' else 'jpg',
            ))
            path_list.append(path)
            with open(path, 'wb') as f:
                f.write(new_packet)
            frame_count += 1
            if frame_count > 5:
                break

        ctx = av.Codec(codec_name, 'r').create()

        for path in path_list:
            with open(path, 'rb') as f:
                size = os.fstat(f.fileno()).st_size
                packet = Packet(size)
                size = f.readinto(packet)
                frame = ctx.decode(packet)[0]
                self.assertEqual(frame.width, width)
                self.assertEqual(frame.height, height)
                self.assertEqual(frame.format.name, pix_fmt)
Example #31
def main():
    drone = tellopy.Tello()
    os.makedirs('raw_data', exist_ok=True)  # directory for saving raw frames
    os.makedirs('take_picture', exist_ok=True)  # directory for captured pictures
    os.makedirs('process_picture', exist_ok=True)  # directory for processed captures
    SCREEN_WIDTH = 640
    SCREEN_HEIGHT = 480

    pygame.joystick.init()
    try:
        joy = pygame.joystick.Joystick(0)  # create a joystick instance
        joy.init()  # init instance
        print('Joystick name: ' + joy.get_name())
        print('Number of buttons: ' + str(joy.get_numbuttons()))
        pygame.init()
        screen = pygame.display.set_mode(
            (SCREEN_WIDTH, SCREEN_HEIGHT))  # create the window
        pygame.display.set_caption('Joystick')  # window title
        pygame.display.flip()  # update the display
    except pygame.error:
        print('No joystick was found.')

    try:
        drone.connect()
        drone.wait_for_connection(20.0)

        retry = 3
        container = None
        while container is None and 0 < retry:
            retry -= 1
            try:
                container = av.open(drone.get_video_stream())
            except av.AVError as ave:
                print(ave)
                print('retry...')

        fly_sw = False  # toggles between takeoff and land
        scale = 4  # adjust as needed
        # skip first 300 frames
        frame_skip = 300

        raw_count = 0  # raw file counter
        picture_count = 0  # picture file counter

        while True:

            for frame in container.decode(video=0):
                if 0 < frame_skip:
                    frame_skip = frame_skip - 1
                    continue
                start_time = time.time()
                image = cv2.cvtColor(numpy.array(frame.to_image()),
                                     cv2.COLOR_RGB2BGR)
                cv2.imshow('Original', image)
                cv2.waitKey(1)
                if frame.time_base < 1.0 / 60:
                    time_base = 1.0 / 60
                else:
                    time_base = frame.time_base
                frame_skip = int((time.time() - start_time) / time_base)
                dir_write('raw_data', 'frame_{:04d}.png'.format(raw_count),
                          image)
                raw_count += 1

                for e in pygame.event.get():  # event check
                    if e.type == QUIT:  # was quit requested?
                        drone.quit()
                        return
                    if e.type == KEYDOWN and e.key == K_ESCAPE:  # was ESC pressed?
                        drone.quit()
                        return

                    # joystick-related events
                    if e.type == pygame.locals.JOYAXISMOTION:
                        x1, y1 = joy.get_axis(0), joy.get_axis(
                            1)  # left stick x, y
                        x2, y2 = joy.get_axis(4), joy.get_axis(
                            3)  # right stick x, y
                        #print('x and y : ' + str(x) +' , '+ str(y))

                        drone.left_x = -x1
                        drone.left_y = -y1

                        drone.right_x = x2 / scale
                        drone.right_y = -y2 / scale
                    elif e.type == pygame.locals.JOYBALLMOTION:
                        print('ball motion')
                    elif e.type == pygame.locals.JOYHATMOTION:
                        print('hat motion')
                    elif e.type == pygame.locals.JOYBUTTONDOWN:
                        print('Button ' + str(e.button) + ' was pressed')
                        if int(e.button) == 7 and fly_sw == False:  #start
                            drone.takeoff()
                            fly_sw = True

                        elif int(e.button) == 7 and fly_sw == True:  #start
                            drone.land()
                            drone.quit()
                            cv2.destroyAllWindows()
                            filepath = os.path.join('raw_data')

                            files = os.listdir(filepath)
                            raw_count = 0

                            for file in files:
                                index = re.search('.png', file)
                                if index:
                                    raw_count += 1

                            print(raw_count)
                            # join the saved frames into a video
                            fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
                            video = cv2.VideoWriter('replay.mp4', fourcc, 20.0,
                                                    (640, 480))

                            for i in range(0, raw_count):
                                filepath = os.path.join(
                                    'raw_data', 'frame_{:04d}.png'.format(i))
                                img = cv2.imread(filepath)
                                img = cv2.resize(img, (640, 480))
                                video.write(img)

                            video.release()

                            for i in range(0, picture_count):
                                filepath = os.path.join(
                                    'take_picture',
                                    'picture_{:04d}.png'.format(i))
                                img = cv2.imread(filepath)
                                print(cv2.Laplacian(
                                    img, cv2.CV_64F).var())  # Laplacian focus measure
                                pixel = pic.pixelArt(img)
                                dir_write('process_picture',
                                          'dot_{:04d}.png'.format(i), pixel)
                                water = pic.waterColor(img)
                                dir_write('process_picture',
                                          'water_{:04d}.png'.format(i), water)
                                oil = pic.oilPaint(img)
                                dir_write('process_picture',
                                          'oil_{:04d}.png'.format(i), oil)

                            fly_sw = False

                        if int(e.button) == 3:  #Y
                            dir_write(
                                'take_picture',
                                'picture_{:04d}.png'.format(picture_count),
                                image)
                            picture_count += 1

                    elif e.type == pygame.locals.JOYBUTTONUP:
                        print('Button ' + str(e.button) + ' was released')

    except Exception as ex:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        print(ex)
    finally:
        drone.quit()
        cv2.destroyAllWindows()
Example #32
def load_video(path_to_video):
    # load the raw ultrasound video
    video = av.open(path_to_video)
    return video
Example #33
def output_stream_write():
    #time.sleep(5)
    while True:
        container = av.open(tmp)
        print(container)
        output.mux(frame)
Example #34

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)

    cap = Audio_Capture('test.wav','default')

    import time
    time.sleep(5)
    cap.cleanup()
    #mic device
    exit()


    # container = av.open('hw:0', format="alsa")
    container = av.open('1:0', format="avfoundation")
    print('container:', container)
    print('\tformat:', container.format)
    print('\tduration:', float(container.duration) / av.time_base)
    print('\tmetadata:')
    for k, v in sorted(container.metadata.items()):
        print('\t\t%s: %r' % (k, v))
    print()

    print(len(container.streams), 'stream(s):')
    audio_stream = None
    for i, stream in enumerate(container.streams):

        print('\t%r' % stream)
        print('\t\ttime_base: %r' % stream.time_base)
        print('\t\trate: %r' % stream.rate)
Example #35
def extract_frames(video_path, time_left):
    frames = []
    video = av.open(video_path)
    for frame in video.decode(0):
        yield frame.to_image()
Example #36
arg_parser.add_argument("output")
arg_parser.add_argument("-F", "--iformat")
arg_parser.add_argument("-O", "--ioption", action="append", default=[])
arg_parser.add_argument("-f", "--oformat")
arg_parser.add_argument("-o", "--ooption", action="append", default=[])
arg_parser.add_argument("-a", "--noaudio", action="store_true")
arg_parser.add_argument("-v", "--novideo", action="store_true")
arg_parser.add_argument("-s", "--nosubs", action="store_true")
arg_parser.add_argument("-d", "--nodata", action="store_true")
arg_parser.add_argument("-c", "--count", type=int, default=0)
args = arg_parser.parse_args()


input_ = av.open(
    args.input,
    format=args.iformat,
    options=dict(x.split("=") for x in args.ioption),
)
output = av.open(
    args.output,
    "w",
    format=args.oformat,
    options=dict(x.split("=") for x in args.ooption),
)

in_to_out = {}

for i, stream in enumerate(input_.streams):
    if (
        (stream.type == "audio" and not args.noaudio)
        or (stream.type == "video" and not args.novideo)
Example #37
    def make_update():
        surface_definitions_path = os.path.join(rec_dir, "surface_definitions")
        if not os.path.exists(surface_definitions_path):
            return

        surface_definitions_dict = fm.Persistent_Dict(surface_definitions_path)
        surface_definitions_backup_path = os.path.join(
            rec_dir, "surface_definitions_deprecated")
        os.rename(surface_definitions_path, surface_definitions_backup_path)

        intrinsics_path = os.path.join(rec_dir, "world.intrinsics")
        if not os.path.exists(intrinsics_path):
            logger.warning(
                "Loading surface definitions failed: The data format of the "
                "surface definitions in this recording "
                "is too old and is no longer supported!")
            return

        valid_ext = (".mp4", ".mkv", ".avi", ".h264", ".mjpeg")
        existing_videos = [
            f for f in glob.glob(os.path.join(rec_dir, "world.*"))
            if os.path.splitext(f)[1] in valid_ext
        ]
        if not existing_videos:
            return

        world_video_path = existing_videos[0]
        world_video = av.open(world_video_path)
        f = world_video.streams.video[0].format
        resolution = f.width, f.height

        intrinsics = cm.load_intrinsics(rec_dir, "world", resolution)

        DEPRECATED_SQUARE_MARKER_KEY = "realtime_square_marker_surfaces"
        if DEPRECATED_SQUARE_MARKER_KEY not in surface_definitions_dict:
            return
        surfaces_definitions_old = surface_definitions_dict[
            DEPRECATED_SQUARE_MARKER_KEY]

        surfaces_definitions_new = []
        for surface_def_old in surfaces_definitions_old:
            surface_def_new = {}
            surface_def_new["deprecated"] = True
            surface_def_new["name"] = surface_def_old["name"]
            surface_def_new["real_world_size"] = surface_def_old[
                "real_world_size"]
            surface_def_new["build_up_status"] = 1.0

            reg_markers = []
            registered_markers_dist = []
            for id, verts in surface_def_old["markers"].items():
                reg_marker_dist = {"id": id, "verts_uv": verts}
                registered_markers_dist.append(reg_marker_dist)

                verts_undist = undistort_vertices(verts, intrinsics)
                reg_marker = {"id": id, "verts_uv": verts_undist}
                reg_markers.append(reg_marker)

            surface_def_new[
                "registered_markers_dist"] = registered_markers_dist
            surface_def_new["reg_markers"] = reg_markers

            surfaces_definitions_new.append(surface_def_new)

        surface_definitions_dict_new = fm.Persistent_Dict(
            surface_definitions_path)
        surface_definitions_dict_new["surfaces"] = surfaces_definitions_new
        surface_definitions_dict_new.save()
Example #38
    def _get_av_container(self):
        if isinstance(self._source_path[0], io.BytesIO):
            self._source_path[0].seek(0)  # required for re-reading
        return av.open(self._source_path[0])
Example #39
    def open(self, filename):
        if os.path.isfile(filename):
            self._media_container = av.open(filename)
Example #40
    def _load_fresh_file(self):
        self._demuxed_container = av.open(self.filename).demux()
        self._current_packet = next(self._demuxed_container).decode()
        self._packet_cursor = 0
        self._frame_cursor = 0
Example #41
    try:
        from billiard import forking_enable
        forking_enable(0)
    except ImportError:
        pass
    logging.basicConfig(level=logging.DEBUG)

    cap = Audio_Capture('test.wav', 1)

    import time
    time.sleep(2)
    cap.close()
    #mic device
    exit()

    container = av.open('hw:0', format="alsa")
    container = av.open(':0', format="avfoundation")
    print('container:', container)
    print('\tformat:', container.format)
    print('\tduration:', float(container.duration) / av.time_base)
    print('\tmetadata:')
    for k, v in sorted(container.metadata.items()):
        print('\t\t%s: %r' % (k, v))
    print()

    print(len(container.streams), 'stream(s):')
    audio_stream = None
    for i, stream in enumerate(container.streams):

        print('\t%r' % stream)
        print('\t\ttime_base: %r' % stream.time_base)
Example #42
    def _open_container(self):
        cont = av.open(self.path)
        return cont
Example #43
    def __init__(self, file, format=None, options={}):
        self.__container = av.open(file=file,
                                   format=format,
                                   mode="w",
                                   options=options)
        self.__tracks = {}
Example #44
def renderAv(ffmpeg, ffprobe, vidFile: str, args, chunks: list, speeds: list, fps,
    has_vfr, temp, log):
    import av

    totalFrames = chunks[len(chunks) - 1][1]
    videoProgress = ProgressBar(totalFrames, 'Creating new video',
        args.machine_readable_progress, args.no_progress)

    if(has_vfr):
        class Wrapper:
            """
            Wrapper which only exposes the `read` method to avoid PyAV
            trying to use `seek`.
            From: github.com/PyAV-Org/PyAV/issues/578#issuecomment-621362337
            """

            name = "<wrapped>"

            def __init__(self, fh):
                self._fh = fh

            def read(self, buf_size):
                return self._fh.read(buf_size)

        # Create a cfr stream on stdout.
        cmd = ['-i', vidFile, '-map', '0:v:0', '-vf', f'fps=fps={fps}', '-r', str(fps),
            '-vsync', '1', '-f', 'matroska', '-vcodec', 'rawvideo', 'pipe:1']

        wrapper = Wrapper(ffmpeg.Popen(cmd).stdout)
        input_ = av.open(wrapper, 'r')
    else:
        input_ = av.open(vidFile)

    inputVideoStream = input_.streams.video[0]
    inputVideoStream.thread_type = 'AUTO'

    width = inputVideoStream.width
    height = inputVideoStream.height
    pix_fmt = inputVideoStream.pix_fmt

    log.debug(f'   - pix_fmt: {pix_fmt}')

    cmd = [ffmpeg.getPath(), '-hide_banner', '-y', '-f', 'rawvideo', '-vcodec', 'rawvideo',
        '-pix_fmt', pix_fmt, '-s', f'{width}*{height}', '-framerate', f'{fps}', '-i', '-',
        '-pix_fmt', pix_fmt]

    if(args.scale != 1):
        cmd.extend(['-vf', f'scale=iw*{args.scale}:ih*{args.scale}'])
    cmd = properties(cmd, args, vidFile, ffprobe)

    cmd.append(f'{temp}{sep()}spedup.mp4')

    if(args.show_ffmpeg_debug):
        process2 = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    else:
        process2 = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL)

    inputEquavalent = 0.0
    outputEquavalent = 0
    index = 0
    chunk = chunks.pop(0)

    try:
        for packet in input_.demux(inputVideoStream):
            for frame in packet.decode():
                index += 1
                if(len(chunks) > 0 and index >= chunk[1]):
                    chunk = chunks.pop(0)

                if(speeds[chunk[2]] != 99999):
                    inputEquavalent += (1 / speeds[chunk[2]])

                while inputEquavalent > outputEquavalent:
                    in_bytes = frame.to_ndarray().tobytes()
                    process2.stdin.write(in_bytes)
                    outputEquavalent += 1

                videoProgress.tick(index - 1)
        process2.stdin.close()
        process2.wait()
    except BrokenPipeError:
        log.print(cmd)
        process2 = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        log.error('Broken Pipe Error!')

    if(log.is_debug):
        log.debug('Writing the output file.')
    else:
        log.conwrite('Writing the output file.')
Example #45
def update_recording_v094_to_v0913(rec_dir, retry_on_averror=True):
    try:
        logger.info("Updating recording from v0.9.4 to v0.9.13")

        wav_file_loc = os.path.join(rec_dir, "audio.wav")
        aac_file_loc = os.path.join(rec_dir, "audio.mp4")
        audio_ts_loc = os.path.join(rec_dir, "audio_timestamps.npy")
        backup_ts_loc = os.path.join(rec_dir, "audio_timestamps_old.npy")
        if os.path.exists(wav_file_loc) and os.path.exists(audio_ts_loc):
            in_container = av.open(wav_file_loc)
            in_stream = in_container.streams.audio[0]
            in_frame_size = 0
            in_frame_num = 0

            out_container = av.open(aac_file_loc, "w")
            out_stream = out_container.add_stream("aac")

            for in_packet in in_container.demux():
                for audio_frame in in_packet.decode():
                    if not in_frame_size:
                        in_frame_size = audio_frame.samples
                    in_frame_num += 1
                    out_packet = out_stream.encode(audio_frame)
                    if out_packet is not None:
                        out_container.mux(out_packet)

            # flush encoder
            out_packet = out_stream.encode(None)
            while out_packet is not None:
                out_container.mux(out_packet)
                out_packet = out_stream.encode(None)

            out_frame_size = out_stream.frame_size
            out_frame_num = out_stream.frames
            out_frame_rate = out_stream.rate
            in_frame_rate = in_stream.rate

            out_container.close()

            old_ts = np.load(audio_ts_loc)
            np.save(backup_ts_loc, old_ts)

            if len(old_ts) != in_frame_num:
                in_frame_size /= len(old_ts) / in_frame_num
                logger.debug(
                    "Provided audio frame size is inconsistent with amount of "
                    f"timestamps. Correcting frame size to {in_frame_size}")

            old_ts_idx = (
                np.arange(0,
                          len(old_ts) * in_frame_size, in_frame_size) *
                out_frame_rate / in_frame_rate)
            new_ts_idx = np.arange(0, out_frame_num * out_frame_size,
                                   out_frame_size)
            interpolate = interp1d(old_ts_idx,
                                   old_ts,
                                   bounds_error=False,
                                   fill_value="extrapolate")
            new_ts = interpolate(new_ts_idx)

            # raise RuntimeError
            np.save(audio_ts_loc, new_ts)

        _update_info_version_to("v0.9.13", rec_dir)
    except av.AVError as averr:
        # Try to catch `libav.aac : Input contains (near) NaN/+-Inf` errors
        # Unfortunately, the above error is only logged not raised. Instead
        # `averr`, an `Invalid Argument` error with error number 22, is raised.
        if retry_on_averror and averr.errno == 22:
            # unfortunately
            logger.error("Encountered AVError. Retrying to update recording.")
            out_container.close()
            # Only retry once:
            update_recording_v094_to_v0913(rec_dir, retry_on_averror=False)
        else:
            raise  # re-raise exception
Example #46
import av

container = av.open("target_1280.mp4")

for frame in container.decode(video=0):
    frame.to_image().save('frame-%04d.jpg' % frame.index)
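frame.index is deprecated in recent PyAV; a hedged equivalent that numbers the frames with enumerate:

import av

with av.open("target_1280.mp4") as container:
    for i, frame in enumerate(container.decode(video=0)):
        frame.to_image().save('frame-%04d.jpg' % i)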
Example #47
    def __call__(self, results):
        """
        Perform mp4 decode operations.
        return:
            List where each item is a numpy array after decoder.
        """
        file_path = results['filename']
        results['format'] = 'video'
        results['backend'] = self.backend

        if self.backend == 'cv2':
            cap = cv2.VideoCapture(file_path)
            videolen = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            sampledFrames = []
            for i in range(videolen):
                ret, frame = cap.read()
                # maybe first frame is empty
                if not ret:
                    continue
                img = frame[:, :, ::-1]
                sampledFrames.append(img)
            results['frames'] = sampledFrames
            results['frames_len'] = len(sampledFrames)

        elif self.backend == 'decord':
            container = de.VideoReader(file_path)
            frames_len = len(container)
            results['frames'] = container
            results['frames_len'] = frames_len

        elif self.backend == 'pyav':  # for TimeSformer
            if self.mode in ["train", "valid"]:
                clip_idx = -1
            elif self.mode in ["test"]:
                clip_idx = 0
            else:
                raise NotImplementedError

            container = av.open(file_path)

            num_clips = 1  # always be 1

            # decode process
            fps = float(container.streams.video[0].average_rate)

            frames_length = container.streams.video[0].frames
            duration = container.streams.video[0].duration

            if duration is None:
                # If failed to fetch the decoding information, decode the entire video.
                decode_all_video = True
                video_start_pts, video_end_pts = 0, math.inf
            else:
                decode_all_video = False
                start_idx, end_idx = get_start_end_idx(
                    frames_length,
                    self.sampling_rate * self.num_seg / self.target_fps * fps,
                    clip_idx, num_clips)
                timebase = duration / frames_length
                video_start_pts = int(start_idx * timebase)
                video_end_pts = int(end_idx * timebase)

            frames = None
            # If video stream was found, fetch video frames from the video.
            if container.streams.video:
                margin = 1024
                seek_offset = max(video_start_pts - margin, 0)

                container.seek(seek_offset,
                               any_frame=False,
                               backward=True,
                               stream=container.streams.video[0])
                tmp_frames = {}
                buffer_count = 0
                max_pts = 0
                for frame in container.decode(**{"video": 0}):
                    max_pts = max(max_pts, frame.pts)
                    if frame.pts < video_start_pts:
                        continue
                    if frame.pts <= video_end_pts:
                        tmp_frames[frame.pts] = frame
                    else:
                        buffer_count += 1
                        tmp_frames[frame.pts] = frame
                        if buffer_count >= 0:
                            break
                video_frames = [tmp_frames[pts] for pts in sorted(tmp_frames)]

                container.close()

                frames = [
                    frame.to_rgb().to_ndarray() for frame in video_frames
                ]
                clip_sz = self.sampling_rate * self.num_seg / self.target_fps * fps

                start_idx, end_idx = get_start_end_idx(
                    len(frames),  # frame_len
                    clip_sz,
                    clip_idx if decode_all_video else
                    0,  # If decode all video, -1 in train and valid, 0 in test;
                    # else, always 0 in train, valid and test, as we have selected clip-size frames when decoding.
                    1)
                results['frames'] = frames
                results['frames_len'] = len(frames)
                results['start_idx'] = start_idx
                results['end_idx'] = end_idx
        else:
            raise NotImplementedError
        return results
Example #48
def mac_pyav_hack():
    if platform.system() == "Darwin":
        try:
            av.open(':0',format="avfoundation")
        except:
            pass
Example #49
    def container(self):
        try:
            return self._container
        except AttributeError:
            self._container = av.open(self.url)
            return self.container
Example #50
def output_stream_write():
    #time.sleep(5)
    while True:
        container = av.open(tmp)
        print(container)
        output.mux(frame)


session = livestreamer.Livestreamer()
session.set_option("http-headers", "Client-ID=ewvlchtxgqq88ru9gmfp1gmyt6h2b93")
streams = session.streams("http://www.twitch.tv/kalashz0r")
print(streams)
stream = streams['1080p60']

input = av.open(stream.url, options={'buffer_size': '1000000'})

tmp = BytesIO()

buffer = av.open(tmp, 'w', 'mp4')

output = av.open('rtmp://live.restream.io/live/re_882197_78625223222fd0769d68',
                 mode='w',
                 format='flv')

input_streams = list()
output_streams = list()
buffer_streams = list()

arrows = cv2.imread("/root/rama.png", -1)
Example #51
import os
import sys

import av
import numpy as np

from cm import to_iron_ycbcr

with open(sys.argv[1], 'rb') as f:
    frames = int.from_bytes(f.read(4), 'little')
    height = int.from_bytes(f.read(4), 'little')
    width = int.from_bytes(f.read(4), 'little')

    if frames == 0:
        frames = (os.path.getsize(sys.argv[1]) - 12) // height // width // 2
        print('Frame counter not set!  Guessed from file size as {}'.format(
            frames))

    outvid = av.open(sys.argv[1] + '.mp4', 'w')
    if len(sys.argv) > 2:
        stream = outvid.add_stream('mpeg4', sys.argv[2])
    else:
        stream = outvid.add_stream('mpeg4', '50')
    stream.bit_rate = 10000000
    stream.pix_fmt = 'yuv420p'
    stream.width = width
    stream.height = height
    stream.thread_count = 3

    outframe = av.VideoFrame(width, height, 'yuv420p')

    for frameno in range(frames):
        I = np.ndarray(buffer=f.read(width * height * 2),
                       dtype=np.uint16,
Example #52
def main():
    #drone = tellopy.Tello()

    try:
        #drone.connect()
        #drone.wait_for_connection(60.0)
        
        # drone.startControlCommand()
        # drone.takeoff()
        # drone.takeoff()
        # sleep(3)
        # drone.land()
        # sleep(3)
        #drone.set_video_encoder_rate(1)
        # Open webcam on OS X.
        #container = av.open(format='avfoundation', file='0') 
        #container = av.open(drone.get_video_stream())
        container = av.open('sangyy4.mp4')
        #container.VideoFrame(320, 180, 'rgb24')
        #container.width = 320
        #container.height = 180
        '''
        
         -camera (The camera index for cv::VideoCapture. Integer in the range [0,
      9]. Select a negative number (by default), to auto-detect and open the
      first available camera.) type: int32 default: -1
    -camera_fps (Frame rate for the webcam (also used when saving video). Set
      this value to the minimum value between the OpenPose displayed speed and
      the webcam real frame rate.) type: double default: 30
    -camera_parameter_folder (String with the folder where the camera
      parameters are located.) type: string
      default: "models/cameraParameters/flir/"
    -camera_resolution (Set the camera resolution (either `--camera` or
      `--flir_camera`). `-1x-1` will use the default 1280x720 for `--camera`,
      or the maximum flir camera resolution available for `--flir_camera`)
      type: string default: "-1x-1"

        
        '''
        print('Start Video Stream**********************************')
        # skip the first 20 frames

        frame_skip = 20
        #count_frame = 10
        flags = numpy.zeros((1, 4))
        pastidx = None  # previous pose index (one-person mode), so the same movement can be repeated
        actor = None  # identifies which person currently controls the Tello

        while True:
            for frame in container.decode(video=0):
                if 0 < frame_skip:
                    frame_skip = frame_skip-1
                    continue
                # start_time = time.time()
                interupt = cv2.waitKey(10)  # wait 10 ms for a key press
                #frame(320, 180, 'rgb24')
                #frame = frame(320, 180)
                #frame = frame.reformat(320, -1, 'rgb24')
                image = cv2.cvtColor(numpy.array(frame.reformat(272, 480).to_image()), cv2.COLOR_RGB2BGR)
                #image = cv2.resize(image, (640, 360));
                keypoints, output_image = openpose.forward(image, True)
                # keypoints is a matrix filled in multi-person data
                # format:[[p1][p2][p3]]

                cv2.imshow("output", output_image)
                # print('get keypoint!')
                # print(keypoints)

                # When the input matches none of the 7 poses, kNN still returns idx=6;
                # the dist_all < 0.7 threshold may be too permissive.
                # Workaround: set idx to None when dist_all > 0.7 (multi-person version below).

                # for one person data matrix, size=3*25=75
                if 40 < numpy.size(keypoints) < 76:  # ensure that it is a valid data for one person
                    # implement knn
                    (idx, dist_all) = kNNtest.implement_kNN(keypoints)
                    print('One-Person mode')
                    actor = 0
                    # In one-person mode actor defaults to 0: if the next frame is multi-person,
                    # we cannot tell who has control, and actor must not be None for the
                    # comparisons in the multi-person actor-change stage below.

                    # print(dist_all)
                    if dist_all[0] < 0.7:
                        print('*****       Pose_Idx=%d       *****' % idx)

                        # If the idx changed, record the new one; if it is unchanged, repeat
                        # the same movement. If the pose cannot be recognized, the drone keeps
                        # moving as pastidx indicates (saves energy for the actor).
                        if idx != pastidx:
                            pastidx = idx
                            print('pose idx has changed to %d' % idx)
                        idx2pose(pastidx)


                # for multi-person data matrices, size=n*(3*25)
                if numpy.size(keypoints) > 76:
                    print('multi-person mode')
                    person = len(keypoints)  # number of detected people (the count can change between frames)
                    idx_list = []            # pose idx of every person in this frame

                    kp = dict()
                    # apply kNN to each person's matrix one by one
                    for i in range(0, len(keypoints)):
                        print('separate kp')
                        name = 'kp{}'.format(i)
                        kp[name] = numpy.array([keypoints[i]])

                        # ensure the points are enough for analysis
                        if 40 < numpy.size(kp[name]) < 76:
                            (idx, dist_all) = kNNtest.implement_kNN(kp[name])
                            print('idx, dist done')

                            # if the person's pose matches none of pose idx 0-6, set idx = None
                            if dist_all[0] > 0.7:
                                idx = None

                            # store the idx only for matrices with enough points
                            idx_list += [idx]

                    print('index list of multi-person:')
                    print(idx_list)

                    # this part is the assignment of actor in multi-person mode
                    # in one person mode, actor = 0 as default

                    # this part is the situation when the plane has not been taken off yet, so actor = none
                    if actor is None:
                        print('Actor is None in multi-person mode')
                        # the person who let the plane take off is assigned as the actor
                        if 2 in idx_list:
                            actor = idx_list.index(2)
                            idx = idx_list[actor]
                            print('take off in multi-person mode by actor:', actor)
                            idx2pose(idx)
                            print('take off in multi-person mode done')

                    # this part is entered when the plane:
                    # 1/ takes off in multi-person mode
                    # 2/ takes off in one-person mode
                    elif actor is not None:
                        print('Actor is not None')
                        # Guard against a stale actor index: if the first frame had 4 people
                        # with actor = 3 (the 4th person) and the next frame has only 3,
                        # idx_list[actor] would be out of range.
                        # Base case: with person = 3, any actor >= person - 1 is reset below.
                        if person >= 3:
                            if actor >= (person-1):
                                actor = 0
                                print('actor overflow, changed to 0')
                                # actor = 0 is still dangerous, need to be tested
                        if 4 in idx_list:
                            actor = idx_list.index(4)
                            # actor becomes the first occurrence of pose 4 (list.index returns the first match)
                            # TODO: decide who should really become the actor
                            # why is the list sometimes full of 4s? possible bug
                            print('actor has changed to the person who did pose 4')

                        print('ready to get the idx in [multi-person] actor mode')
                        idx = idx_list[actor]
                        print('actor has set the idx to:', idx)
                        idx2pose(idx)
                        print('actor is:', actor, 'pose is:', idx)

                    # print('ready to do pose!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
                    # idx2pose(idx)
                    # print('do action in multi-person!!!!!!!!!!!!!!!!!!!!!!!')


                elif interupt & 0xFF == ord('q'):
                    cv2.destroyWindow("output")  # destroyWindow takes the window name, not the image
                    print('drone.land()')

                elif numpy.size(keypoints) == 0:  # if the UAV cannot find anyone, rotate until a person is detected
                    print('drone.clockwise(20)')

                    # drone.quit()
                    # sleep(1)
                # if interupt & 0xFF == ord('l'):
                #     drone.land()
                #     sleep(2)
                # if interupt & 0xFF == ord('w'):
                #     drone.forward(20)
                #     # sleep(1)
                # if interupt & 0xFF == ord('s'):
                #     drone.backward(20)
                #     sleep(1)
                # if interupt & 0xFF == ord('a'):
                #     drone.left(20)
                #     sleep(1)
                # if interupt & 0xFF == ord('d'):
                #     drone.right(20)
                #     sleep(1)
                # if interupt & 0xFF == ord('z'):
                #     drone.clockwise(20)
                #     sleep(1)
                # if interupt & 0xFF == ord('c'):
                #     drone.flip_right()
                #     sleep(1)
                # if interupt & 0xFF == ord('t'):
                #     drone.takeoff()
                #     sleep(2)
                # if interupt & 0xFF == ord('u'):
                #     drone.up(20)
                #     sleep(1)
                # if interupt & 0xFF == ord('n'):
                #     drone.down(20)
                #     sleep(1)
                # if interupt & 0xFF == ord('v'):
                #     drone.contourclockwise(20)
                #     sleep(1)
                # if interupt & 0xFF == ord('b'):
                #     drone.flip_left()
                #     sleep(1)
                #count_frame = 10
                flags = numpy.zeros((1, 4))  # reset the count of each gesture to 0
                # print('*****       count_frame=%d       *****' % count_frame)
                # frame_skip = int((time.time() - start_time) / frame.time_base)
                frame_skip = 20

    except Exception as ex:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        print(ex)
    finally:
        print('drone.quit()')
        cv2.destroyAllWindows()
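The RGB-to-BGR conversion in the loop above goes through a PIL image; PyAV can return a BGR array directly. A minimal sketch of that shortcut (the 480x272 target size is illustrative):

import av
import cv2

container = av.open('sangyy4.mp4')
for frame in container.decode(video=0):
    # to_ndarray reformats and converts in one step, skipping the PIL round-trip
    image = frame.to_ndarray(width=480, height=272, format='bgr24')
    cv2.imshow('output', image)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
container.close()
cv2.destroyAllWindows()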
Example #53
0
    parser.add_argument('--model', type=str, help='path to model')
    parser.add_argument('--seed', type=int, help='random seed')
    parser.add_argument('--debug_video',
                        type=str,
                        help='path to save debug video')
    parser.add_argument('--robot_eye_video',
                        type=str,
                        help='path to save robot-eye video')
    parser.add_argument('--verbose',
                        action='store_true',
                        help='show reward, actions, dones for each step')
    args = parser.parse_args()

    if args.robot_eye_video:
        import av
        output = av.open(args.robot_eye_video, mode='w')
        stream = output.add_stream('mpeg4', rate=13)
        stream.pix_fmt = 'yuv420p'
        stream.height, stream.width = 128, 128

    set_global_seeds(args.seed)

    env = HamstirRoomEmptyEnv(render=True, dim=128)
    if args.debug_video:
        env.logVideo(args.debug_video)
    env.seed(args.seed)
    env = DummyVecEnv([lambda: env])

    model = PPO2.load(args.model, policy=NatureLitePolicy)
    sess = model.sess
    graph = sess.graph
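For reference, frames could be written into the robot-eye stream opened above roughly as follows (a sketch; rgb_array standing in for a 128x128x3 uint8 observation is an assumption):

import av
import numpy as np

rgb_array = np.zeros((128, 128, 3), dtype=np.uint8)  # placeholder observation
frame = av.VideoFrame.from_ndarray(rgb_array, format='rgb24')
for packet in stream.encode(frame):
    output.mux(packet)

# flush the encoder before closing
for packet in stream.encode(None):
    output.mux(packet)
output.close()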
Example #54
0
def stream_worker(source, options, segment_buffer, quit_event):  # noqa: C901
    """Handle consuming streams."""

    try:
        container = av.open(source, options=options, timeout=STREAM_TIMEOUT)
    except av.AVError:
        _LOGGER.error("Error opening stream %s",
                      redact_credentials(str(source)))
        return
    try:
        video_stream = container.streams.video[0]
    except (KeyError, IndexError):
        _LOGGER.error("Stream has no video")
        container.close()
        return
    try:
        audio_stream = container.streams.audio[0]
    except (KeyError, IndexError):
        audio_stream = None
    # These formats need aac_adtstoasc bitstream filter, but auto_bsf not
    # compatible with empty_moov and manual bitstream filters not in PyAV
    if container.format.name in {"hls", "mpegts"}:
        audio_stream = None
    # Some audio streams do not have a profile and throw errors when remuxing
    if audio_stream and audio_stream.profile is None:
        audio_stream = None

    # Iterator for demuxing
    container_packets = None
    # The decoder timestamps of the latest packet in each stream we processed
    last_dts = {video_stream: float("-inf"), audio_stream: float("-inf")}
    # Keep track of consecutive packets without a dts to detect end of stream.
    missing_dts = 0
    # The video pts at the beginning of the segment
    segment_start_pts = None
    # Because of problems 1 and 2 below, we need to store the first few packets and replay them
    initial_packets = deque()

    # Have to work around two problems with RTSP feeds in ffmpeg
    # 1 - first frame has bad pts/dts https://trac.ffmpeg.org/ticket/5018
    # 2 - seeking can be problematic https://trac.ffmpeg.org/ticket/7815

    def peek_first_pts():
        """Initialize by peeking into the first few packets of the stream.

        Deal with problem #1 above (bad first packet pts/dts) by recalculating using pts/dts from second packet.
        Also load the first video keyframe pts into segment_start_pts and check if the audio stream really exists.
        """
        nonlocal segment_start_pts, audio_stream, container_packets
        missing_dts = 0
        found_audio = False
        try:
            container_packets = container.demux((video_stream, audio_stream))
            first_packet = None
            # Get to first video keyframe
            while first_packet is None:
                packet = next(container_packets)
                if (
                        packet.dts is None
                ):  # Allow MAX_MISSING_DTS packets with no dts, raise error on the next one
                    if missing_dts >= MAX_MISSING_DTS:
                        raise StopIteration(
                            f"Invalid data - got {MAX_MISSING_DTS+1} packets with missing DTS while initializing"
                        )
                    missing_dts += 1
                    continue
                if packet.stream == audio_stream:
                    found_audio = True
                elif packet.is_keyframe:  # video_keyframe
                    first_packet = packet
                    initial_packets.append(packet)
            # Get first_pts from subsequent frame to first keyframe
            while segment_start_pts is None or (
                    audio_stream and not found_audio
                    and len(initial_packets) < PACKETS_TO_WAIT_FOR_AUDIO):
                packet = next(container_packets)
                if (
                        packet.dts is None
                ):  # Allow MAX_MISSING_DTS packet with no dts, raise error on the next one
                    if missing_dts >= MAX_MISSING_DTS:
                        raise StopIteration(
                            f"Invalid data - got {MAX_MISSING_DTS+1} packets with missing DTS while initializing"
                        )
                    missing_dts += 1
                    continue
                if packet.stream == audio_stream:
                    # detect ADTS AAC and disable audio
                    if audio_stream.codec.name == "aac" and packet.size > 2:
                        with memoryview(packet) as packet_view:
                            if packet_view[0] == 0xFF and packet_view[
                                    1] & 0xF0 == 0xF0:
                                _LOGGER.warning(
                                    "ADTS AAC detected - disabling audio stream"
                                )
                                container_packets = container.demux(
                                    video_stream)
                                audio_stream = None
                                continue
                    found_audio = True
                elif (
                        segment_start_pts is None
                ):  # This is the second video frame to calculate first_pts from
                    segment_start_pts = packet.dts - packet.duration
                    first_packet.pts = segment_start_pts
                    first_packet.dts = segment_start_pts
                initial_packets.append(packet)
            if audio_stream and not found_audio:
                _LOGGER.warning(
                    "Audio stream not found"
                )  # Some streams declare an audio stream and never send any packets
                audio_stream = None

        except (av.AVError, StopIteration) as ex:
            _LOGGER.error(
                "Error demuxing stream while finding first packet: %s",
                str(ex))
            return False
        return True

    if not peek_first_pts():
        container.close()
        return

    segment_buffer.set_streams(video_stream, audio_stream)
    segment_buffer.reset(segment_start_pts)

    while not quit_event.is_set():
        try:
            if len(initial_packets) > 0:
                packet = initial_packets.popleft()
            else:
                packet = next(container_packets)
            if packet.dts is None:
                # Allow MAX_MISSING_DTS consecutive packets without dts. Terminate the stream on the next one.
                if missing_dts >= MAX_MISSING_DTS:
                    raise StopIteration(
                        f"No dts in {MAX_MISSING_DTS+1} consecutive packets")
                missing_dts += 1
                continue
            missing_dts = 0
        except (av.AVError, StopIteration) as ex:
            _LOGGER.error("Error demuxing stream: %s", str(ex))
            break

        # Discard packet if dts is not monotonic
        if packet.dts <= last_dts[packet.stream]:
            if (packet.time_base *
                (last_dts[packet.stream] - packet.dts) > MAX_TIMESTAMP_GAP):
                _LOGGER.warning(
                    "Timestamp overflow detected: last dts %s, dts = %s, resetting stream",
                    last_dts[packet.stream],
                    packet.dts,
                )
                break
            continue

        # Update last_dts processed
        last_dts[packet.stream] = packet.dts

        # Mux packets, and possibly write a segment to the output stream.
        # This mutates packet timestamps and stream
        segment_buffer.mux_packet(packet)

    # Close stream
    segment_buffer.close()
    container.close()
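Both demux loops above enforce the same invariant: at most MAX_MISSING_DTS consecutive packets without a dts, and strictly increasing dts per stream. Isolated as a sketch (the constant value and the source URL are placeholders):

import av

MAX_MISSING_DTS = 6  # placeholder value
container = av.open('rtsp://example.invalid/stream')  # placeholder source
video = container.streams.video[0]
last_dts = float('-inf')
missing_dts = 0

for packet in container.demux(video):
    if packet.dts is None:
        # tolerate a short run of dts-less packets, then give up
        if missing_dts >= MAX_MISSING_DTS:
            break
        missing_dts += 1
        continue
    missing_dts = 0
    if packet.dts <= last_dts:
        continue  # drop packets whose dts does not advance
    last_dts = packet.dts
    # ... hand the packet to the segmenter/muxer here ...
container.close()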
Example #55
0
async def test_durations(hass, record_worker_sync):
    """Test that the duration metadata matches the media."""

    # Use a target part duration which has a slight mismatch
    # with the incoming frame rate to better expose problems.
    target_part_duration = TEST_PART_DURATION - 0.01
    await async_setup_component(
        hass,
        "stream",
        {
            "stream": {
                CONF_LL_HLS: True,
                CONF_SEGMENT_DURATION: SEGMENT_DURATION,
                CONF_PART_DURATION: target_part_duration,
            }
        },
    )

    source = generate_h264_video(duration=SEGMENT_DURATION + 1)
    stream = create_stream(hass, source, {}, stream_label="camera")

    # use record_worker_sync to grab output segments
    with patch.object(hass.config, "is_allowed_path", return_value=True):
        await stream.async_record("/example/path")

    complete_segments = list(await record_worker_sync.get_segments())[:-1]
    assert len(complete_segments) >= 1

    # check that the Part duration metadata matches the durations in the media
    running_metadata_duration = 0
    for segment in complete_segments:
        av_segment = av.open(io.BytesIO(segment.init + segment.get_data()))
        av_segment.close()
        for part_num, part in enumerate(segment.parts):
            av_part = av.open(io.BytesIO(segment.init + part.data))
            running_metadata_duration += part.duration
            # av_part.duration actually returns the dts of the first packet of the next
            # av_part. When we normalize this by av.time_base we get the running
            # duration of the media.
            # The metadata duration may differ slightly from the media duration.
            # The worker has some flexibility of where to set each metadata boundary,
            # and when the media's duration is slightly too long or too short, the
            # metadata duration may be adjusted up or down.
            # We check here that the divergence between the metadata duration and the
            # media duration is not too large (2 frames seems reasonable here).
            assert math.isclose(
                (av_part.duration - av_part.start_time) / av.time_base,
                part.duration,
                abs_tol=2 / av_part.streams.video[0].rate + 1e-6,
            )
            # Also check that the sum of the durations so far matches the last dts
            # in the media.
            assert math.isclose(
                running_metadata_duration,
                av_part.duration / av.time_base,
                abs_tol=1e-6,
            )
            # And check that the metadata duration is between 0.85x and 1.0x of
            # the part target duration
            if not (part.has_keyframe or part_num == len(segment.parts) - 1):
                assert part.duration > 0.85 * target_part_duration - 1e-6
            assert part.duration < target_part_duration + 1e-6
            av_part.close()
    # check that the Part durations are consistent with the Segment durations
    for segment in complete_segments:
        assert math.isclose(
            sum(part.duration for part in segment.parts),
            segment.duration,
            abs_tol=1e-6,
        )

    await record_worker_sync.join()

    stream.stop()
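The normalization used above, a duration divided by av.time_base, applies to any container-level timestamp; a file's total duration in seconds reads the same way (the path is a placeholder):

import av

container = av.open('segment.mp4')  # placeholder path
# container.duration is in av.time_base units, like av_part.duration above
print('duration: %.3f s' % (container.duration / av.time_base))
container.close()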
Example #56
0
import argparse
import os

import av
import cv2

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-r', '--rate', default='23.976')
arg_parser.add_argument('-f', '--format', default='yuv420p')
arg_parser.add_argument('-w', '--width', type=int)
arg_parser.add_argument('--height', type=int)
arg_parser.add_argument('-b', '--bitrate', type=int, default=8000000)
arg_parser.add_argument('-c', '--codec', default='mpeg4')
arg_parser.add_argument('inputs', nargs='+')
arg_parser.add_argument('output', nargs=1)
args = arg_parser.parse_args()

output = av.open(args.output[0], 'w')
stream = output.add_stream(args.codec, args.rate)
stream.bit_rate = args.bitrate
stream.pix_fmt = args.format

for i, path in enumerate(args.inputs):

    print(os.path.basename(path))

    img = cv2.imread(path)

    if not i:
        # Size the stream from the first image; guard against --width being
        # unset and keep integer dimensions for the encoder.
        stream.height = args.height or (args.width and args.width *
                                        img.shape[0] // img.shape[1]) or img.shape[0]
        stream.width = args.width or img.shape[1]
Example #57
0
def _decode_frames_by_av_module(
    full_path,
    video_start_pts=0,
    video_end_pts=None,
    audio_start_pts=0,
    audio_end_pts=None,
):
    """
    Use PyAv to decode video frames. This provides a reference for our decoder
    to compare the decoding results.
    Input arguments:
        full_path: video file path
        video_start_pts/video_end_pts: the starting/ending Presentation TimeStamp where
            frames are read
    """
    if video_end_pts is None:
        video_end_pts = float('inf')
    if audio_end_pts is None:
        audio_end_pts = float('inf')
    container = av.open(full_path)

    video_frames = []
    vtimebase = torch.zeros([0], dtype=torch.int32)
    if container.streams.video:
        video_frames = _read_from_stream(
            container,
            video_start_pts,
            video_end_pts,
            container.streams.video[0],
            {"video": 0},
        )
        # container.streams.video[0].average_rate is not a reliable estimator of
        # frame rate; it can be wrong for certain codecs, such as VP80.
        # So we do not return the video fps here.
        vtimebase = _fraction_to_tensor(container.streams.video[0].time_base)

    audio_frames = []
    atimebase = torch.zeros([0], dtype=torch.int32)
    if container.streams.audio:
        audio_frames = _read_from_stream(
            container,
            audio_start_pts,
            audio_end_pts,
            container.streams.audio[0],
            {"audio": 0},
        )
        atimebase = _fraction_to_tensor(container.streams.audio[0].time_base)

    container.close()
    vframes = [frame.to_rgb().to_ndarray() for frame in video_frames]
    vframes = torch.as_tensor(np.stack(vframes))

    vframe_pts = torch.tensor([frame.pts for frame in video_frames],
                              dtype=torch.int64)

    aframes = [frame.to_ndarray() for frame in audio_frames]
    if aframes:
        aframes = np.transpose(np.concatenate(aframes, axis=1))
        aframes = torch.as_tensor(aframes)
    else:
        aframes = torch.empty((1, 0), dtype=torch.float32)

    aframe_pts = torch.tensor(
        [audio_frame.pts for audio_frame in audio_frames], dtype=torch.int64)

    return DecoderResult(
        vframes=vframes,
        vframe_pts=vframe_pts,
        vtimebase=vtimebase,
        aframes=aframes,
        aframe_pts=aframe_pts,
        atimebase=atimebase,
    )
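_read_from_stream is referenced but not shown; a hypothetical sketch of such a helper (the seek-then-filter approach and the exact signature are assumptions, not the original implementation):

def _read_from_stream(container, start_pts, end_pts, stream, stream_name):
    # Hypothetical helper: collect decoded frames whose pts falls within
    # [start_pts, end_pts] for the given stream.
    container.seek(int(start_pts), any_frame=False, backward=True,
                   stream=stream)
    frames = []
    for frame in container.decode(**stream_name):  # e.g. {"video": 0}
        if frame.pts is None or frame.pts < start_pts:
            continue
        if frame.pts > end_pts:
            break
        frames.append(frame)
    return frames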
Example #58
0
    save_path = os.path.join(path, 'sorted')
    video = video_file
    results_name = os.path.split(video_file)[-1].split('.')[0] + '_pred.pickle'

    log.info("saving analysis to: {0}".format(os.path.join(save_path, results_name)))

    # define empty lists to hold params (cnn)
    a_cnn = []
    b_cnn = []
    x_cnn = []
    y_cnn = []
    phi_cnn = []
    lx = []; ly = []; tx = []; ty = []
    rx = []; ry = []; bx = []; by = []

    # reopen the video with PyAV
    container = av.open(video)
    video_stream = container.streams[0]

    for i, packet in enumerate(container.demux(video_stream)):
        if i % 1000 == 0:
            log.info("frame: {0}...".format(i))
            nd.update_job_tick()
        try:
            frame = packet.decode()[0]

            frame_ = np.asarray(frame.to_image().convert('LA'))
            frame_ = frame_[:, :-10, :]
            frame_ = frame_ - np.min(frame_)

            if frame_.shape[-1] > 1:
                frame_ = frame_[:, :, 0]
Example #59
0
def open_video(filename):
    try:
        return av.open(filename)
    except UnicodeDecodeError:
        print("Opening with metadata encoding latin-1", file=sys.stderr)
        return av.open(filename, metadata_encoding="latin-1")
Example #60
0
def save_2d_keypoints_and_images(video_name, video_path, npy_path,
                                 rgb_skeleton_data, frame_time_dict):
    mismatch_count = 0
    ## cap = cv2.VideoCapture(video_path)
    ## assert(cap.isOpened() == True)

    container = av.open(video_path)

    for k, fr in enumerate(container.decode(video=0)):
        assert (k == fr.index)
        ## for k in frame_time_dict.keys():
        nearest_idx, nearest_time = find_nearest_frameindex_from_skeleton_file(
            rgb_skeleton_data[..., 0],
            frame_time_dict[k])  # take column 0 (time) from rgb data
        # print("k (video frame) ", k, "\t time", frame_time_dict[k], "\t nearest_idx from skeleton file", nearest_idx, "\t nearest_time", nearest_time)  # print("k=>", k, nearest_idx, "<= nearest_idx")

        if (abs(frame_time_dict[k] - nearest_time) >
                1000000):  # 100 ns ticks, so 1000000 = 0.1sec
            mismatch_count += 1
            continue  # do not add the nearest found index if the difference is really big (>0.1sec)
        else:
            # print(rgb_skeleton_data[nearest_idx])
            if (np.inf not in rgb_skeleton_data[nearest_idx]
                ):  # do not add if there is np.inf in the line

                ## cap.set(cv2.CAP_PROP_POS_FRAMES, k)
                ## success, frame = cap.read()  # frame is read as (h, w, c)

                success = True  # hard-coded for PyAV
                frame = fr.to_image()
                # converting PIL (<class 'PIL.Image.Image'>) to <class 'numpy.ndarray'>
                img = np.asarray(frame)  # h, w, c

                if success:
                    os.makedirs(os.path.join(npy_path, video_name),
                                exist_ok=True)
                    save_dir = os.path.join(npy_path, video_name)

                    # 1
                    # save image with the original resolution
                    # print("kth frame =", k, frame.shape, "\n")
                    # cv2.imwrite(os.path.join(save_dir, video_name + "_vfr_" + str(k) + "_skfr_" + str(nearest_idx) + '.jpg'), frame)

                    # 2
                    # save downsampled image

                    ## bgr to rgb
                    ## img = frame[...,::-1]
                    img_central = img[:, 240:(1920 - 240), :]
                    # downsample by 4.5
                    img_down = pyramid_reduce(
                        img_central, downscale=4.5)  # better than resize
                    # print("img_down shape (h, w, c)", img_down.shape)  # height, width, channels (rgb)
                    skimage.io.imsave(
                        os.path.join(
                            save_dir, video_name + "_vfr_" + str(k) +
                            "_skfr_" + str(nearest_idx) + "_240x320.jpg"),
                        img_down)

                    # 3
                    # save heatmaps and pafs
                    sk_keypoints_with_tracking_info = rgb_skeleton_data[
                        nearest_idx][1:]  # ignore index 0 (time)
                    sk_keypoints = np.delete(
                        sk_keypoints_with_tracking_info,
                        np.arange(0, sk_keypoints_with_tracking_info.size, 3)
                    )  # this is without tracking info, by removing the tracking info
                    # print("sk_kp shape =", sk_keypoints.shape)  # (38, )

                    # for 20 (actually 19 + background) heatmaps =====================================
                    for kpn in range(sk_keypoints.shape[0] // 2):
                        kpx = sk_keypoints[2 * kpn]
                        kpy = sk_keypoints[2 * kpn + 1]  # print(kpx, kpy)

                        index_array = np.zeros((240 // ground_truth_factor,
                                                320 // ground_truth_factor, 2))
                        for i in range(index_array.shape[0]):
                            for j in range(index_array.shape[1]):
                                index_array[i][j] = [
                                    i, j
                                ]  # height (y), width (x) => index_array[:,:,0] = y pixel coordinate and index_array[:,:,1] = x

                        if kpn == 0:
                            heatmap = get_heatmap(
                                index_array, kpx_kpy_transformer([kpx, kpy])
                            )  # /4 because image is 1080 x 1920 and so are the original pixel locations of the keypoints
                        else:
                            heatmap = np.dstack(
                                (heatmap,
                                 get_heatmap(index_array,
                                             kpx_kpy_transformer([kpx, kpy]))))
                        # print("heatmap.shape =", heatmap.shape)

                    # generate background heatmap
                    maxed_heatmap = np.max(
                        heatmap[:, :, :], axis=2
                    )  # print("maxed_heatmap.shape = ", maxed_heatmap.shape)

                    heatmap = np.dstack((heatmap, 1 - maxed_heatmap))
                    # print("final heatmap.shape =", heatmap.shape)
                    np.save(
                        os.path.join(
                            save_dir, video_name + "_vfr_" + str(k) +
                            "_skfr_" + str(nearest_idx) + "_heatmap30x40.npy"),
                        heatmap)

                    # for 18x2 PAFs =====================================
                    for n, pair in enumerate(paf_pairs_indices):
                        # print("writing paf for index", n, pair)
                        index_array = np.zeros((240 // ground_truth_factor,
                                                320 // ground_truth_factor, 2))
                        for i in range(index_array.shape[0]):
                            for j in range(index_array.shape[1]):
                                index_array[i][j] = [
                                    i, j
                                ]  # height (y), width (x) => index_array[:,:,0] = y pixel coordinate and index_array[:,:,1] = x

                        if n == 0:
                            paf = get_pafx_pafy(
                                index_array,
                                kp0xy=kpx_kpy_transformer([
                                    sk_keypoints[2 * pair[0]],
                                    sk_keypoints[2 * pair[0] + 1]
                                ]),
                                kp1xy=kpx_kpy_transformer([
                                    sk_keypoints[2 * pair[1]],
                                    sk_keypoints[2 * pair[1] + 1]
                                ]))
                        else:
                            paf = np.dstack(
                                (paf,
                                 get_pafx_pafy(
                                     index_array,
                                     kp0xy=kpx_kpy_transformer([
                                         sk_keypoints[2 * pair[0]],
                                         sk_keypoints[2 * pair[0] + 1]
                                     ]),
                                     kp1xy=kpx_kpy_transformer([
                                         sk_keypoints[2 * pair[1]],
                                         sk_keypoints[2 * pair[1] + 1]
                                     ]))))
                        # print("paf.shape =", paf.shape)

                    # print("final paf.shape =========================", paf.shape)
                    np.save(
                        os.path.join(
                            save_dir, video_name + "_vfr_" + str(k) +
                            "_skfr_" + str(nearest_idx) + "_paf30x40.npy"),
                        paf)

                    # 4
                    # save the 2d keypoints of shape (38,)
                    # print(rgb_skeleton_data[nearest_idx])
                    # print(save_dir, os.path.join("", video_name + "_vfr_" + str(k) + "_skfr_" + str(nearest_idx) + '.npy'))
                    np.save(
                        os.path.join(
                            save_dir, video_name + "_vfr_" + str(k) +
                            "_skfr_" + str(nearest_idx) + '.npy'),
                        rgb_skeleton_data[nearest_idx][1:]
                    )  # index 0 is time # saving all 57 values 19 * 3 (tracking, x, y)

    ## cap.release()
    print("mismatch_count =", mismatch_count)