def test_rgb_rotate(self):
    path = self.sandboxed('rgb_rotate.mov')
    output = av.open(path, 'w')
    write_rgb_rotate(output)
    assert_rgb_rotate(self, av.open(path))
def test_error_attributes(self):
    try:
        av.open('does not exist')
    except AVError as e:
        self.assertEqual(e.errno, 2)
        if is_windows:
            self.assertTrue(e.strerror in ['Error number -2 occurred', 'No such file or directory'])
        else:
            self.assertEqual(e.strerror, 'No such file or directory')
        self.assertEqual(e.filename, 'does not exist')
    else:
        self.fail('no exception raised')
def test_buffer_read_write(self):
    buffer_ = StringIO()
    wrapped = MethodLogger(buffer_)
    write_rgb_rotate(av.open(wrapped, 'w', 'mp4'))

    # Make sure it did actually write.
    writes = wrapped._filter('write')
    self.assertTrue(writes)
    self.assertTrue(buffer_.tell())

    # Standard assertions.
    buffer_.seek(0)
    assert_rgb_rotate(self, av.open(buffer_))
def main():
    inp = av.open('piano2.wav', 'r')
    out = av.open('piano2.mp3', 'w')
    ostream = out.add_stream("mp3")

    for frame in inp.decode(audio=0):
        frame.pts = None
        for p in ostream.encode(frame):
            out.mux(p)

    for p in ostream.encode(None):
        out.mux(p)

    out.close()
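# A hedged variant of the same WAV-to-MP3 transcode, sketched with context
# managers so both containers are closed even if encoding fails. The filenames
# and the flush pattern come from the snippet above; everything else is
# standard PyAV usage, not code from the original project.
import av

def transcode_wav_to_mp3(src='piano2.wav', dst='piano2.mp3'):
    with av.open(src) as inp, av.open(dst, 'w') as out:
        ostream = out.add_stream('mp3')
        for frame in inp.decode(audio=0):
            frame.pts = None                     # let the encoder assign timestamps
            for packet in ostream.encode(frame):
                out.mux(packet)
        for packet in ostream.encode(None):      # flush buffered packets
            out.mux(packet)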
def __init__(self, file, toc=None, format=None):
    if not hasattr(file, 'read'):
        file = str(file)
    self.file = file
    self.format = format
    self._container = None

    with av.open(self.file, format=self.format) as container:
        # Build a toc
        if toc is None:
            self._toc = np.cumsum([len(packet.decode())
                                   for packet in container.demux()
                                   if packet.stream.type == 'video'])
        else:
            if isinstance(toc, list):
                self._toc = np.array(toc, dtype=np.int64)
            else:
                self._toc = toc
        self._len = self._toc[-1]

        video_stream = [s for s in container.streams if s.type == 'video'][0]
        # PyAV always returns frames in color, and we make that
        # assumption in get_frame() later below, so 3 is hardcoded here:
        self._im_sz = video_stream.height, video_stream.width, 3

    self._load_fresh_file()
def read_video(v_name):
    """A workaround function for reading video.

    Apparently precompiled OpenCV couldn't read AVI videos on Mac OS X and
    Linux, therefore I use PyAV, a ffmpeg binding, to extract video frames.

    Parameters
    ----------
    v_name : string
        absolute path to video

    Returns
    -------
    frames : list
        An ordered list for storing frames
    num_frames : int
        number of frames in the video
    """
    container = av.open(v_name)
    video = next(s for s in container.streams if s.type == b'video')

    frames = []
    for packet in container.demux(video):
        for frame in packet.decode():
            frame_t = np.array(frame.to_image())
            frames.append(cv2.cvtColor(frame_t, cv2.COLOR_RGB2BGR))
    return frames, len(frames)
def write_thumbnails(self, m3u8_obj, url):
    for p in m3u8_obj.playlists:
        quality = p.media[0].name
        if quality == 'Source':
            uri = p.uri
            r = requests.get(uri)
            last_line = r.text.split('\n')[-2]
            parsed_uri = urllib.parse.urlparse(uri)
            short_path = '/'.join(parsed_uri.path.split('/')[:-1])
            ts_uri = '{uri.scheme}://{uri.netloc}{short_path}/{ts_path}'.format(
                uri=parsed_uri, short_path=short_path, ts_path=last_line)
            ts_r = requests.get(ts_uri, stream=True)
            if ts_r.status_code == 200:
                with open(self.ts_path, 'wb') as f:
                    for chunk in ts_r:
                        f.write(chunk)

            import av
            container = av.open(self.ts_path)
            video = next(s for s in container.streams if s.type == 'video')
            for packet in container.demux(video):
                for frame in packet.decode():
                    im = frame.to_image()
                    im_tn = frame.to_image()
                    im_tn.thumbnail((240, 135))
                    for id in self.ids:
                        im.save(self.original_path.format(streamer=self.streamer, id=id))
                        im_tn.save(self.thumbnail_path.format(streamer=self.streamer, id=id))
                    return
def __init__(self, filename, cache_size=16, fast_forward_thresh=32, stream_index=0):
    self.filename = str(filename)
    self._container = av.open(self.filename)
    if len(self._container.streams.video) == 0:
        raise IOError("No valid video stream found in {}".format(filename))
    self._stream = self._container.streams.video[stream_index]
    try:
        self._duration = self._stream.duration * self._stream.time_base
    except TypeError:
        self._duration = self._container.duration / av.time_base
    self._frame_rate = self._stream.average_rate
    if self.duration <= 0 or len(self) <= 0:
        raise IOError("Video stream {} in {} has zero length.".format(stream_index, filename))
    self._cache = [None] * cache_size
    self._fast_forward_thresh = fast_forward_thresh

    demuxer = self._container.demux(streams=self._stream)
    # obtain first frame to get first time point
    # also tests for the presence of timestamps
    frame = next(_gen_frames(demuxer, self._stream.time_base))
    self._first_pts = frame.metadata['timestamp']
    frame = WrapPyAvFrame(frame.frame, 0, frame.metadata)
    self._cache[0] = frame

    self._frame_shape = (self._stream.height, self._stream.width, 3)
    self._last_frame = 0
    self._reset_demuxer()
def writeFrames():
    global frameCount
    output = av.open(file='temp4.flv', mode='w')
    stream = output.add_stream("libx264", 15)
    stream.bit_rate = 8000000
    stream.pix_fmt = "yuv420p"
    stream.width = 640
    stream.height = 480
    print "stream is a: %s" % type(stream)
    while not exitFlag:
        temp = q.get()
        temp.save('test/%04d.jpg' % frameCount)
        frame = av.VideoFrame.from_image(temp)
        print "%d" % frame.index
        #temp = temp.reformat(format="yuv420p")
        #print "Frame: %s" % temp
        packet = stream.encode(frame)
        #print "packet: %s" % packet
        # Note: the original compared type(packet) against the string
        # "av.packet.Packet", which is always False; use isinstance instead.
        if isinstance(packet, av.packet.Packet):
            print "Found a packet, Muxing."
            output.mux(packet)
        else:
            print "Not a packet. Not counting"
        frameCount += 1
    packet = stream.encode()
    output.mux(packet)
    output.close()
def read_video(fn, start_frame=None, duration=None):
    """Takes a path to a video file. Return a generator which will
    generate a PIL image for each video frame.

    If start_frame is not None, an attempt is made to seek to that
    (0-indexed) frame. If duration is not None, yield at most that many
    frames.
    """
    LOG.info('Opening video file: {0}'.format(fn))
    container = av.open(fn)

    n_frames_yielded = 0
    frame_idx = 0 if start_frame is None else start_frame
    container.seek(frame_idx, 'frame')

    stream = next(s for s in container.streams if s.type == 'video')
    for packet in container.demux(stream):
        for frame in packet.decode():
            if duration is not None and n_frames_yielded >= duration:
                return

            # Re-format frame
            frame = frame.reformat(frame.width, frame.height, 'rgb24')

            # Yield frame and frame index
            yield frame_idx, frame.to_image()

            frame_idx += 1
            n_frames_yielded += 1
def __init__(self, file_loc, fps=30,
             video_stream={'codec': 'mpeg4', 'bit_rate': 15000*10e3},
             use_timestamps=False):
    super().__init__()
    self.use_timestamps = use_timestamps
    self.timestamps = []
    # the approximate capture rate.
    self.fps = int(fps)

    directory, video_file = os.path.split(file_loc)
    name, ext = os.path.splitext(video_file)
    if ext not in ('.mp4', '.mov', '.mkv'):
        logger.warning("media file container should be mp4 or mov. Using a different container is risky.")

    self.file_loc = file_loc
    self.container = av.open(self.file_loc, 'w')
    logger.debug("Opened '{}' for writing.".format(self.file_loc))

    if self.use_timestamps:
        self.time_base = Fraction(1, 65535)  # highest resolution for mp4
    else:
        self.time_base = Fraction(1000, self.fps*1000)  # timebase is fps

    self.video_stream = self.container.add_stream(video_stream['codec'], 1/self.time_base)
    self.video_stream.bit_rate = video_stream['bit_rate']
    self.video_stream.bit_rate_tolerance = video_stream['bit_rate']/20
    self.video_stream.thread_count = 1
    # self.video_stream.pix_fmt = "yuv420p"
    self.configured = False
    self.start_time = None
    self.current_frame_idx = 0
def __init__(self, file_loc, fps=30):
    super().__init__()
    # the approximate capture rate.
    self.fps = int(fps)
    self.time_base = Fraction(1000, self.fps * 1000)
    self.timestamps = []

    directory, video_file = os.path.split(file_loc)
    name, ext = os.path.splitext(video_file)
    # Note: the original checked `ext not in (".mp4")`, i.e. a substring test
    # against a plain string; a one-element tuple is what was intended.
    if ext not in (".mp4",):
        logger.warning(
            "media file container should be mp4. Using a different container is risky."
        )

    self.file_loc = file_loc
    self.container = av.open(self.file_loc, "w")
    logger.debug("Opened '{}' for writing.".format(self.file_loc))

    self.video_stream = self.container.add_stream("mjpeg", 1 / self.time_base)
    self.video_stream.pix_fmt = "yuvj422p"
    self.video_stream.time_base = self.time_base
    self.configured = False
    self.frame_count = 0

    self.write_video_frame_compressed = self.write_video_frame
def set_file(self, path):
    self.file = av.open(path)
    self.stream = next(s for s in self.file.streams if s.type == b'video')
    self.rate = get_frame_rate(self.stream)
    self.time_base = float(self.stream.time_base)

    index, first_frame = next(self.next_frame())
    self.stream.seek(self.stream.start_time)

    # find the pts of the first frame
    index, first_frame = next(self.next_frame())
    if self.pts_seen:
        pts = first_frame.pts
    else:
        pts = first_frame.dts
    self.start_time = pts or first_frame.dts

    print "First pts", pts, self.stream.start_time, first_frame

    #self.nb_frames = get_frame_count(self.file, self.stream)
    self.nb_frames = self.get_frame_count()

    dur = None
    if self.stream.duration:
        dur = self.stream.duration * self.time_base
    else:
        dur = self.file.duration * 1.0 / float(AV_TIME_BASE)

    self.update_frame_range.emit(dur, self.rate)
def decode_using_pyav():
    print('Decoding using PyAV.')
    fh = av.open('ffv1_level3.nut', 'r', options={'refcounted_frames': '1'})
    for s in fh.streams:
        #print s, s.thread_type, s.thread_count
        #pass
        print('Thread count:', s.thread_count)
        #s.thread_count = 1
        #s.thread_type = 'frame'

    count = 0
    packet_iter = fh.demux()
    while True:

        #av.utils._debug_enter('__main__.demux')
        packet = next(packet_iter)
        #av.utils._debug_exit()

        #av.utils._debug_enter('__main__.decode')
        frames = packet.decode()
        #av.utils._debug_exit()

        for frame in frames:
            count += 1
            print(count, end=' ')
            tick()
            if not count % 100:
                gc.collect()
                #print 'GARBAGE:', gc.get_count(), len(gc.garbage)
            if count >= 10000:
                return
def test_stream_index(self):
    output = av.open(self.sandboxed('output.mov'), 'w')

    vstream = output.add_stream('mpeg4', 24)
    vstream.pix_fmt = 'yuv420p'
    vstream.width = 320
    vstream.height = 240

    astream = output.add_stream('mp2', 48000)
    astream.channels = 2
    astream.format = 's16'

    self.assertEqual(vstream.index, 0)
    self.assertEqual(astream.index, 1)

    vframe = VideoFrame(320, 240, 'yuv420p')
    vpacket = vstream.encode(vframe)[0]

    self.assertIs(vpacket.stream, vstream)
    self.assertEqual(vpacket.stream_index, 0)

    for i in range(10):
        aframe = AudioFrame('s16', 'stereo', samples=astream.frame_size)
        aframe.rate = 48000
        apackets = astream.encode(aframe)
        if apackets:
            apacket = apackets[0]
            break

    self.assertIs(apacket.stream, astream)
    self.assertEqual(apacket.stream_index, 1)
def convert():
    global args
    container = av.open(args.video)
    video = next(s for s in container.streams if s.type == b'video')

    pc = 0
    is_last = False
    for packet in container.demux(video):
        if is_last:
            break
        for frame in packet.decode():
            export_frame = True

            # is this frame smaller than start frame (if defined)
            if args.sf > 0 and frame.index < args.sf:
                export_frame = False

            # is this frame bigger than end frame (if defined)
            if args.ef > 0 and frame.index > args.ef:
                export_frame = False
                is_last = True
                break

            # check is this frame amount of steps
            if args.fs > 0 and not pc % args.fs == 0:
                export_frame = False

            filename = '%s/%s%06d.jpg' % (args.outputdir, args.fileprefix, frame.index)
            # print "export %s file=%s, index=%s, pc=%s %s" % (export_frame, filename, frame.index, pc, (pc % args.fs))
            pc += 1

            if not export_frame:
                continue

            print "export file=%s, frame index=%s" % (filename, frame.index)
            frame.to_image().save(filename)
def __init__(self, file_loc, fps=30):
    super(JPEG_Writer, self).__init__()
    # the approximate capture rate.
    self.fps = int(fps)
    self.time_base = Fraction(1000, self.fps*1000)

    file_loc = str(file_loc)  # force str over unicode.
    try:
        file_path, ext = file_loc.rsplit('.', 1)
    except:
        logger.error("'%s' is not a valid media file name." % file_loc)
        raise Exception("Error")
    if ext not in ('mp4',):
        logger.warning("media file container should be mp4. Using a different container is risky.")

    self.file_loc = file_loc
    self.container = av.open(self.file_loc, 'w')
    logger.debug("Opened '%s' for writing." % self.file_loc)

    self.video_stream = self.container.add_stream('mjpeg', 1/self.time_base)
    self.video_stream.pix_fmt = "yuvj422p"
    self.configured = False
    self.frame_count = 0
    self.write_video_frame_compressed = self.write_video_frame
def __init__(self, file_loc, video_stream={'codec': 'mpeg4', 'bit_rate': 8000*10e3}, audio_stream=None):
    super(AV_Writer, self).__init__()

    try:
        file_path, ext = file_loc.rsplit('.', 1)
    except:
        logger.error("'%s' is not a valid media file name." % file_loc)
        raise Exception("Error")
    # Note: the original tested membership in the single string 'mp4,mov,mkv';
    # a tuple of extensions is what was intended.
    if ext not in ('mp4', 'mov', 'mkv'):
        logger.warning("media file container should be mp4 or mov. Using a different container is risky.")

    self.ts_file_loc = file_path + '_timestamps_pts.npy'
    self.file_loc = file_loc
    self.container = av.open(self.file_loc, 'w')
    logger.debug("Opened '%s' for writing." % self.file_loc)

    self.time_resolution = 1000  # time_base in milliseconds
    self.time_base = Fraction(1, self.time_resolution)

    self.video_stream = self.container.add_stream(video_stream['codec'], self.time_resolution)
    self.video_stream.bit_rate = video_stream['bit_rate']
    self.video_stream.thread_count = 1
    # self.video_stream.pix_fmt = "yuv420p"  # video_stream['format']
    self.configured = False
    self.start_time = None

    self.timestamps_list = []
def __init__(self, file_loc, fps=30,
             video_stream={'codec': 'mpeg4', 'bit_rate': 15000*10e3},
             audio_stream=None, use_timestamps=False):
    super(AV_Writer, self).__init__()
    self.use_timestamps = use_timestamps
    # the approximate capture rate.
    self.fps = int(fps)

    file_loc = str(file_loc)  # force str over unicode.
    try:
        file_path, ext = file_loc.rsplit('.', 1)
    except:
        logger.error("'%s' is not a valid media file name." % file_loc)
        raise Exception("Error")
    # Note: the original tested membership in the single string 'mp4,mov,mkv';
    # a tuple of extensions is what was intended.
    if ext not in ('mp4', 'mov', 'mkv'):
        logger.warning("media file container should be mp4 or mov. Using a different container is risky.")

    self.ts_file_loc = file_path + '_timestamps_pts.npy'
    self.file_loc = file_loc
    self.container = av.open(self.file_loc, 'w')
    logger.debug("Opened '%s' for writing." % self.file_loc)

    if self.use_timestamps:
        self.time_base = Fraction(1, 65535)  # highest resolution for mp4
    else:
        self.time_base = Fraction(1000, self.fps*1000)  # timebase is fps

    self.video_stream = self.container.add_stream(video_stream['codec'], 1/self.time_base)
    self.video_stream.bit_rate = video_stream['bit_rate']
    self.video_stream.bit_rate_tolerance = video_stream['bit_rate']/20
    self.video_stream.thread_count = 1
    # self.video_stream.pix_fmt = "yuv420p"
    self.configured = False
    self.start_time = None
    self.current_frame_idx = 0
def readFrames():
    container = av.open(format="video4linux2", file='/dev/video0')
    video = next(s for s in container.streams if s.type == b'video')
    for packet in container.demux(video):
        for frame in packet.decode():
            #frame.to_image().save('frame-%04d.jpg' % frame.index)
            q.put(frame.to_image())
def _configure(self, camera_num=0, **options):
    self.camera_num = camera_num
    self.options = options
    container = av.open('/dev/video{}'.format(self.camera_num), 'r', 'video4linux2', self.options)
    stream = next(s for s in container.streams if s.type == 'video')
    self.output.spec['shape'] = (stream.format.height, stream.format.width, 3)
    self.output.spec['sample_rate'] = float(stream.average_rate)
def test_writing(self):
    path = self.sandboxed('writing.mov')
    with open(path, 'wb') as fh:
        wrapped = MethodLogger(fh)

        output = av.open(wrapped, 'w', 'mov')
        write_rgb_rotate(output)
        output.close()
        fh.close()

        # Make sure it did actually write.
        writes = wrapped._filter('write')
        self.assertTrue(writes)

    # Standard assertions.
    assert_rgb_rotate(self, av.open(path))
def __init__(self, vidqueue, filename):
    super(VideoDecoder, self).__init__()
    self.appId = None
    self.running = False
    self.queue = vidqueue
    self.filename = filename
    self.container = av.open(filename)
    self.video = self.container.streams[1]
def start(self, file_loc, audio_src):
    self.should_close.clear()

    if platform.system() == "Darwin":
        in_container = av.open('none:%s' % audio_src, format="avfoundation")
    else:
        in_container = None

    self.thread = Thread(target=rec_thread,
                         args=(file_loc, in_container, audio_src, self.should_close))
    self.thread.start()
def convert_pupil_mobile_recording_to_v094(rec_dir): logger.info("Converting Pupil Mobile recording to v0.9.4 format") # convert time files and rename corresponding videos time_pattern = os.path.join(rec_dir, '*.time') for time_loc in glob.glob(time_pattern): time_file_name = os.path.split(time_loc)[1] time_name, time_ext = os.path.splitext(time_file_name) potential_locs = [os.path.join(rec_dir, time_name+ext) for ext in ('.mjpeg', '.mp4','.m4a')] existing_locs = [loc for loc in potential_locs if os.path.exists(loc)] if not existing_locs: continue else: video_loc = existing_locs[0] if time_name in ('Pupil Cam1 ID0', 'Pupil Cam1 ID1'): time_name = 'eye'+time_name[-1] # rename eye files elif time_name in ('Pupil Cam1 ID2', 'Logitech Webcam C930e'): cam_calib_loc = os.path.join(rec_dir, 'camera_calibration') try: camera_calibration = load_object(cam_calib_loc) except: # no camera calibration found video = av.open(video_loc, 'r') frame_size = video.streams.video[0].format.width, video.streams.video[0].format.height del video try: camera_calibration = pre_recorded_calibrations[time_name][frame_size] except KeyError: camera_calibration = idealized_camera_calibration(frame_size) logger.warning('Camera calibration not found. Will assume idealized camera.') save_object(camera_calibration, cam_calib_loc) time_name = 'world' # assume world file elif time_name.startswith('audio_'): time_name = 'audio' timestamps = np.fromfile(time_loc, dtype='>f8') timestamp_loc = os.path.join(rec_dir, '{}_timestamps.npy'.format(time_name)) logger.info('Creating "{}"'.format(os.path.split(timestamp_loc)[1])) np.save(timestamp_loc, timestamps) if time_name == 'audio': video_dst = os.path.join(rec_dir, time_name) + '.mp4' logger.info('Renaming "{}" to "{}"'.format(os.path.split(video_loc)[1], os.path.split(video_dst)[1])) os.rename(video_loc, video_dst) else: video_dst = os.path.join(rec_dir, time_name) + os.path.splitext(video_loc)[1] logger.info('Renaming "{}" to "{}"'.format(os.path.split(video_loc)[1], os.path.split(video_dst)[1])) os.rename(video_loc, video_dst) pupil_data_loc = os.path.join(rec_dir, 'pupil_data') if not os.path.exists(pupil_data_loc): logger.info('Creating "pupil_data"') save_object({'pupil_positions': [], 'gaze_positions': [], 'notifications': []}, pupil_data_loc)
def __init__(self, file_loc,fps=30, video_stream={'codec':'mpeg4','bit_rate': 15000*10e3}, audio_loc=None, use_timestamps=False): super().__init__() self.use_timestamps = use_timestamps self.timestamps = [] # the approximate capture rate. self.fps = int(fps) directory, video_file = os.path.split(file_loc) name, ext = os.path.splitext(video_file) if ext not in ('.mp4', '.mov', '.mkv'): logger.warning("media file container should be mp4 or mov. Using a different container is risky.") self.file_loc = file_loc self.container = av.open(self.file_loc, 'w') logger.debug("Opened '{}' for writing.".format(self.file_loc)) if self.use_timestamps: self.time_base = Fraction(1, 65535) # highest resolution for mp4 else: self.time_base = Fraction(1000, self.fps*1000) # timebase is fps if audio_loc: audio_dir = os.path.split(audio_loc)[0] audio_ts_loc = os.path.join(audio_dir, 'audio_timestamps.npy') audio_exists = os.path.exists(audio_loc) and os.path.exists(audio_ts_loc) if audio_exists: self.audio_rec = av.open(audio_loc) self.audio_ts = np.load(audio_ts_loc) self.audio_export = self.container.add_stream(template=self.audio_rec.streams.audio[0]) else: logger.warning('Could not mux audio. File not found.') self.audio_export = False else: self.audio_export = False self.video_stream = self.container.add_stream(video_stream['codec'], 1/self.time_base) self.video_stream.bit_rate = video_stream['bit_rate'] self.video_stream.bit_rate_tolerance = video_stream['bit_rate']/20 self.video_stream.thread_count = 1 # self.video_stream.pix_fmt = "yuv420p" self.configured = False self.start_time = None self.current_frame_idx = 0 self.audio_packets_decoded = 0
def rec_thread(file_loc, in_container, audio_src,should_close): # print sys.modules['av'] # import av if not in_container: #create in container if platform.system() == "Darwin": in_container = av.open('none:%s'%audio_src,format="avfoundation") elif platform.system() == "Linux": in_container = av.open('hw:%s'%audio_src,format="alsa") in_stream = None # print len(in_container.streams), 'stream(s):' for i, stream in enumerate(in_container.streams): if stream.type == 'audio': # print '\t\taudio:' # print '\t\t\tformat:', stream.format # print '\t\t\tchannels: %s' % stream.channels in_stream = stream break if in_stream is None: # logger.error("No input audio stream found.") return #create out container out_container = av.open(file_loc,'w') # logger.debug("Opended '%s' for writing."%file_loc) out_stream = out_container.add_stream(template = in_stream) for packet in in_container.demux(in_stream): # for frame in packet.decode(): # packet = out_stream.encode(frame) # if packet: # print '%r' %packet # print '\tduration: %s' % format_time(packet.duration, packet.stream.time_base) # print '\tpts: %s' % format_time(packet.pts, packet.stream.time_base) # print '\tdts: %s' % format_time(packet.dts, packet.stream.time_base) out_container.mux(packet) if should_close.is_set(): break out_container.close()
def test_audio_default_options(self):
    output = av.open(self.sandboxed('output.mov'), 'w')
    stream = output.add_stream('mp2')
    self.assertEqual(stream.bit_rate, 128000)
    self.assertEqual(stream.format.name, 's16')
    self.assertEqual(stream.rate, 48000)
    self.assertEqual(stream.ticks_per_frame, 1)
    self.assertEqual(stream.time_base, None)
def test_selection(self):
    container = av.open(fate_suite('h264/interlaced_crop.mp4'))
    video = container.streams.video[0]
    # audio_stream = container.streams.audio[0]
    # audio_streams = list(container.streams.audio[0:2])

    self.assertEqual([video], container.streams.get(video=0))
    self.assertEqual([video], container.streams.get(video=(0,)))
def image_sequence_encode(self, codec_name): try: codec = Codec(codec_name, 'w') except UnknownCodecError: raise SkipTest() container = av.open(fate_suite('h264/interlaced_crop.mp4')) video_stream = container.streams.video[0] width = 640 height = 480 ctx = codec.create() pix_fmt = ctx.codec.video_formats[0].name ctx.width = width ctx.height = height ctx.time_base = video_stream.codec_context.time_base ctx.pix_fmt = pix_fmt ctx.open() frame_count = 1 path_list = [] for frame in iter_frames(container, video_stream): new_frame = frame.reformat(width, height, pix_fmt) new_packets = ctx.encode(new_frame) self.assertEqual(len(new_packets), 1) new_packet = new_packets[0] path = self.sandboxed('%s/encoder.%04d.%s' % ( codec_name, frame_count, codec_name if codec_name != 'mjpeg' else 'jpg', )) path_list.append(path) with open(path, 'wb') as f: f.write(new_packet) frame_count += 1 if frame_count > 5: break ctx = av.Codec(codec_name, 'r').create() for path in path_list: with open(path, 'rb') as f: size = os.fstat(f.fileno()).st_size packet = Packet(size) size = f.readinto(packet) frame = ctx.decode(packet)[0] self.assertEqual(frame.width, width) self.assertEqual(frame.height, height) self.assertEqual(frame.format.name, pix_fmt)
def main(): drone = tellopy.Tello() os.makedirs('raw_data', exist_ok=True) #生データの保存するディレクトリの作成 os.makedirs('take_picture', exist_ok=True) #撮影時のディレクトリ os.makedirs('process_picture', exist_ok=True) #撮影時の加工画像を入れるディレクトリ SCREEN_WIDTH = 640 SCREEN_HEIGHT = 480 pygame.joystick.init() try: joy = pygame.joystick.Joystick(0) # create a joystick instance joy.init() # init instance print('Joystickの名称: ' + joy.get_name()) print('ボタン数 : ' + str(joy.get_numbuttons())) pygame.init() screen = pygame.display.set_mode( (SCREEN_WIDTH, SCREEN_HEIGHT)) # 画面を作る pygame.display.set_caption('Joystick') # タイトル pygame.display.flip() # 画面を反映 except pygame.error: print('Joystickが見つかりませんでした。') try: drone.connect() drone.wait_for_connection(20.0) retry = 3 container = None while container is None and 0 < retry: retry -= 1 try: container = av.open(drone.get_video_stream()) except av.AVError as ave: print(ave) print('retry...') fly_sw = False #takeoffとlandの切り替え scale = 4 #適時変更 # skip first 300 frames frame_skip = 300 raw_count = 0 #rawfile_no picture_count = 0 #picturefile_no while True: for frame in container.decode(video=0): if 0 < frame_skip: frame_skip = frame_skip - 1 continue start_time = time.time() image = cv2.cvtColor(numpy.array(frame.to_image()), cv2.COLOR_RGB2BGR) cv2.imshow('Original', image) cv2.waitKey(1) if frame.time_base < 1.0 / 60: time_base = 1.0 / 60 else: time_base = frame.time_base frame_skip = int((time.time() - start_time) / time_base) dir_write('raw_data', 'frame_{:04d}.png'.format(raw_count), image) raw_count += 1 for e in pygame.event.get(): # イベントチェック if e.type == QUIT: # 終了が押された? drone.quit() return if e.type == KEYDOWN and e.key == K_ESCAPE: # ESCが押された? drone.quit() return # Joystick関連のイベントチェック if e.type == pygame.locals.JOYAXISMOTION: x1, y1 = joy.get_axis(0), joy.get_axis( 1) #左スティックのx,yに値の格納 x2, y2 = joy.get_axis(4), joy.get_axis( 3) #右スティックのx,yに値の格納 #print('x and y : ' + str(x) +' , '+ str(y)) drone.left_x = -x1 drone.left_y = -y1 drone.right_x = x2 / scale drone.right_y = -y2 / scale elif e.type == pygame.locals.JOYBALLMOTION: print('ball motion') elif e.type == pygame.locals.JOYHATMOTION: print('hat motion') elif e.type == pygame.locals.JOYBUTTONDOWN: print(str(e.button) + '番目のボタンが押された') if int(e.button) == 7 and fly_sw == False: #start drone.takeoff() fly_sw = True elif int(e.button) == 7 and fly_sw == True: #start drone.land() drone.quit() cv2.destroyAllWindows() filepath = os.path.join('raw_data') files = os.listdir(filepath) raw_count = 0 for file in files: index = re.search('.png', file) if index: raw_count += 1 print(raw_count) #ビデオとして結合 fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') video = cv2.VideoWriter('replay.mp4', fourcc, 20.0, (640, 480)) for i in range(0, raw_count): filepath = os.path.join( 'raw_data', 'frame_{:04d}.png'.format(i)) img = cv2.imread(filepath) img = cv2.resize(img, (640, 480)) video.write(img) video.release() for i in range(0, picture_count): filepath = os.path.join( 'take_picture', 'picture_{:04d}.png'.format(i)) img = cv2.imread(filepath) print(cv2.Laplacian( img, cv2.CV_64F).var()) #ラプラシアン微分 pixel = pic.pixelArt(img) dir_write('process_picture', 'dot_{:04d}.png'.format(i), pixel) water = pic.waterColor(img) dir_write('process_picture', 'water_{:04d}.png'.format(i), water) oil = pic.oilPaint(img) dir_write('process_picture', 'oil_{:04d}.png'.format(i), oil) fly_sw = False if int(e.button) == 3: #Y dir_write( 'take_picture', 'picture_{:04d}.png'.format(picture_count), image) picture_count += 1 elif e.type == pygame.locals.JOYBUTTONUP: 
print(str(e.button) + '番目のボタンが離された') except Exception as ex: exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_exception(exc_type, exc_value, exc_traceback) print(ex) finally: drone.quit() cv2.destroyAllWindows()
def load_video(path_to_video):
    # load the raw ultrasound video
    video = av.open(path_to_video)
    return video
def output_stream_write():
    #time.sleep(5)
    while True:
        container = av.open(tmp)
        print(container)
        output.mux(frame)
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)

    cap = Audio_Capture('test.wav', 'default')
    import time
    time.sleep(5)
    cap.cleanup()

    #mic device
    exit()

    # container = av.open('hw:0', format="alsa")
    container = av.open('1:0', format="avfoundation")
    print 'container:', container
    print '\tformat:', container.format
    print '\tduration:', float(container.duration) / av.time_base
    print '\tmetadata:'
    for k, v in sorted(container.metadata.iteritems()):
        print '\t\t%s: %r' % (k, v)
    print

    print len(container.streams), 'stream(s):'
    audio_stream = None
    for i, stream in enumerate(container.streams):
        print '\t%r' % stream
        print '\t\ttime_base: %r' % stream.time_base
        print '\t\trate: %r' % stream.rate
def extract_frames(video_path, time_left):
    frames = []
    video = av.open(video_path)
    for frame in video.decode(0):
        yield frame.to_image()
arg_parser.add_argument("output")
arg_parser.add_argument("-F", "--iformat")
arg_parser.add_argument("-O", "--ioption", action="append", default=[])
arg_parser.add_argument("-f", "--oformat")
arg_parser.add_argument("-o", "--ooption", action="append", default=[])
arg_parser.add_argument("-a", "--noaudio", action="store_true")
arg_parser.add_argument("-v", "--novideo", action="store_true")
arg_parser.add_argument("-s", "--nosubs", action="store_true")
arg_parser.add_argument("-d", "--nodata", action="store_true")
arg_parser.add_argument("-c", "--count", type=int, default=0)
args = arg_parser.parse_args()

input_ = av.open(
    args.input,
    format=args.iformat,
    options=dict(x.split("=") for x in args.ioption),
)
output = av.open(
    args.output,
    "w",
    format=args.oformat,
    options=dict(x.split("=") for x in args.ooption),
)

in_to_out = {}

for i, stream in enumerate(input_.streams):
    if (
        (stream.type == "audio" and not args.noaudio)
        or (stream.type == "video" and not args.novideo)
def make_update(): surface_definitions_path = os.path.join(rec_dir, "surface_definitions") if not os.path.exists(surface_definitions_path): return surface_definitions_dict = fm.Persistent_Dict(surface_definitions_path) surface_definitions_backup_path = os.path.join( rec_dir, "surface_definitions_deprecated") os.rename(surface_definitions_path, surface_definitions_backup_path) intrinsics_path = os.path.join(rec_dir, "world.intrinsics") if not os.path.exists(intrinsics_path): logger.warning( "Loading surface definitions failed: The data format of the " "surface definitions in this recording " "is too old and is no longer supported!") return valid_ext = (".mp4", ".mkv", ".avi", ".h264", ".mjpeg") existing_videos = [ f for f in glob.glob(os.path.join(rec_dir, "world.*")) if os.path.splitext(f)[1] in valid_ext ] if not existing_videos: return world_video_path = existing_videos[0] world_video = av.open(world_video_path) f = world_video.streams.video[0].format resolution = f.width, f.height intrinsics = cm.load_intrinsics(rec_dir, "world", resolution) DEPRECATED_SQUARE_MARKER_KEY = "realtime_square_marker_surfaces" if DEPRECATED_SQUARE_MARKER_KEY not in surface_definitions_dict: return surfaces_definitions_old = surface_definitions_dict[ DEPRECATED_SQUARE_MARKER_KEY] surfaces_definitions_new = [] for surface_def_old in surfaces_definitions_old: surface_def_new = {} surface_def_new["deprecated"] = True surface_def_new["name"] = surface_def_old["name"] surface_def_new["real_world_size"] = surface_def_old[ "real_world_size"] surface_def_new["build_up_status"] = 1.0 reg_markers = [] registered_markers_dist = [] for id, verts in surface_def_old["markers"].items(): reg_marker_dist = {"id": id, "verts_uv": verts} registered_markers_dist.append(reg_marker_dist) verts_undist = undistort_vertices(verts, intrinsics) reg_marker = {"id": id, "verts_uv": verts_undist} reg_markers.append(reg_marker) surface_def_new[ "registered_markers_dist"] = registered_markers_dist surface_def_new["reg_markers"] = reg_markers surfaces_definitions_new.append(surface_def_new) surface_definitions_dict_new = fm.Persistent_Dict( surface_definitions_path) surface_definitions_dict_new["surfaces"] = surfaces_definitions_new surface_definitions_dict_new.save()
def _get_av_container(self):
    if isinstance(self._source_path[0], io.BytesIO):
        self._source_path[0].seek(0)  # required for re-reading
    return av.open(self._source_path[0])
def open(self, filename):
    if os.path.isfile(filename):
        self._media_container = av.open(filename)
def _load_fresh_file(self):
    self._demuxed_container = av.open(self.filename).demux()
    self._current_packet = next(self._demuxed_container).decode()
    self._packet_cursor = 0
    self._frame_cursor = 0
try:
    from billiard import forking_enable
    forking_enable(0)
except ImportError:
    pass

logging.basicConfig(level=logging.DEBUG)

cap = Audio_Capture('test.wav', 1)
import time
time.sleep(2)
cap.close()

#mic device
exit()

container = av.open('hw:0', format="alsa")
container = av.open(':0', format="avfoundation")
print 'container:', container
print '\tformat:', container.format
print '\tduration:', float(container.duration) / av.time_base
print '\tmetadata:'
for k, v in sorted(container.metadata.iteritems()):
    print '\t\t%s: %r' % (k, v)
print

print len(container.streams), 'stream(s):'
audio_stream = None
for i, stream in enumerate(container.streams):
    print '\t%r' % stream
    print '\t\ttime_base: %r' % stream.time_base
def _open_container(self):
    cont = av.open(self.path)
    return cont
def __init__(self, file, format=None, options={}):
    self.__container = av.open(file=file, format=format, mode="w", options=options)
    self.__tracks = {}
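# For context, a minimal sketch of the raw PyAV write pattern such a wrapper
# typically builds on: open an output container, add a stream, encode frames,
# flush, and close. The codec, frame size, and rate below are illustrative
# assumptions, not values taken from the class above.
import av

with av.open('sketch_output.mp4', mode='w') as container:
    stream = container.add_stream('mpeg4', rate=30)
    stream.width = 640
    stream.height = 480
    stream.pix_fmt = 'yuv420p'
    for _ in range(30):
        frame = av.VideoFrame(640, 480, 'yuv420p')  # normally filled with real image data
        for packet in stream.encode(frame):
            container.mux(packet)
    for packet in stream.encode(None):              # flush the encoder
        container.mux(packet)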
def renderAv(ffmpeg, ffprobe, vidFile: str, args, chunks: list, speeds: list, fps, has_vfr, temp, log): import av totalFrames = chunks[len(chunks) - 1][1] videoProgress = ProgressBar(totalFrames, 'Creating new video', args.machine_readable_progress, args.no_progress) if(has_vfr): class Wrapper: """ Wrapper which only exposes the `read` method to avoid PyAV trying to use `seek`. From: github.com/PyAV-Org/PyAV/issues/578#issuecomment-621362337 """ name = "<wrapped>" def __init__(self, fh): self._fh = fh def read(self, buf_size): return self._fh.read(buf_size) # Create a cfr stream on stdout. cmd = ['-i', vidFile, '-map', '0:v:0', '-vf', f'fps=fps={fps}', '-r', str(fps), '-vsync', '1', '-f', 'matroska', '-vcodec', 'rawvideo', 'pipe:1'] wrapper = Wrapper(ffmpeg.Popen(cmd).stdout) input_ = av.open(wrapper, 'r') else: input_ = av.open(vidFile) inputVideoStream = input_.streams.video[0] inputVideoStream.thread_type = 'AUTO' width = inputVideoStream.width height = inputVideoStream.height pix_fmt = inputVideoStream.pix_fmt log.debug(f' - pix_fmt: {pix_fmt}') cmd = [ffmpeg.getPath(), '-hide_banner', '-y', '-f', 'rawvideo', '-vcodec', 'rawvideo', '-pix_fmt', pix_fmt, '-s', f'{width}*{height}', '-framerate', f'{fps}', '-i', '-', '-pix_fmt', pix_fmt] if(args.scale != 1): cmd.extend(['-vf', f'scale=iw*{args.scale}:ih*{args.scale}']) cmd = properties(cmd, args, vidFile, ffprobe) cmd.append(f'{temp}{sep()}spedup.mp4') if(args.show_ffmpeg_debug): process2 = subprocess.Popen(cmd, stdin=subprocess.PIPE) else: process2 = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) inputEquavalent = 0.0 outputEquavalent = 0 index = 0 chunk = chunks.pop(0) try: for packet in input_.demux(inputVideoStream): for frame in packet.decode(): index += 1 if(len(chunks) > 0 and index >= chunk[1]): chunk = chunks.pop(0) if(speeds[chunk[2]] != 99999): inputEquavalent += (1 / speeds[chunk[2]]) while inputEquavalent > outputEquavalent: in_bytes = frame.to_ndarray().tobytes() process2.stdin.write(in_bytes) outputEquavalent += 1 videoProgress.tick(index - 1) process2.stdin.close() process2.wait() except BrokenPipeError: log.print(cmd) process2 = subprocess.Popen(cmd, stdin=subprocess.PIPE) log.error('Broken Pipe Error!') if(log.is_debug): log.debug('Writing the output file.') else: log.conwrite('Writing the output file.')
def update_recording_v094_to_v0913(rec_dir, retry_on_averror=True): try: logger.info("Updating recording from v0.9.4 to v0.9.13") wav_file_loc = os.path.join(rec_dir, "audio.wav") aac_file_loc = os.path.join(rec_dir, "audio.mp4") audio_ts_loc = os.path.join(rec_dir, "audio_timestamps.npy") backup_ts_loc = os.path.join(rec_dir, "audio_timestamps_old.npy") if os.path.exists(wav_file_loc) and os.path.exists(audio_ts_loc): in_container = av.open(wav_file_loc) in_stream = in_container.streams.audio[0] in_frame_size = 0 in_frame_num = 0 out_container = av.open(aac_file_loc, "w") out_stream = out_container.add_stream("aac") for in_packet in in_container.demux(): for audio_frame in in_packet.decode(): if not in_frame_size: in_frame_size = audio_frame.samples in_frame_num += 1 out_packet = out_stream.encode(audio_frame) if out_packet is not None: out_container.mux(out_packet) # flush encoder out_packet = out_stream.encode(None) while out_packet is not None: out_container.mux(out_packet) out_packet = out_stream.encode(None) out_frame_size = out_stream.frame_size out_frame_num = out_stream.frames out_frame_rate = out_stream.rate in_frame_rate = in_stream.rate out_container.close() old_ts = np.load(audio_ts_loc) np.save(backup_ts_loc, old_ts) if len(old_ts) != in_frame_num: in_frame_size /= len(old_ts) / in_frame_num logger.debug( "Provided audio frame size is inconsistent with amount of " f"timestamps. Correcting frame size to {in_frame_size}") old_ts_idx = ( np.arange(0, len(old_ts) * in_frame_size, in_frame_size) * out_frame_rate / in_frame_rate) new_ts_idx = np.arange(0, out_frame_num * out_frame_size, out_frame_size) interpolate = interp1d(old_ts_idx, old_ts, bounds_error=False, fill_value="extrapolate") new_ts = interpolate(new_ts_idx) # raise RuntimeError np.save(audio_ts_loc, new_ts) _update_info_version_to("v0.9.13", rec_dir) except av.AVError as averr: # Try to catch `libav.aac : Input contains (near) NaN/+-Inf` errors # Unfortunately, the above error is only logged not raised. Instead # `averr`, an `Invalid Argument` error with error number 22, is raised. if retry_on_averror and averr.errno == 22: # unfortunately logger.error("Encountered AVError. Retrying to update recording.") out_container.close() # Only retry once: update_recording_v094_to_v0913(rec_dir, retry_on_averror=False) else: raise # re-raise exception
import av

container = av.open("target_1280.mp4")

for frame in container.decode(video=0):
    frame.to_image().save('frame-%04d.jpg' % frame.index)
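# Note: frame.index is deprecated in recent PyAV releases; a hedged variant of
# the loop above that numbers the saved frames explicitly instead:
import av

container = av.open('target_1280.mp4')
for i, frame in enumerate(container.decode(video=0)):
    frame.to_image().save('frame-%04d.jpg' % i)
container.close()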
def __call__(self, results): """ Perform mp4 decode operations. return: List where each item is a numpy array after decoder. """ file_path = results['filename'] results['format'] = 'video' results['backend'] = self.backend if self.backend == 'cv2': cap = cv2.VideoCapture(file_path) videolen = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) sampledFrames = [] for i in range(videolen): ret, frame = cap.read() # maybe first frame is empty if ret == False: continue img = frame[:, :, ::-1] sampledFrames.append(img) results['frames'] = sampledFrames results['frames_len'] = len(sampledFrames) elif self.backend == 'decord': container = de.VideoReader(file_path) frames_len = len(container) results['frames'] = container results['frames_len'] = frames_len elif self.backend == 'pyav': # for TimeSformer if self.mode in ["train", "valid"]: clip_idx = -1 elif self.mode in ["test"]: clip_idx = 0 else: raise NotImplementedError container = av.open(file_path) num_clips = 1 # always be 1 # decode process fps = float(container.streams.video[0].average_rate) frames_length = container.streams.video[0].frames duration = container.streams.video[0].duration if duration is None: # If failed to fetch the decoding information, decode the entire video. decode_all_video = True video_start_pts, video_end_pts = 0, math.inf else: decode_all_video = False start_idx, end_idx = get_start_end_idx( frames_length, self.sampling_rate * self.num_seg / self.target_fps * fps, clip_idx, num_clips) timebase = duration / frames_length video_start_pts = int(start_idx * timebase) video_end_pts = int(end_idx * timebase) frames = None # If video stream was found, fetch video frames from the video. if container.streams.video: margin = 1024 seek_offset = max(video_start_pts - margin, 0) container.seek(seek_offset, any_frame=False, backward=True, stream=container.streams.video[0]) tmp_frames = {} buffer_count = 0 max_pts = 0 for frame in container.decode(**{"video": 0}): max_pts = max(max_pts, frame.pts) if frame.pts < video_start_pts: continue if frame.pts <= video_end_pts: tmp_frames[frame.pts] = frame else: buffer_count += 1 tmp_frames[frame.pts] = frame if buffer_count >= 0: break video_frames = [tmp_frames[pts] for pts in sorted(tmp_frames)] container.close() frames = [ frame.to_rgb().to_ndarray() for frame in video_frames ] clip_sz = self.sampling_rate * self.num_seg / self.target_fps * fps start_idx, end_idx = get_start_end_idx( len(frames), # frame_len clip_sz, clip_idx if decode_all_video else 0, # If decode all video, -1 in train and valid, 0 in test; # else, always 0 in train, valid and test, as we has selected clip size frames when decode. 1) results['frames'] = frames results['frames_len'] = len(frames) results['start_idx'] = start_idx results['end_idx'] = end_idx else: raise NotImplementedError return results
def mac_pyav_hack():
    if platform.system() == "Darwin":
        try:
            av.open(':0', format="avfoundation")
        except:
            pass
def container(self):
    try:
        return self._container
    except AttributeError:
        self._container = av.open(self.url)
        return self.container
def output_stream_write():
    #time.sleep(5)
    while True:
        container = av.open(tmp)
        print(container)
        output.mux(frame)


session = livestreamer.Livestreamer()
session.set_option("http-headers", "Client-ID=ewvlchtxgqq88ru9gmfp1gmyt6h2b93")
streams = session.streams("http://www.twitch.tv/kalashz0r")
print(streams)
stream = streams['1080p60']
input = av.open(stream.url, options={'buffer_size': '1000000'})
tmp = BytesIO()
buffer = av.open(tmp, 'w', 'mp4')
output = av.open('rtmp://live.restream.io/live/re_882197_78625223222fd0769d68', mode='w', format='flv')
input_streams = list()
output_streams = list()
buffer_streams = list()
arrows = cv2.imread("/root/rama.png", -1)
import os
import sys

import av
import numpy as np

from cm import to_iron_ycbcr

with open(sys.argv[1], 'rb') as f:
    frames = int.from_bytes(f.read(4), 'little')
    height = int.from_bytes(f.read(4), 'little')
    width = int.from_bytes(f.read(4), 'little')
    if frames == 0:
        frames = (os.path.getsize(sys.argv[1]) - 12) // height // width // 2
        print('Frame counter not set! Guessed from file size as {}'.format(frames))

    outvid = av.open(sys.argv[1] + '.mp4', 'w')
    if len(sys.argv) > 2:
        stream = outvid.add_stream('mpeg4', sys.argv[2])
    else:
        stream = outvid.add_stream('mpeg4', '50')
    stream.bit_rate = 10000000
    stream.pix_fmt = 'yuv420p'
    stream.width = width
    stream.height = height
    stream.thread_count = 3

    outframe = av.VideoFrame(width, height, 'yuv420p')
    for frameno in range(frames):
        I = np.ndarray(buffer=f.read(width * height * 2),
                       dtype=np.uint16,
def main(): #drone = tellopy.Tello() try: #drone.connect() #drone.wait_for_connection(60.0) # drone.startControlCommand() # drone.takeoff() # drone.takeoff() # sleep(3) # drone.land() # sleep(3) #drone.set_video_encoder_rate(1) # Open webcam on OS X. #container = av.open(format='avfoundation', file='0') #container = av.open(drone.get_video_stream()) container = av.open('sangyy4.mp4') #container.VideoFrame(320, 180, 'rgb24') #container.width = 320 #container.height = 180 ''' -camera (The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative number (by default), to auto-detect and open the first available camera.) type: int32 default: -1 -camera_fps (Frame rate for the webcam (also used when saving video). Set this value to the minimum value between the OpenPose displayed speed and the webcam real frame rate.) type: double default: 30 -camera_parameter_folder (String with the folder where the camera parameters are located.) type: string default: "models/cameraParameters/flir/" -camera_resolution (Set the camera resolution (either `--camera` or `--flir_camera`). `-1x-1` will use the default 1280x720 for `--camera`, or the maximum flir camera resolution available for `--flir_camera`) type: string default: "-1x-1" ''' print('Start Video Stream**********************************') # skip first 10 frames frame_skip = 20 #count_frame = 10 flags = numpy.zeros((1,4)) pastidx = None # a var to store info of indx, used in one person ver. to make same movement actor = None # a var to identify which user gets the control of tello while True: for frame in container.decode(video=0): if 0 < frame_skip: frame_skip = frame_skip-1 continue # start_time = time.time() interupt = cv2.waitKey(10) # 10s to read keys? roll call? #frame(320, 180, 'rgb24') #frame = frame(320, 180) #frame = frame.reformat(320, -1, 'rgb24') image = cv2.cvtColor(numpy.array(frame.reformat(272, 480).to_image()), cv2.COLOR_RGB2BGR) #image = cv2.resize(image, (640, 360)); keypoints, output_image = openpose.forward(image, True) # keypoints is a matrix filled in multi-person data # format:[[p1][p2][p3]] cv2.imshow("output", output_image) # print('get keypoint!') # print(keypoints) # once test input is not the 7 poses, return idx=6 ? # is it because the dist_all<0.7 is too general? 
# solution: modify idx when dist_all>0.7 (multi-person version) # for one person data matrix, size=3*25=75 if 40 < numpy.size(keypoints) < 76: # ensure that it is a valid data for one person # implement knn (idx, dist_all) = kNNtest.implement_kNN(keypoints) print('One-Person mode') actor = 0 # set actor as 0 in one person ver., if next frame is multi-person, we don't know who gets the control # this setting is due to actor cannot be none for logic comparasion in the multi-person actor change stage # print(dist_all) if dist_all[0] < 0.7: print('***** Pose_Idx=%d *****' % idx) # if the idx is not the same, change idx # if the idx is the same, do the same movement as the past idx indicates # if the idx of the pose cannot be recgonized, the drone will still move as the pastidx (save energy for actor) if idx != pastidx: pastidx = idx print('pose idx has changed to %d'%(idx)) idx2pose(pastidx) # for multi-person data matrices, size=n*(3*25) if numpy.size(keypoints) > 76: print('multi-person mode') person = len(keypoints) # a var used in person number changed between frames idx_list = [] # a list to store idx of all the person in one frame kp = dict() # apply knn to all the person matrices one by one for i in range(0, len(keypoints)): a = [] a.append(keypoints[i]) print('seperate kp') name = 'kp{}'.format(i) kp[name] = array(a) # ensure the points are enough for analysis if 40 < numpy.size(kp[name]) < 76: (idx, dist_all) = kNNtest.implement_kNN(kp[name]) print('idx, dist done') # if the pose of the person cannot be matched with poseidx 0-6, then idx = none if dist_all[0] > 0.7: idx = None # store the idx only for matrices with enough points idx_list += [idx] print('index list of multi-person:') print(idx_list) # this part is the assignment of actor in multi-person mode # in one person mode, actor = 0 as default # this part is the situation when the plane has not been taken off yet, so actor = none if actor == None: print('Actor is None in multi-person mode') # the person who let the plane take off is assigned as the actor if 2 in idx_list: actor = idx_list.index(2) idx = idx_list[actor] print('take off in multi-person mode by actor:', actor) idx2pose(idx) print('take off in multi-person mode done') # this part is entered when the plane: # 1/ takes off in multi-person mode # 2/ takes off in one-person mode elif actor != None: print('Actor is not None') # what if in the first frame, person=4, and actor=4 (list index=3) # in the next frame, person = 3, actor idx does not changed, list = [0,1,2] act = 3, out of range # base stage: # if person = 3, p-1 = 2, actor = 2 is out of range if person >= 3: if actor >= (person-1): actor = 0 print('actor overflow, changed to 0') # actor = 0 is still dangerous, need to be tested if 4 in idx_list: actor = idx_list.index(4) # actor is set to be the first idx of 4 in the list(due to function ofo index) # need to improve: ensure which is should be the actor # why the list is full of 4? is it a bug? 
print('actor has changed to the person who did pose 4') print('ready to get the idx in [multi-person] actor mode') idx = idx_list[actor] print('actor has set the idx to:',idx) idx2pose(idx) print('actor is :',actor,'pose is:',idx) # print('ready to do pose!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') # idx2pose(idx) # print('do action in multi-person!!!!!!!!!!!!!!!!!!!!!!!') elif interupt & 0xFF == ord('q'): cv2.destroyWindow(output_image) print('drone.land()') elif numpy.size(keypoints)== 0: ##if UAV can't find any person,turn around until detect one person print('drone.clockwise(20)') # drone.quit() # sleep(1) # if interupt & 0xFF == ord('l'): # drone.land() # sleep(2) # if interupt & 0xFF == ord('w'): # drone.forward(20) # # sleep(1) # if interupt & 0xFF == ord('s'): # drone.backward(20) # sleep(1) # if interupt & 0xFF == ord('a'): # drone.left(20) # sleep(1) # if interupt & 0xFF == ord('d'): # drone.right(20) # sleep(1) # if interupt & 0xFF == ord('z'): # drone.clockwise(20) # sleep(1) # if interupt & 0xFF == ord('c'): # drone.flip_right() # sleep(1) # if interupt & 0xFF == ord('t'): # drone.takeoff() # sleep(2) # if interupt & 0xFF == ord('u'): # drone.up(20) # sleep(1) # if interupt & 0xFF == ord('n'): # drone.down(20) # sleep(1) # if interupt & 0xFF == ord('v'): # drone.contourclockwise(20) # sleep(1) # if interupt & 0xFF == ord('b'): # drone.flip_left() # sleep(1) #count_frame = 10 flags = numpy.zeros((1, 4)) # initial count of each gesture are all 0 # print('***** count_frame=%d *****' % count_frame) # frame_skip = int((time.time() - start_time) / frame.time_base) frame_skip = 20 except Exception as ex: exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_exception(exc_type, exc_value, exc_traceback) print(ex) finally: print('drone.quit()') cv2.destroyAllWindows()
parser.add_argument('--model', type=str, help='path to model')
parser.add_argument('--seed', type=int, help='random seed')
parser.add_argument('--debug_video', type=str, help='path to save debug video')
parser.add_argument('--robot_eye_video', type=str, help='path to save robot-eye video')
parser.add_argument('--verbose', action='store_true', help='show reward, actions, dones for each step')
args = parser.parse_args()

if args.robot_eye_video:
    import av
    output = av.open(args.robot_eye_video, mode='w')
    stream = output.add_stream('mpeg4', rate=13)
    stream.pix_fmt = 'yuv420p'
    stream.height, stream.width = 128, 128

set_global_seeds(args.seed)

env = HamstirRoomEmptyEnv(render=True, dim=128)
if args.debug_video:
    env.logVideo(args.debug_video)
env.seed(args.seed)
env = DummyVecEnv([lambda: env])

model = PPO2.load(args.model, policy=NatureLitePolicy)
sess = model.sess
graph = sess.graph
def stream_worker(source, options, segment_buffer, quit_event): # noqa: C901 """Handle consuming streams.""" try: container = av.open(source, options=options, timeout=STREAM_TIMEOUT) except av.AVError: _LOGGER.error("Error opening stream %s", redact_credentials(str(source))) return try: video_stream = container.streams.video[0] except (KeyError, IndexError): _LOGGER.error("Stream has no video") container.close() return try: audio_stream = container.streams.audio[0] except (KeyError, IndexError): audio_stream = None # These formats need aac_adtstoasc bitstream filter, but auto_bsf not # compatible with empty_moov and manual bitstream filters not in PyAV if container.format.name in {"hls", "mpegts"}: audio_stream = None # Some audio streams do not have a profile and throw errors when remuxing if audio_stream and audio_stream.profile is None: audio_stream = None # Iterator for demuxing container_packets = None # The decoder timestamps of the latest packet in each stream we processed last_dts = {video_stream: float("-inf"), audio_stream: float("-inf")} # Keep track of consecutive packets without a dts to detect end of stream. missing_dts = 0 # The video pts at the beginning of the segment segment_start_pts = None # Because of problems 1 and 2 below, we need to store the first few packets and replay them initial_packets = deque() # Have to work around two problems with RTSP feeds in ffmpeg # 1 - first frame has bad pts/dts https://trac.ffmpeg.org/ticket/5018 # 2 - seeking can be problematic https://trac.ffmpeg.org/ticket/7815 def peek_first_pts(): """Initialize by peeking into the first few packets of the stream. Deal with problem #1 above (bad first packet pts/dts) by recalculating using pts/dts from second packet. Also load the first video keyframe pts into segment_start_pts and check if the audio stream really exists. 
""" nonlocal segment_start_pts, audio_stream, container_packets missing_dts = 0 found_audio = False try: container_packets = container.demux((video_stream, audio_stream)) first_packet = None # Get to first video keyframe while first_packet is None: packet = next(container_packets) if ( packet.dts is None ): # Allow MAX_MISSING_DTS packets with no dts, raise error on the next one if missing_dts >= MAX_MISSING_DTS: raise StopIteration( f"Invalid data - got {MAX_MISSING_DTS+1} packets with missing DTS while initializing" ) missing_dts += 1 continue if packet.stream == audio_stream: found_audio = True elif packet.is_keyframe: # video_keyframe first_packet = packet initial_packets.append(packet) # Get first_pts from subsequent frame to first keyframe while segment_start_pts is None or ( audio_stream and not found_audio and len(initial_packets) < PACKETS_TO_WAIT_FOR_AUDIO): packet = next(container_packets) if ( packet.dts is None ): # Allow MAX_MISSING_DTS packet with no dts, raise error on the next one if missing_dts >= MAX_MISSING_DTS: raise StopIteration( f"Invalid data - got {MAX_MISSING_DTS+1} packets with missing DTS while initializing" ) missing_dts += 1 continue if packet.stream == audio_stream: # detect ADTS AAC and disable audio if audio_stream.codec.name == "aac" and packet.size > 2: with memoryview(packet) as packet_view: if packet_view[0] == 0xFF and packet_view[ 1] & 0xF0 == 0xF0: _LOGGER.warning( "ADTS AAC detected - disabling audio stream" ) container_packets = container.demux( video_stream) audio_stream = None continue found_audio = True elif ( segment_start_pts is None ): # This is the second video frame to calculate first_pts from segment_start_pts = packet.dts - packet.duration first_packet.pts = segment_start_pts first_packet.dts = segment_start_pts initial_packets.append(packet) if audio_stream and not found_audio: _LOGGER.warning( "Audio stream not found" ) # Some streams declare an audio stream and never send any packets audio_stream = None except (av.AVError, StopIteration) as ex: _LOGGER.error( "Error demuxing stream while finding first packet: %s", str(ex)) return False return True if not peek_first_pts(): container.close() return segment_buffer.set_streams(video_stream, audio_stream) segment_buffer.reset(segment_start_pts) while not quit_event.is_set(): try: if len(initial_packets) > 0: packet = initial_packets.popleft() else: packet = next(container_packets) if packet.dts is None: # Allow MAX_MISSING_DTS consecutive packets without dts. Terminate the stream on the next one. if missing_dts >= MAX_MISSING_DTS: raise StopIteration( f"No dts in {MAX_MISSING_DTS+1} consecutive packets") missing_dts += 1 continue missing_dts = 0 except (av.AVError, StopIteration) as ex: _LOGGER.error("Error demuxing stream: %s", str(ex)) break # Discard packet if dts is not monotonic if packet.dts <= last_dts[packet.stream]: if (packet.time_base * (last_dts[packet.stream] - packet.dts) > MAX_TIMESTAMP_GAP): _LOGGER.warning( "Timestamp overflow detected: last dts %s, dts = %s, resetting stream", last_dts[packet.stream], packet.dts, ) break continue # Update last_dts processed last_dts[packet.stream] = packet.dts # Mux packets, and possibly write a segment to the output stream. # This mutates packet timestamps and stream segment_buffer.mux_packet(packet) # Close stream segment_buffer.close() container.close()
async def test_durations(hass, record_worker_sync): """Test that the duration metadata matches the media.""" # Use a target part duration which has a slight mismatch # with the incoming frame rate to better expose problems. target_part_duration = TEST_PART_DURATION - 0.01 await async_setup_component( hass, "stream", { "stream": { CONF_LL_HLS: True, CONF_SEGMENT_DURATION: SEGMENT_DURATION, CONF_PART_DURATION: target_part_duration, } }, ) source = generate_h264_video(duration=SEGMENT_DURATION + 1) stream = create_stream(hass, source, {}, stream_label="camera") # use record_worker_sync to grab output segments with patch.object(hass.config, "is_allowed_path", return_value=True): await stream.async_record("/example/path") complete_segments = list(await record_worker_sync.get_segments())[:-1] assert len(complete_segments) >= 1 # check that the Part duration metadata matches the durations in the media running_metadata_duration = 0 for segment in complete_segments: av_segment = av.open(io.BytesIO(segment.init + segment.get_data())) av_segment.close() for part_num, part in enumerate(segment.parts): av_part = av.open(io.BytesIO(segment.init + part.data)) running_metadata_duration += part.duration # av_part.duration actually returns the dts of the first packet of the next # av_part. When we normalize this by av.time_base we get the running # duration of the media. # The metadata duration may differ slightly from the media duration. # The worker has some flexibility of where to set each metadata boundary, # and when the media's duration is slightly too long or too short, the # metadata duration may be adjusted up or down. # We check here that the divergence between the metadata duration and the # media duration is not too large (2 frames seems reasonable here). assert math.isclose( (av_part.duration - av_part.start_time) / av.time_base, part.duration, abs_tol=2 / av_part.streams.video[0].rate + 1e-6, ) # Also check that the sum of the durations so far matches the last dts # in the media. assert math.isclose( running_metadata_duration, av_part.duration / av.time_base, abs_tol=1e-6, ) # And check that the metadata duration is between 0.85x and 1.0x of # the part target duration if not (part.has_keyframe or part_num == len(segment.parts) - 1): assert part.duration > 0.85 * target_part_duration - 1e-6 assert part.duration < target_part_duration + 1e-6 av_part.close() # check that the Part durations are consistent with the Segment durations for segment in complete_segments: assert math.isclose( sum(part.duration for part in segment.parts), segment.duration, abs_tol=1e-6, ) await record_worker_sync.join() stream.stop()
import argparse
import os

import av
import cv2

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-r', '--rate', default='23.976')
arg_parser.add_argument('-f', '--format', default='yuv420p')
arg_parser.add_argument('-w', '--width', type=int)
arg_parser.add_argument('--height', type=int)
arg_parser.add_argument('-b', '--bitrate', type=int, default=8000000)
arg_parser.add_argument('-c', '--codec', default='mpeg4')
arg_parser.add_argument('inputs', nargs='+')
arg_parser.add_argument('output', nargs=1)
args = arg_parser.parse_args()

output = av.open(args.output[0], 'w')
stream = output.add_stream(args.codec, args.rate)
stream.bit_rate = args.bitrate
stream.pix_fmt = args.format

for i, path in enumerate(args.inputs):
    print(os.path.basename(path))
    img = cv2.imread(path)
    if not i:
        # Derive missing output dimensions from the first image.
        if args.height:
            stream.height = args.height
        elif args.width:
            stream.height = int(args.width * img.shape[0] / img.shape[1])
        else:
            stream.height = img.shape[0]
        stream.width = args.width or img.shape[1]
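The script above is cut off before any frames are encoded. A plausible continuation (a sketch assuming the standard PyAV encode/mux calls, not the original script) converts each BGR image to a VideoFrame, encodes it inside the loop, and flushes the encoder at the end:

    # Wrap the OpenCV BGR array in a VideoFrame and encode it.
    frame = av.VideoFrame.from_ndarray(img, format='bgr24')
    for packet in stream.encode(frame):
        output.mux(packet)

# Flush any buffered packets and finalize the container.
for packet in stream.encode(None):
    output.mux(packet)
output.close()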
def _decode_frames_by_av_module(
    full_path,
    video_start_pts=0,
    video_end_pts=None,
    audio_start_pts=0,
    audio_end_pts=None,
):
    """
    Use PyAv to decode video frames. This provides a reference for our decoder
    to compare the decoding results.
    Input arguments:
        full_path: video file path
        video_start_pts/video_end_pts: the starting/ending Presentation TimeStamp
            where frames are read
    """
    if video_end_pts is None:
        video_end_pts = float('inf')
    if audio_end_pts is None:
        audio_end_pts = float('inf')
    container = av.open(full_path)

    video_frames = []
    vtimebase = torch.zeros([0], dtype=torch.int32)
    if container.streams.video:
        video_frames = _read_from_stream(
            container,
            video_start_pts,
            video_end_pts,
            container.streams.video[0],
            {"video": 0},
        )
        # container.streams.video[0].average_rate is not a reliable estimator of
        # frame rate. It can be wrong for certain codec, such as VP80
        # So we do not return video fps here
        vtimebase = _fraction_to_tensor(container.streams.video[0].time_base)

    audio_frames = []
    atimebase = torch.zeros([0], dtype=torch.int32)
    if container.streams.audio:
        audio_frames = _read_from_stream(
            container,
            audio_start_pts,
            audio_end_pts,
            container.streams.audio[0],
            {"audio": 0},
        )
        atimebase = _fraction_to_tensor(container.streams.audio[0].time_base)

    container.close()

    vframes = [frame.to_rgb().to_ndarray() for frame in video_frames]
    vframes = torch.as_tensor(np.stack(vframes))
    vframe_pts = torch.tensor([frame.pts for frame in video_frames], dtype=torch.int64)

    aframes = [frame.to_ndarray() for frame in audio_frames]
    if aframes:
        aframes = np.transpose(np.concatenate(aframes, axis=1))
        aframes = torch.as_tensor(aframes)
    else:
        aframes = torch.empty((1, 0), dtype=torch.float32)
    aframe_pts = torch.tensor(
        [audio_frame.pts for audio_frame in audio_frames], dtype=torch.int64
    )

    return DecoderResult(
        vframes=vframes,
        vframe_pts=vframe_pts,
        vtimebase=vtimebase,
        aframes=aframes,
        aframe_pts=aframe_pts,
        atimebase=atimebase,
    )
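The helper _fraction_to_tensor is referenced above but not defined in this fragment. A minimal sketch of what it presumably does, inferred from how vtimebase/atimebase are built (an assumption, not the original implementation): pack a stream's time_base Fraction into an int32 tensor as [numerator, denominator].

import torch

def _fraction_to_tensor(fraction):
    # A PyAV stream time_base is a fractions.Fraction; store it losslessly as two ints.
    return torch.tensor([fraction.numerator, fraction.denominator], dtype=torch.int32)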
save_path = os.path.join(path, 'sorted')
video = video_file
results_name = os.path.split(video_file)[-1].split('.')[0] + '_pred.pickle'
log.info("saving analysis to: {0}".format(os.path.join(save_path, results_name)))

# define empty lists to hold params (cnn)
a_cnn = []
b_cnn = []
x_cnn = []
y_cnn = []
phi_cnn = []
lx = []; ly = []; tx = []; ty = []; rx = []; ry = []; bx = []; by = []

# reopen the video with pyav
container = av.open(video)
video_stream = [s for s in container.streams][0]

for i, packet in enumerate(container.demux(video_stream)):
    if i % 1000 == 0:
        log.info("frame: {0}...".format(i))
        nd.update_job_tick()
    try:
        frame = packet.decode()[0]
        frame_ = np.asarray(frame.to_image().convert('LA'))
        frame_ = frame_[:, :-10, :]
        frame_ = frame_ - np.min(frame_)
        if frame_.shape[-1] > 1:
            frame_ = frame_[:, :, 0]
def open_video(filename):
    try:
        return av.open(filename)
    except UnicodeDecodeError:
        print("Opening with metadata encoding latin-1", file=sys.stderr)
        return av.open(filename, metadata_encoding="latin-1")
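A brief usage example (the file name here is hypothetical): the latin-1 fallback is transparent to callers, so decoding proceeds the same way in either case.

container = open_video('clip_with_latin1_metadata.mkv')
for frame in container.decode(video=0):
    pass  # process frames here
container.close()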
def save_2d_keypoints_and_images(video_name, video_path, npy_path, rgb_skeleton_data, frame_time_dict):
    mismatch_count = 0

    ## cap = cv2.VideoCapture(video_path)
    ## assert(cap.isOpened() == True)
    container = av.open(video_path)

    for k, fr in enumerate(container.decode(video=0)):
        assert k == fr.index
        ## for k in frame_time_dict.keys():
        nearest_idx, nearest_time = find_nearest_frameindex_from_skeleton_file(
            rgb_skeleton_data[..., 0], frame_time_dict[k])  # take column 0 (time) from rgb data
        # print("k (video frame) ", k, "\t time", frame_time_dict[k], "\t nearest_idx from skeleton file", nearest_idx, "\t nearest_time", nearest_time)
        # print("k=>", k, nearest_idx, "<= nearest_idx")

        if abs(frame_time_dict[k] - nearest_time) > 1000000:  # 100 ns ticks, so 1000000 = 0.1sec
            mismatch_count += 1
            continue  # do not add the nearest found index if the difference is really big (>0.1sec)
        else:
            # print(rgb_skeleton_data[nearest_idx])
            if np.inf not in rgb_skeleton_data[nearest_idx]:  # do not add if there is np.inf in the line
                ## cap.set(cv2.CAP_PROP_POS_FRAMES, k)
                ## success, frame = cap.read()  # frame is read as (h, w, c)
                success = True  # hard-coded for PyAV
                frame = fr.to_image()
                # converting PIL (<class 'PIL.Image.Image'>) to <class 'numpy.ndarray'>
                img = np.asarray(frame)  # h, w, c

                if success:
                    os.makedirs(os.path.join(npy_path, video_name), exist_ok=True)
                    save_dir = os.path.join(npy_path, video_name)

                    # 1 # save image with the original resolution
                    # print("kth frame =", k, frame.shape, "\n")
                    # cv2.imwrite(os.path.join(save_dir, video_name + "_vfr_" + str(k) + "_skfr_" + str(nearest_idx) + '.jpg'), frame)

                    # 2 # save downsampled image
                    ## bgr to rgb
                    ## img = frame[...,::-1]
                    img_central = img[:, 240:(1920 - 240), :]
                    # downsample by 4.5
                    img_down = pyramid_reduce(img_central, downscale=4.5)  # better than resize
                    # print("img_down shape (h, w, c)", img_down.shape)  # height, width, channels (rgb)
                    skimage.io.imsave(
                        os.path.join(
                            save_dir,
                            video_name + "_vfr_" + str(k) + "_skfr_" + str(nearest_idx) + "_240x320.jpg"),
                        img_down)

                    # 3 # save heatmaps and pafs
                    sk_keypoints_with_tracking_info = rgb_skeleton_data[nearest_idx][1:]  # ignore index 0 (time)
                    # remove the tracking info, keeping only x/y coordinates
                    sk_keypoints = np.delete(
                        sk_keypoints_with_tracking_info,
                        np.arange(0, sk_keypoints_with_tracking_info.size, 3))
                    # print("sk_kp shape =", sk_keypoints.shape)  # (38, )

                    # for 20 (actually 19 + background) heatmaps =====================================
                    for kpn in range(sk_keypoints.shape[0] // 2):
                        kpx = sk_keypoints[2 * kpn]
                        kpy = sk_keypoints[2 * kpn + 1]
                        # print(kpx, kpy)

                        index_array = np.zeros((240 // ground_truth_factor, 320 // ground_truth_factor, 2))
                        for i in range(index_array.shape[0]):
                            for j in range(index_array.shape[1]):
                                # height (y), width (x) => index_array[:,:,0] = y pixel coordinate
                                # and index_array[:,:,1] = x
                                index_array[i][j] = [i, j]

                        if kpn == 0:
                            heatmap = get_heatmap(
                                index_array,
                                kpx_kpy_transformer([kpx, kpy])
                            )  # /4 because image is 1080 x 1920 and so are the original pixel locations of the keypoints
                        else:
                            heatmap = np.dstack(
                                (heatmap, get_heatmap(index_array, kpx_kpy_transformer([kpx, kpy]))))
                        # print("heatmap.shape =", heatmap.shape)

                    # generate background heatmap
                    maxed_heatmap = np.max(heatmap[:, :, :], axis=2)
                    # print("maxed_heatmap.shape = ", maxed_heatmap.shape)
                    heatmap = np.dstack((heatmap, 1 - maxed_heatmap))
                    # print("final heatmap.shape =", heatmap.shape)

                    np.save(
                        os.path.join(
                            save_dir,
                            video_name + "_vfr_" + str(k) + "_skfr_" + str(nearest_idx) + "_heatmap30x40.npy"),
                        heatmap)

                    # for 18x2 PAFs =====================================
                    for n, pair in enumerate(paf_pairs_indices):
                        # print("writing paf for index", n, pair)
                        index_array = np.zeros((240 // ground_truth_factor, 320 // ground_truth_factor, 2))
                        for i in range(index_array.shape[0]):
                            for j in range(index_array.shape[1]):
                                # height (y), width (x) => index_array[:,:,0] = y pixel coordinate
                                # and index_array[:,:,1] = x
                                index_array[i][j] = [i, j]

                        if n == 0:
                            paf = get_pafx_pafy(
                                index_array,
                                kp0xy=kpx_kpy_transformer(
                                    [sk_keypoints[2 * pair[0]], sk_keypoints[2 * pair[0] + 1]]),
                                kp1xy=kpx_kpy_transformer(
                                    [sk_keypoints[2 * pair[1]], sk_keypoints[2 * pair[1] + 1]]))
                        else:
                            paf = np.dstack(
                                (paf,
                                 get_pafx_pafy(
                                     index_array,
                                     kp0xy=kpx_kpy_transformer(
                                         [sk_keypoints[2 * pair[0]], sk_keypoints[2 * pair[0] + 1]]),
                                     kp1xy=kpx_kpy_transformer(
                                         [sk_keypoints[2 * pair[1]], sk_keypoints[2 * pair[1] + 1]]))))
                        # print("paf.shape =", paf.shape)
                    # print("final paf.shape =========================", paf.shape)

                    np.save(
                        os.path.join(
                            save_dir,
                            video_name + "_vfr_" + str(k) + "_skfr_" + str(nearest_idx) + "_paf30x40.npy"),
                        paf)

                    # 4 # save the 2d keypoints of shape (38,)
                    # print(rgb_skeleton_data[nearest_idx])
                    # print(save_dir, os.path.join("", video_name + "_vfr_" + str(k) + "_skfr_" + str(nearest_idx) + '.npy'))
                    np.save(
                        os.path.join(
                            save_dir,
                            video_name + "_vfr_" + str(k) + "_skfr_" + str(nearest_idx) + '.npy'),
                        rgb_skeleton_data[nearest_idx][1:]
                    )  # index 0 is time; saving all 57 values 19 * 3 (tracking, x, y)

    ## cap.release()
    print("mismatch_count =", mismatch_count)