def run(self):
    for input in self.input:
        container = av.open(input, 'r')
        resampler = av.AudioResampler(
            format=av.AudioFormat('s16'),
            layout=2,
            rate=44100,
        )
        for packet in container.demux():
            for frame in packet.decode():
                type = packet.stream.type
                video_frame = None
                audio_frame = None
                if type == 'video':
                    new_v_frame = frame.reformat(self.w, self.h, 'rgb24')
                    new_v_frame.pts = None
                    video_frame = new_v_frame.planes[0].to_bytes()
                if type == 'audio':
                    frame.pts = None
                    new_a_frame = resampler.resample(frame)
                    audio_frame = new_a_frame.planes[0].to_bytes()
                self.fifo.put([video_frame, audio_frame])
def convert(inputstreamfile, outputstream, format, codec, channel_layout, rate):
    try:
        # set input/output locations
        inp = av.open(inputstreamfile)
        #out = av.open(f"{outputfile}", 'w')
        out = av.open(outputstream, 'w')

        #out_stream = out.add_stream(f"{codec}", rate=16000)
        out_stream = out.add_stream(codec_name=codec, rate=rate)

        # resampler object details how we want to change frame information
        resampler = av.AudioResampler(
            format=av.AudioFormat(format).packed,
            layout=channel_layout,
            rate=rate
        )

        # decode frames and start re-encoding into new file
        for frame in inp.decode(audio=0):
            frame.pts = None  # pts is presentation time-stamp. Not relevant here.
            frame = resampler.resample(frame)  # get current working frame and re-sample it for encoding
            for p in out_stream.encode(frame):  # encode the re-sampled frame
                out.mux(p)
        out.close()
    except Exception as ex:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
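# A hypothetical invocation of convert() above, transcoding to 16 kHz mono
# 16-bit PCM in a WAV container. The file names and parameter values are
# examples only; this sketch assumes PyAV < 9, where resample() returns a
# single frame as in the function body.
convert('input.mp3', 'output.wav', format='s16',
        codec='pcm_s16le', channel_layout='mono', rate=16000)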
def __init__(self, video_path, sampling_rate=1, decode_lossy=False, audio_resample_rate=None):
    """
    Arguments:
        video_path (str): path of the video to be loaded
    """
    self.container = av.open(video_path)
    self.sampling_rate = sampling_rate
    self.resampler = None
    if audio_resample_rate is not None:
        self.resampler = av.AudioResampler(rate=audio_resample_rate)

    if self.container.streams.video:
        # enable multi-threaded video decoding
        if decode_lossy:
            warnings.warn(
                'VideoReader| thread_type==AUTO can yield potential frame dropping!',
                RuntimeWarning)
            self.container.streams.video[0].thread_type = 'AUTO'
        self.video_stream = self.container.streams.video[0]
    else:
        self.video_stream = None
def player_worker(loop, container, audio_track, video_track, quit_event,
                  throttle_playback):
    audio_fifo = av.AudioFifo()
    audio_format = av.AudioFormat('s16')
    audio_sample_rate = 48000
    audio_samples = 0
    audio_samples_per_frame = int(audio_sample_rate * AUDIO_PTIME)
    audio_resampler = av.AudioResampler(
        format=audio_format, rate=audio_sample_rate)

    video_first_pts = None

    frame_time = None
    start_time = time.time()

    while not quit_event.is_set():
        try:
            frame = next(container.decode())
        except (av.AVError, StopIteration):
            if audio_track:
                asyncio.run_coroutine_threadsafe(audio_track._queue.put(None), loop)
            if video_track:
                asyncio.run_coroutine_threadsafe(video_track._queue.put(None), loop)
            break

        # read up to 1 second ahead
        if throttle_playback:
            elapsed_time = (time.time() - start_time)
            if frame_time and frame_time > elapsed_time + 1:
                time.sleep(0.1)

        if isinstance(frame, AudioFrame) and audio_track:
            if frame.format != audio_format or frame.sample_rate != audio_sample_rate:
                frame.pts = None
                frame = audio_resampler.resample(frame)

            # fix timestamps
            frame.pts = audio_samples
            frame.time_base = fractions.Fraction(1, audio_sample_rate)
            audio_samples += frame.samples

            audio_fifo.write(frame)
            while True:
                frame = audio_fifo.read(audio_samples_per_frame)
                if frame:
                    frame_time = frame.time
                    asyncio.run_coroutine_threadsafe(audio_track._queue.put(frame), loop)
                else:
                    break
        elif isinstance(frame, VideoFrame) and video_track:
            # video from a webcam doesn't start at pts 0, cancel out offset
            if frame.pts is not None:
                if video_first_pts is None:
                    video_first_pts = frame.pts
                frame.pts -= video_first_pts

            frame_time = frame.time
            asyncio.run_coroutine_threadsafe(video_track._queue.put(frame), loop)
def load_audio(file, sr, offset, duration, resample=True, approx=False,
               time_base='samples', check_duration=True):
    if time_base == 'sec':
        offset = offset * sr
        duration = duration * sr
    # Loads at target sr, stereo channels, seeks from offset, and stops after duration
    container = av.open(file)
    audio = container.streams.get(audio=0)[0]  # Only first audio stream
    audio_duration = audio.duration * float(audio.time_base)
    if approx:
        if offset + duration > audio_duration * sr:
            # Move back one window. Cap at audio_duration
            # (built-in min: np.min(a, b) would treat b as the axis argument)
            offset = min(audio_duration * sr - duration, offset - duration)
    else:
        if check_duration:
            assert offset + duration <= audio_duration * sr, f'End {offset + duration} beyond duration {audio_duration*sr}'
    if resample:
        resampler = av.AudioResampler(format='fltp', layout='stereo', rate=sr)
    else:
        assert sr == audio.sample_rate
    offset = int(offset / sr / float(audio.time_base))  #int(offset / float(audio.time_base))  # Use units of time_base for seeking
    duration = int(duration)  #duration = int(duration * sr)  # Use units of time_out ie 1/sr for returning sig
    sig = np.zeros((2, duration), dtype=np.float32)
    container.seek(offset, stream=audio)
    total_read = 0
    for frame in container.decode(audio=0):  # Only first audio stream
        if resample:
            frame.pts = None
            frame = resampler.resample(frame)
        if frame is None:
            break
        frame = frame.to_ndarray(format='fltp')  # Convert to floats and not int16
        read = frame.shape[-1]
        if total_read + read > duration:
            read = duration - total_read
        sig[:, total_read:total_read + read] = frame[:, :read]
        total_read += read
        if total_read == duration:
            break
    assert total_read <= duration, f'Expected {duration} frames, got {total_read}'
    return sig, sr
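# Hypothetical call to load_audio() above: read 5 seconds of stereo audio
# starting at 10 s, resampled to 22050 Hz ('song.mp3' is an example path).
sig, sr = load_audio('song.mp3', sr=22050, offset=10, duration=5,
                     time_base='sec')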
def decode_audio(data):
    decoded_audio = b''
    data = BytesIO(data)
    container = av.open(data)
    resampler = av.AudioResampler('s16', 'mono', 16000)
    audio_stream = next(s for s in container.streams if s.type == 'audio')

    for packet in container.demux(audio_stream):
        for frame in packet.decode():
            frame = resampler.resample(frame)
            decoded_audio += frame.planes[0].to_bytes()

    return np.frombuffer(decoded_audio, dtype=np.int16)
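# A minimal sketch of the same decode-and-resample loop for PyAV >= 9.0,
# where AudioResampler.resample() returns a *list* of frames and setting
# frame.pts = None beforehand is no longer required. Same s16/mono/16 kHz
# target as decode_audio() above; the function name is hypothetical.
import av
import numpy as np
from io import BytesIO


def decode_audio_v9(data):
    decoded_audio = b''
    container = av.open(BytesIO(data))
    resampler = av.AudioResampler(format='s16', layout='mono', rate=16000)
    audio_stream = next(s for s in container.streams if s.type == 'audio')
    for packet in container.demux(audio_stream):
        for frame in packet.decode():
            for out_frame in resampler.resample(frame):
                decoded_audio += out_frame.planes[0].to_bytes()
    # flush any samples still buffered inside the resampler
    for out_frame in resampler.resample(None):
        decoded_audio += out_frame.planes[0].to_bytes()
    return np.frombuffer(decoded_audio, dtype=np.int16)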
def open(self, path, mono=False, sample_rate=None):
    """Open the audio resource."""
    self.path = path
    self.open_kargs = {'mono': mono, 'sample_rate': sample_rate}
    self.container = container = av.open(
        path,
        options={'usetoc': '1',
                 # Timeouts of I/O operations in µs and ms
                 'timeout': '5000000',
                 'listen_timeout': '5000'})
    # 'usetoc' is set to enable fast seek (see also
    # ffmpeg commit c43bd08 for a 'fastseek' option)
    log.debug('container: %s', container)
    stream = self.stream = \
        next(s for s in container.streams if s.type == 'audio')
    log.debug('stream: %s', stream)
    resampler = av.AudioResampler(
        format=av.AudioFormat('s16').packed,
        layout='mono' if mono else stream.layout,
        rate=sample_rate or stream.rate or 44100)

    def decode_iter():
        """Generator reading and decoding the audio stream."""
        for packet in container.demux(stream):
            for frame in packet.decode():
                self.last_frame_pts = frame.pts
                # frame pts must be set to None
                # (see https://github.com/mikeboers/PyAV/issues/281)
                frame.pts = None
                frame = resampler.resample(frame)
                yield frame

    self.decode_iter = decode_iter()
    self.pos = 0
    # Duration in seconds
    if stream.duration:
        self.duration = int(stream.duration * stream.time_base)
    else:
        # It is certainly a web file
        log.info("No duration")
        self.duration = None
    self.num_channels = 1 if mono else stream.channels
    self.sample_rate = resampler.rate
def __init__(self, path, output_chunk_size, output_rate, realtime=True,
             time_limit=None, output_format='s16', output_layout='mono'):
    """
    :type path: str
    :type output_chunk_size: int
    :type output_rate: int
    :type realtime: bool
    :type time_limit: float
    """
    if output_format != 's16':
        raise NotImplementedError(
            'output_format {} is not supported.'.format(output_format))
    if output_layout != 'mono':
        raise NotImplementedError(
            'output_layout {} is not supported.'.format(output_layout))
    self._realtime = realtime
    self._chunk_size = output_chunk_size
    self._time_limit = time_limit
    self._bit_rate = output_rate * 16
    self._chunk_duration = output_chunk_size * 8 / self._bit_rate
    self._afi = AudioFrameIterable(path)
    self._resampler = av.AudioResampler(
        format=av.AudioFormat(output_format).packed,
        layout=output_layout,
        rate=output_rate,
    )
    self._buffer = b''
    self._timestamp = 0
    self._duration_processed = 0
def demultiplexer(self, container):
    # resample audio line to the given format
    resampler = av.AudioResampler(
        format=av.AudioFormat('s16'),
        layout=2,
        rate=self.audio_rate,
    )
    # loop over the container
    for packet in container.demux():
        type = packet.stream.type
        # orig_fps = packet.stream.rate
        for frame in packet.decode():
            # current time in video clip
            # timestamp = float(frame.pts * packet.stream.time_base)
            video_frame = None
            audio_frame = None
            if type == 'video':
                # print('video pts: {}'.format(frame.pts))
                frame.pts = self.new_vid_pts
                new_v_frame = frame.reformat(self.w, self.h, 'yuv420p')
                video_frame = new_v_frame
                self.new_vid_pts += 512
            if type == 'audio':
                # print('audio pts: {}'.format(frame.pts))
                frame.pts = None
                new_a_frame = resampler.resample(frame)
                audio_frame = new_a_frame
            # push to fifo buffer
            self.fifo.put([video_frame, audio_frame])
def reloadResampler(self):
    self.Resampler = av.AudioResampler(
        format=av.AudioFormat('s16').packed,
        layout='stereo' if CHANNELS >= 2 else 'mono',
        rate=SAMPLING_RATE)
def init_audio_sink(self):
    print("audit")
    self.pa = pyaudio.PyAudio()
    self.sink = self.pa.open(format=self.pa.get_format_from_width(2),
                             channels=2,
                             rate=44100,
                             output=True)
    codec = None
    extradata = None
    if self.audio_format == Audio.AudioFormat.ALAC_44100_16_2.value:
        extradata = bytes([
            # Offset 0x00000000 to 0x00000035
            0x00, 0x00, 0x00, 0x24, 0x61, 0x6c, 0x61, 0x63, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x01, 0x60, 0x00, 0x10, 0x28, 0x0a,
            0x0e, 0x02, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0xac, 0x44
        ])
        codec = av.codec.Codec('alac', 'r')
    elif self.audio_format == Audio.AudioFormat.AAC_LC_44100_2.value:
        codec = av.codec.Codec('aac', 'r')

    if codec is not None:
        self.codecContext = av.codec.CodecContext.create(codec)
        self.codecContext.sample_rate = 44100
        self.codecContext.channels = 2
        self.codecContext.format = AudioFormat('s16p')
        if extradata is not None:
            self.codecContext.extradata = extradata

    self.resampler = av.AudioResampler(
        format=av.AudioFormat('s16').packed,
        layout='stereo',
        rate=44100,
    )
def _enqueue(self, run, finished, filepath, vid_q, aud_q, vid_info, *stindex):
    aud_resampler = av.AudioResampler(
        format=av.AudioFormat('s16p').packed,  # WAV PCM signed 16bit planar
        layout='stereo',
    )

    def decode():
        print('started decoding and queueing')
        container = av.open(filepath)
        streams = [container.streams[indx] for indx in stindex]
        prev_video_frame = None
        prev_video_ts = None

        v_stream = container.streams.video[0]
        # Scale down to keep things fast.
        out_longest_side = max(self._vwidth, self._vheight)
        if v_stream.height > v_stream.width:
            scale_args = "w=min(%d,iw):h=-1:flags=area" % (out_longest_side,)
        else:
            scale_args = "w=-1:h=min(%d,ih):flags=area" % (out_longest_side,)
        filtergraph = av.filter.Graph()
        v_src = filtergraph.add_buffer(template=v_stream)
        v_bgr = filtergraph.add("format", "pix_fmts=bgr24")
        v_scale = filtergraph.add("scale", scale_args)
        v_snk = filtergraph.add("buffersink")
        v_src.link_to(v_bgr)
        v_bgr.link_to(v_scale)
        v_scale.link_to(v_snk)

        for packet in container.demux(streams):
            run.wait()
            for frame in packet.decode():
                play_at = float(frame.time_base * frame.pts) if frame.pts else None
                if isinstance(frame, av.AudioFrame):
                    frame_r = aud_resampler.resample(frame)
                    raw_audio = frame_r.planes[0].to_bytes()
                    aud_q.put(raw_audio)
                elif isinstance(frame, av.VideoFrame):
                    # NOTE: use filtergraph to convert to bgr24 instead of
                    # frame.reformat(format='bgr24').
                    #
                    # For a yuv420p frame, with SIMD optimizations on,
                    # frame.reformat(format='bgr24') will fail to convert
                    # the last width%8 pixels on each row, leaving a
                    # stripe of uninitialized data down the right side.
                    #
                    # The problem is VideoFrame allocates buffers with
                    # align=1 instead of align=SIMD_width_of_cpu.
                    #
                    # libavfilter allocates buffers with align=32, so
                    # doing the bgr24 conversion via a filtergraph works.
                    v_src.push(frame)
                    frame_bgr = v_snk.pull()

                    # frame.to_nd_array() expects buffers to be align=1 so
                    # we have to do this by hand
                    plane = frame_bgr.planes[0]
                    dtype = numpy.uint8
                    bytes_per_pixel = 3
                    frame_h, frame_w = frame_bgr.height, frame_bgr.width
                    buffer_w = plane.line_size // bytes_per_pixel
                    frame_bgr = numpy.frombuffer(plane, dtype).reshape(
                        frame_h, buffer_w, -1)[:frame_h, :frame_w]

                    vid_q.put((prev_video_frame, prev_video_ts, play_at or 0))
                    if vid_info['rotate'] == 90:
                        prev_video_frame = numpy.rot90(frame_bgr.copy(), k=-1)
                    elif vid_info['rotate'] == 180:
                        prev_video_frame = numpy.fliplr(
                            numpy.flipud(frame_bgr.copy()))
                    elif vid_info['rotate'] == 270:
                        prev_video_frame = numpy.rot90(frame_bgr.copy())
                    else:
                        prev_video_frame = frame_bgr.copy()
                    prev_video_ts = play_at or 0
                else:
                    print('unknown frame', frame)
        print('finished decoding and queueing')

    decode()
    finished.set()
def worker(player, loop, container, streams, tracks, lock_tracks, quit_event,
           throttle_playback, audio_effect, video_effect):
    import fractions
    import time

    audio_fifo = av.AudioFifo()
    audio_format_name = "s16"
    audio_layout_name = "stereo"
    audio_sample_rate = 48000
    audio_samples = 0
    audio_samples_per_frame = int(audio_sample_rate * AUDIO_PTIME)
    audio_resampler = av.AudioResampler(format=audio_format_name,
                                        layout=audio_layout_name,
                                        rate=audio_sample_rate)

    video_first_pts = None

    audio_frame_time = None
    start_time = time.time()

    audio_print_warning = True
    video_print_warning = True

    def iter_tracks(kind=None):
        with lock_tracks:
            for track in tracks:
                track = track()
                if track is not None:
                    if kind is None or kind == track.kind:
                        yield track

    def cleanup_tracks():
        with lock_tracks:
            to_remove = {track for track in tracks if track() is None}
            for track in to_remove:
                tracks.discard(track)

    def run_threadsafe(coro):
        asyncio.run_coroutine_threadsafe(coro, loop)

    def append_frame(frame, kind=None, force=True):
        for track in iter_tracks(kind=kind):
            if track._queue.full():
                # remove one frame and append the new frame
                if force:
                    run_threadsafe(track._queue.get())
                    run_threadsafe(track._queue.put(frame))
            else:
                run_threadsafe(track._queue.put(frame))

    while not quit_event.is_set():
        # clean invalid ref
        cleanup_tracks()

        # decode frame
        try:
            frame = next(container.decode(*streams))
        except (av.AVError, StopIteration):
            for track in iter_tracks():
                append_frame(None, force=True)
            break

        # read up to 1 second ahead
        if throttle_playback:
            elapsed_time = time.time() - start_time
            if audio_frame_time and audio_frame_time > elapsed_time + 1:
                time.sleep(0.1)

        # audio
        if isinstance(frame, av.AudioFrame) and (set(iter_tracks('audio')) or player.always_running):
            if (frame.format.name != audio_format_name
                    or frame.layout.name != audio_layout_name
                    or frame.sample_rate != audio_sample_rate):
                frame.pts = None
                frame = audio_resampler.resample(frame)

            # fix timestamps
            frame.pts = audio_samples
            frame.time_base = fractions.Fraction(1, audio_sample_rate)
            audio_samples += frame.samples

            # apply audio effect
            if audio_effect is not None:
                try:
                    frame = audio_effect(loop, frame)
                    audio_print_warning = True
                except BaseException:
                    if audio_print_warning:
                        logger.exception('Failed to apply audio effect')
                    audio_print_warning = False

            audio_fifo.write(frame)
            while True:
                frame = audio_fifo.read(audio_samples_per_frame)
                if frame:
                    audio_frame_time = frame.time
                    append_frame(frame, 'audio')
                else:
                    break

        # video
        if isinstance(frame, av.VideoFrame) and (set(iter_tracks('video')) or player.always_running):
            if frame.pts is None:  # pragma: no cover
                logger.warning("Skipping video frame with no pts")
                continue

            # video from a webcam doesn't start at pts 0, cancel out offset
            if video_first_pts is None:
                video_first_pts = frame.pts
            frame.pts -= video_first_pts

            # drop frame if too late
            if throttle_playback:
                elapsed_time = time.time() - start_time
                if elapsed_time - frame.time > 0.1:
                    continue

            # apply video effect
            if video_effect is not None:
                try:
                    frame = video_effect(loop, frame)
                    video_print_warning = True
                except BaseException:
                    if video_print_warning:
                        logger.exception('Failed to apply video effect')
                    video_print_warning = False

            append_frame(frame, 'video')
def live():
    '''
    youtube-dl
    pip install av
    '''
    filepath = './youtube_live.mp3'
    save_stream = True
    infinite = True
    duration = 50  # seconds
    command = ['youtube-dl', '-f', '91', '-g', FLAGS.url]
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=10**8)
    out, err = proc.communicate()
    videolink = out.decode("utf-8").strip()

    resampler = av.AudioResampler("s16p", layout=1, rate=16 * 1000)
    if not infinite and save_stream:
        output_container = av.open(filepath, 'w')
        output_stream = output_container.add_stream('mp3')
    input_container = av.open(videolink)
    input_stream = input_container.streams.get(audio=0)[0]

    win_size = (FLAGS.win_length +
                FLAGS.hop_length * (FLAGS.downsample * FLAGS.step_n_frame - 1))
    hop_size = (FLAGS.hop_length * (FLAGS.downsample * FLAGS.step_n_frame))

    if FLAGS.stream_decoder == 'torch':
        stream_decoder = PytorchStreamDecoder(FLAGS)
    else:
        stream_decoder = OpenVINOStreamDecoder(FLAGS)

    # track_counter = 0
    begin_time = datetime.now()
    buffer = torch.empty(1, 0)
    blank_counter = 0
    for frame in input_container.decode(input_stream):
        frame.pts = None
        resample_frame = resampler.resample(frame)
        waveform = resample_frame.to_ndarray()
        waveform = torch.tensor(waveform.copy())
        waveform = waveform.float() / 32768
        if torch.isnan(waveform).any():
            print("[NAN]", flush=True, end=" ")

        if buffer.shape[1] < win_size:
            buffer = torch.cat([buffer, waveform], dim=-1)

        while buffer.shape[1] >= win_size:
            waveform = buffer[:, :win_size]
            buffer = buffer[:, hop_size:]
            if torch.isnan(waveform).any():
                print("[NAN] waveform", flush=True, end=" ")
                continue
            seq = stream_decoder.decode(waveform)
            if seq == "":
                blank_counter += 1
                if blank_counter == 35:
                    print(' [Background]')
                    stream_decoder.reset()
            else:
                blank_counter = 0
                print(seq, end='', flush=True)

        if not infinite and save_stream:
            for packet in output_stream.encode(resample_frame):
                output_container.mux(packet)
        if not infinite:
            if (datetime.now() - begin_time).total_seconds() > duration:
                break

    if not infinite and save_stream:
        for packet in output_stream.encode(None):
            output_container.mux(packet)
        output_container.close()
def convert_pyav(input, output, file_name, extension, a_codec, v_codec,
                 sample_rate, sample_fmt, channels):
    try:
        print('trying PyAV Method...')
        print('variables: ', input, output, file_name, extension, a_codec,
              v_codec, sample_rate, sample_fmt, channels)

        # I/O VARIABLES
        inp = av.open(input, 'r')
        out = av.open(output, 'w')
        # out_video_stream = out.add_stream(v_codec)
        out_audio_stream = out.add_stream(a_codec, rate=int(sample_rate))

        # RESAMPLER OBJECT (WARNING: ONLY SET RATE ON AUDIO-STREAM -- FORMATTING ISSUES)
        resampler = av.AudioResampler(
            format=sample_fmt,
            layout=channels,
        )

        """ add_abuffer missing from filter.Graph(). Wait for stable release to implement. Use FFmpeg for now. """
        # graph = av.filter.Graph()
        #
        # fchain = []
        # iastrm = next(s for s in inp.streams if s.type == 'audio')
        #
        # frame_rate = str
        # sample_fmt = str
        # bit_depth = str
        # for s in inp.streams:
        #     if s.type == 'audio':
        #         sample_fmt = s.format.name
        #         frame_rate = s.sample_rate
        #
        # channels = int
        # for frame in inp.decode(audio=0):
        #     channels = frame.layout.channels
        #
        # print(frame_rate, sample_fmt, channels)
        #
        # fchain.append(graph.add_abuffer(template=iastrm))
        # fchain.append(graph.add('silenceremove', 'stop_periods=-1:stop_duration=1:stop_threshold=-90dB'))
        # fchain[-2].link_to(fchain[-1])
        #
        # fchain.append(graph.add("buffersink"))  # graph must end with buffersink...?
        # fchain[-2].link_to(fchain[-1])
        #
        # for value, filter in enumerate(av.filter.filters_available):
        #     print(value, filter)

        # DECODING/ENCODING
        for frame in inp.decode(audio=0):
            frame.pts = None  # pts is presentation time-stamp. Not relevant here.
            frame = resampler.resample(frame)  # get current working frame and re-sample it for encode
            for packet in out_audio_stream.encode(frame):  # encode the re-sampled frame
                out.mux(packet)

        """ wait for add_abuffer in next update for this... """
        # fchain[0].push(frame)
        # ofr = fchain[-1].pull()
        # ofr.pts = None
        # for p in out_audio_stream.encode(ofr):  # 'p' stands for packet
        #     out.mux(p)

        for packet in out_audio_stream.encode(None):  # flush the encoder
            out.mux(packet)
        out.close()
    except Exception as e:
        settings.exception_counter += 1
        logger.error('admin_message', msg='Could not convert the file', exc_info=e)
import argparse

from qtproxy import Q

import av

parser = argparse.ArgumentParser()
parser.add_argument('path')
args = parser.parse_args()

container = av.open(args.path)
stream = next(s for s in container.streams if s.type == 'audio')

fifo = av.AudioFifo()
resampler = av.AudioResampler(
    format=av.AudioFormat('s16').packed,
    layout='stereo',
    rate=48000,
)

qformat = Q.AudioFormat()
qformat.setByteOrder(Q.AudioFormat.LittleEndian)
qformat.setChannelCount(2)
qformat.setCodec('audio/pcm')
qformat.setSampleRate(48000)
qformat.setSampleSize(16)
qformat.setSampleType(Q.AudioFormat.SignedInt)

output = Q.AudioOutput(qformat)
output.setBufferSize(2 * 2 * 48000)
def _do_run(self) -> None:
    with withLock(self.Source._loading):
        if not self.Source.Container:
            self.Source.Container = av.open(
                self.Source.Source, options=self.Source.AVOption
            )
        self.Source.duration = round(self.Source.Container.duration / 1000000, 2)
        self.Source.selectAudioStream = self.Source.Container.streams.audio[0]
        self.Source.FrameGenerator = self.Source.Container.decode(
            self.Source.selectAudioStream
        )

    while not self.Source._end.is_set():
        if self.Source.filter != self.Filter:
            self.Filter = self.Source.filter
            if self.Source.filter:
                self.FilterGraph = AudioFilter()
                self.FilterGraph.selectAudioStream = (
                    self.Source.selectAudioStream
                )
                self.FilterGraph.setFilters(self.Filter)
            else:
                self.FilterGraph = None

        if not self.Resampler or self.Source._haveToReloadResampler.is_set():
            self.Resampler = av.AudioResampler(
                format=av.AudioFormat("s16").packed, layout="stereo", rate=48000
            )
            self.Source._haveToReloadResampler.clear()

        _seek_locked = False
        if self.Source._seeking.locked():
            self.Source._seeking.acquire()
            _seek_locked = True

        Frame = next(self.Source.FrameGenerator, None)

        if _seek_locked:
            self.Source._seeking.release()
            self.Source.AudioFifo.reset()

        if not Frame:
            self.Source.stop()
            break

        _current_position = float(Frame.pts * Frame.time_base)

        if self.FilterGraph:
            self.FilterGraph.push(Frame)
            Frame = self.FilterGraph.pull()
            if not Frame:
                continue

        Frame.pts = None
        try:
            Frame = self.Resampler.resample(Frame)
        except ValueError:
            self.Source._haveToReloadResampler.set()
            continue

        if not self.Source.AudioFifo.haveToFillBuffer.is_set():
            self.Source.AudioFifo.haveToFillBuffer.wait()

        self.Source.AudioFifo.write(Frame)
        self.Source._position = _current_position

        if self.Source._waitforread.locked():
            self.Source._waitforread.release()
import asyncio

from pytgcalls import GroupCallFactory
import pyrogram
import telethon

import av

API_HASH = None
API_ID = None

CHAT_PEER = '@tgcallschat'  # chat or channel where you want to play audio
SOURCE = 'input.mp3'  # Audio file path or stream url: eg. https://file-examples-com.github.io/uploads/2017/11/file_example_MP3_700KB.mp3

CLIENT_TYPE = GroupCallFactory.MTPROTO_CLIENT_TYPE.PYROGRAM
# for Telethon uncomment line below
#CLIENT_TYPE = GroupCallFactory.MTPROTO_CLIENT_TYPE.TELETHON

fifo = av.AudioFifo(format='s16le')
resampler = av.AudioResampler(format='s16', layout='stereo', rate=48000)


def on_played_data(gc, length):
    data = fifo.read(length // 4)  # 4 bytes per stereo s16 sample; read() takes a sample count
    if data:
        data = data.to_ndarray().tobytes()
    return data


async def main(client):
    await client.start()
    while not client.is_connected:
        await asyncio.sleep(1)

    group_call_factory = GroupCallFactory(client, CLIENT_TYPE)
arg_parser.add_argument('-l', '--layout')
arg_parser.add_argument('-r', '--rate', type=int)
arg_parser.add_argument('-s', '--size', type=int, default=1024)
arg_parser.add_argument('-c', '--count', type=int, default=5)
args = arg_parser.parse_args()

ffplay = None

container = av.open(args.path)
stream = next(s for s in container.streams if s.type == 'audio')

fifo = av.AudioFifo() if args.size else None

resampler = av.AudioResampler(
    format=av.AudioFormat(args.format or stream.format.name).packed if args.format else None,
    layout=int(args.layout) if args.layout and args.layout.isdigit() else args.layout,
    rate=args.rate,
) if (args.format or args.layout or args.rate) else None

read_count = 0
fifo_count = 0
sample_count = 0

for i, packet in enumerate(container.demux(stream)):
    for frame in packet.decode():
        read_count += 1
        print('>>>> %04d' % read_count, frame)
        if args.data:
for task in self.__tracks.values():
    if task is not None:
        task.cancel()
self.__tracks = {}


def player_worker(loop, container, streams, audio_track, video_track,
                  quit_event, throttle_playback, copy_frame=False):
    audio_fifo = av.AudioFifo()
    audio_format_name = 's16'
    audio_layout_name = 'stereo'
    audio_sample_rate = 48000
    audio_samples = 0
    audio_samples_per_frame = int(audio_sample_rate * AUDIO_PTIME)
    audio_resampler = av.AudioResampler(
        format=audio_format_name,
        layout=audio_layout_name,
        rate=audio_sample_rate)

    video_first_pts = None

    frame_time = None
    start_time = time.time()

    while not quit_event.is_set():
        try:
            if not copy_frame:
                frame = next(container.decode(*streams))
            else:
                frame = next(container.demux(*streams))
import numpy as np
from stream import transforms, model, _tokenizer, test_wav, window_size, eval_args, lm_model
import torchaudio
import torch
import torch.nn.functional as F
import logging

import av

hidden_ = lm_model.init_hidden(1)
lm_logist, lm_hidden = lm_model(torch.ones(1).long().unsqueeze(0), hidden_)

av.logging.set_level(0)
frames = 4
single_input_chunk = int(16 * 1000 * window_size * 3 - 1)
buffer_size = single_input_chunk * frames + (frames - 1)
resampler = av.AudioResampler("s16p", rate=16 * 1000, layout=1)
buffers = []

bos = torch.ones((1, 1)).long() * 1
h_pre, (h, c) = model.decoder(model.embed(bos))  # decode first zero
y_seq = []
encoder_h = None
buffer = []


def reset_hidden_state():
    global buffer
    global encoder_h
    global h_enc
    encoder_h = None
def stream_doom(yturl: str, speed=None, noise=None):
    """
    Returns a generator of doomified mp3 frames
    """
    in_file = av.open(yturl, options={'rtsp_transport': 'tcp'})
    in_stream = in_file.streams.audio[0]
    in_codec = in_stream.codec_context
    out_codec = av.CodecContext.create('mp3', 'w')
    out_codec.rate = in_codec.rate
    out_codec.channels = in_codec.channels
    out_codec.format = in_codec.format
    resampler = av.AudioResampler(
        format=av.AudioFormat('s16').packed,
        layout=in_codec.layout,
        # slow down by default (1.4x); otherwise scale by 1/speed
        rate=int(in_codec.rate * (1.4 if speed is None else 1 / speed)),
    )
    if in_codec.channels == 2:
        nf = 'vinyl.wav'
    elif in_codec.channels == 1:
        nf = 'vinylmono.wav'
    else:
        # TODO: Support 5.1 and other configs
        raise Exception('Too many audio channels in stream')
    noise = noise or 0.1
    wet = 1 - noise

    def moving_average(a, n=3):
        ret = np.cumsum(a, dtype=float)
        ret[n:] = ret[n:] - ret[:-n]
        return ret[n - 1:] / n

    with wave.open(nf, 'rb') as vinyl:
        vinbuf = vinyl.readframes(int(out_codec.rate * 1.5))
        b = np.frombuffer(vinbuf, dtype='i2').reshape((1, -1))
        newframe = av.audio.frame.AudioFrame.from_ndarray(
            b, format='s16', layout=in_codec.layout.name)
        newframe.rate = out_codec.rate
        for p in out_codec.encode(newframe):
            yield p.to_bytes()

        for packet in in_file.demux(in_stream):
            for frame in packet.decode():
                frame.pts = None
                buf = resampler.resample(frame).to_ndarray()[0]
                # reading in a frame of the vinyl
                vinbuf = vinyl.readframes(len(buf) // in_codec.channels)
                if len(vinbuf) < len(buf) * in_codec.channels:
                    vinyl.rewind()
                    vinbuf = vinyl.readframes(len(buf) // in_codec.channels)
                a = buf * wet
                b = np.frombuffer(vinbuf, dtype='i2') * noise
                mod = moving_average(a + b, n=7).astype('i2').reshape((1, -1))
                newframe = av.audio.frame.AudioFrame.from_ndarray(
                    mod, format='s16', layout=in_codec.layout.name)
                newframe.rate = out_codec.rate
                for p in out_codec.encode(newframe):
                    yield p.to_bytes()

        # flush the encoder
        for p in out_codec.encode(None):
            yield p.to_bytes()
    in_file.close()
def init_audio_sink(self):
    codecLatencySec = 0
    self.pa = pyaudio.PyAudio()
    self.sink = self.pa.open(format=self.pa.get_format_from_width(2),
                             channels=self.channel_count,
                             rate=self.sample_rate,
                             output=True)
    # nice Python3 crash if we don't check self.sink is null. Not harmful, but should check.
    if not self.sink:
        exit()
    # codec = None
    extradata = None
    if self.audio_format == AirplayAudFmt.ALAC_44100_16_2.value:
        extradata = self.set_alac_extradata(self, 44100, 16, 2)
    elif self.audio_format == AirplayAudFmt.ALAC_44100_24_2.value:
        extradata = self.set_alac_extradata(self, 44100, 24, 2)
    elif self.audio_format == AirplayAudFmt.ALAC_48000_16_2.value:
        extradata = self.set_alac_extradata(self, 48000, 16, 2)
    elif self.audio_format == AirplayAudFmt.ALAC_48000_24_2.value:
        extradata = self.set_alac_extradata(self, 48000, 24, 2)

    if 'ALAC' in self.af:
        self.codec = av.codec.Codec('alac', 'r')
    elif 'AAC' in self.af:
        self.codec = av.codec.Codec('aac', 'r')
    elif 'OPUS' in self.af:
        self.codec = av.codec.Codec('opus', 'r')
    # PCM
    elif 'PCM' in self.af and '_16_' in self.af:
        self.codec = av.codec.Codec('pcm_s16le_planar', 'r')
    elif 'PCM' in self.af and '_24_' in self.af:
        self.codec = av.codec.Codec('pcm_s24le', 'r')

    """
    #It seems that these are not required.
    if 'ELD' in self.af:
        codecLatencySec = (2017 / self.sample_rate)
    elif 'AAC_LC' in self.af:
        codecLatencySec = (2624 / self.sample_rate)
    codecLatencySec = 0
    print('codecLatencySec:', codecLatencySec)
    """

    if self.codec is not None:
        self.codecContext = av.codec.CodecContext.create(self.codec)
        self.codecContext.sample_rate = self.sample_rate
        self.codecContext.channels = self.channel_count
        self.codecContext.format = av.AudioFormat('s' + str(self.sample_size) + 'p')
        if extradata is not None:
            self.codecContext.extradata = extradata

    self.resampler = av.AudioResampler(
        format=av.AudioFormat('s' + str(self.sample_size)).packed,
        layout='stereo',
        rate=self.sample_rate,
    )

    audioDevicelatency = \
        self.pa.get_default_output_device_info()['defaultHighOutputLatency']
    # defaultLowOutputLatency is also available
    print(f"audioDevicelatency (sec): {audioDevicelatency:0.5f}")
    pyAudioDelay = self.sink.get_output_latency()
    print(f"pyAudioDelay (sec): {pyAudioDelay:0.5f}")
    ptpDelay = 0.002
    self.sample_delay = pyAudioDelay + audioDevicelatency + codecLatencySec + ptpDelay
    print(f"Total sample_delay (sec): {self.sample_delay:0.5f}")
def main(argv):
    '''
    youtube-dl
    pip install av
    '''
    print(FLAGS.url)
    filepath = 'bloom.mp3'
    save_stream = False
    command = ['youtube-dl', '-f', '91', '-g', FLAGS.url]
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=10**8)
    out, err = proc.communicate()
    videolink = out.decode("utf-8").strip()

    resampler = av.AudioResampler("s16p", layout=1, rate=16 * 1000)
    if save_stream:
        output_container = av.open(filepath, 'w')
        output_stream = output_container.add_stream('mp3')
    input_container = av.open(videolink)
    input_stream = input_container.streams.get(audio=0)[0]

    win_size = (FLAGS.win_length +
                FLAGS.hop_length * (FLAGS.downsample * FLAGS.step_n_frame - 1))
    hop_size = (FLAGS.hop_length * (FLAGS.downsample * FLAGS.step_n_frame))

    stream_decoder = OpenVINOStreamDecoder(FLAGS)

    track_counter = 0
    buffers = torch.empty(0)
    for frame in input_container.decode(input_stream):
        frame.pts = None
        resample_frame = resampler.resample(frame)
        waveform = np.frombuffer(
            resample_frame.planes[0].to_bytes(), dtype='int16')
        waveform = torch.tensor(waveform.copy())
        waveform = waveform.float() / 32768
        # waveform = waveform.clamp(-1, 1)
        # waveform[waveform != waveform] = 0
        if torch.isnan(waveform).any():
            print("[NAN]", flush=True, end=" ")

        if len(buffers) < win_size:
            buffers = torch.cat([buffers, waveform], dim=0)
        else:
            print("[BUFFER OVERFLOW]", flush=True, end=" ")

        if len(buffers) >= win_size:
            waveform = buffers[:win_size]
            buffers = buffers[hop_size:]
            if torch.isnan(waveform).any():
                print("[NAN] waveform", flush=True, end=" ")
                continue
            seq = stream_decoder.decode(waveform[None])
            print(seq, end='', flush=True)
            track_counter += 1
            if track_counter % 200 == 0:
                print('[reset state]')
                stream_decoder.reset()

        if save_stream:
            for packet in output_stream.encode(resample_frame):
                output_container.mux(packet)

    if save_stream:
        for packet in output_stream.encode(None):
            output_container.mux(packet)
        output_container.close()
def player_worker(loop, container, streams, audio_track, video_track,
                  quit_event, throttle_playback):
    audio_fifo = av.AudioFifo()
    audio_format_name = "s16"
    audio_layout_name = "stereo"
    audio_sample_rate = 48000
    audio_samples = 0
    audio_samples_per_frame = int(audio_sample_rate * AUDIO_PTIME)
    audio_resampler = av.AudioResampler(format=audio_format_name,
                                        layout=audio_layout_name,
                                        rate=audio_sample_rate)

    video_first_pts = None

    frame_time = None
    start_time = time.time()

    while not quit_event.is_set():
        try:
            frame = next(container.decode(*streams))
        except (av.AVError, StopIteration) as exc:
            if isinstance(exc, av.FFmpegError) and exc.errno == errno.EAGAIN:
                time.sleep(0.01)
                continue
            if audio_track:
                asyncio.run_coroutine_threadsafe(audio_track._queue.put(None), loop)
            if video_track:
                asyncio.run_coroutine_threadsafe(video_track._queue.put(None), loop)
            break

        # read up to 1 second ahead
        if throttle_playback:
            elapsed_time = time.time() - start_time
            if frame_time and frame_time > elapsed_time + 1:
                time.sleep(0.1)

        if isinstance(frame, AudioFrame) and audio_track:
            if (frame.format.name != audio_format_name
                    or frame.layout.name != audio_layout_name
                    or frame.sample_rate != audio_sample_rate):
                frame.pts = None
                frame = audio_resampler.resample(frame)

            # fix timestamps
            frame.pts = audio_samples
            frame.time_base = fractions.Fraction(1, audio_sample_rate)
            audio_samples += frame.samples

            audio_fifo.write(frame)
            while True:
                frame = audio_fifo.read(audio_samples_per_frame)
                if frame:
                    frame_time = frame.time
                    asyncio.run_coroutine_threadsafe(
                        audio_track._queue.put(frame), loop)
                else:
                    break
        elif isinstance(frame, VideoFrame) and video_track:
            if frame.pts is None:  # pragma: no cover
                logger.warning(
                    "MediaPlayer(%s) Skipping video frame with no pts",
                    container.name)
                continue

            # video from a webcam doesn't start at pts 0, cancel out offset
            if video_first_pts is None:
                video_first_pts = frame.pts
            frame.pts -= video_first_pts

            frame_time = frame.time
            asyncio.run_coroutine_threadsafe(video_track._queue.put(frame), loop)
def play_video(self, avi_file):
    try:
        import av
    except ImportError:
        return False
    if not config['enable_avi_play']:
        return False
    avi_file = os.path.join(config['game_path'], avi_file)
    if os.path.exists(avi_file):
        self.screen_real = pg.display.set_mode(
            self.screen_real.get_size(), self.screen_real.get_flags(), 32)
        video = av.open(avi_file, metadata_encoding=encoding,
                        metadata_errors='replace')
        astream = next(s for s in video.streams if s.type == 'audio')
        fw = BytesIO()
        wav = wave.open(fw, 'wb')
        resampler = av.AudioResampler(
            format=av.AudioFormat('s16').packed,
            layout='stereo',
            rate=config['samplerate'],
        )
        wav.setparams(
            (2, 2, config['samplerate'], 0, 'NONE', "not compressed"))
        for packet in video.demux(astream):
            for frame in packet.decode():
                frame = resampler.resample(frame)
                wav.writeframes(frame.planes[0].to_bytes())
        wav.close()
        fw.seek(0)
        pg.mixer.music.load(fw)
        video = av.open(avi_file, metadata_encoding=encoding,
                        metadata_errors='replace')
        vstream = next(s for s in video.streams if s.type == 'video')
        rate = int(round(1000 / vstream.rate))
        pg.mixer.music.play()
        self.clear_key_state()
        other = not hasattr(pg.image, 'frombuffer')
        try:
            for packet in video.demux(vstream):
                for frame in packet.decode():
                    size = self.screen_real.get_size()
                    curtime = pg.time.get_ticks()
                    if other:
                        img_obj = BytesIO()
                        frame.to_image().save(img_obj, 'bmp')
                        img_obj.seek(0)
                        self.screen_real.blit(
                            pg.transform.smoothscale(
                                pg.image.load(img_obj), size), (0, 0))
                    else:
                        data = frame.to_rgb().planes[0].to_bytes()
                        self.screen_real.blit(
                            pg.transform.smoothscale(
                                pg.image.frombuffer(
                                    data, (288, 180), 'RGB'), size), (0, 0))
                    pg.display.flip()
                    self.delay_until(curtime + rate)
                    if self.input_state.key_press:
                        raise KeyboardInterrupt
        except KeyboardInterrupt:
            pass
        finally:
            self.clear_key_state()
            if pg.mixer.get_init():
                pg.mixer.music.pause()
            self.screen_real = pg.display.set_mode(
                self.screen_real.get_size(), self.screen_real.get_flags(), 8)
            self.set_palette(self.num_palette, self.night_palette)
        return True
    else:
        return False