Ejemplo n.º 1
0
    def run(self):
        """Decode every input and push ``[video, audio]`` byte pairs to the FIFO.

        Each entry of ``self.input`` is opened with PyAV and demuxed.  For
        every decoded frame a two-item list is put on ``self.fifo``:

        * video frames are reformatted to ``self.w`` x ``self.h`` ``rgb24``
          and the bytes of their first plane fill the first slot;
        * audio frames are resampled (``s16``, ``layout=2``, 44100 Hz) and
          the bytes of their first plane fill the second slot.

        The slot that does not match the frame's stream type stays ``None``.
        """
        for source in self.input:
            container = av.open(source, 'r')

            # A fresh resampler is created for each input.
            resampler = av.AudioResampler(
                format=av.AudioFormat('s16'),
                layout=2,
                rate=44100,
            )

            for packet in container.demux():
                for frame in packet.decode():
                    kind = packet.stream.type
                    video_bytes = audio_bytes = None

                    if kind == 'video':
                        scaled = frame.reformat(self.w, self.h, 'rgb24')
                        scaled.pts = None
                        video_bytes = scaled.planes[0].to_bytes()
                    elif kind == 'audio':
                        # The timestamp is cleared before resampling.
                        frame.pts = None
                        resampled = resampler.resample(frame)
                        audio_bytes = resampled.planes[0].to_bytes()

                    self.fifo.put([video_bytes, audio_bytes])
def convert(inputstreamfile, outputstream, format, codec, channel_layout, rate):
    """Re-encode the first audio stream of an input into ``outputstream``.

    Args:
        inputstreamfile: Source handed to ``av.open`` for reading.
        outputstream: Destination handed to ``av.open`` in ``'w'`` mode.
        format: Sample format name; its packed variant is used by the
            resampler.
        codec: Codec name for the output stream.
        channel_layout: Channel layout given to the resampler.
        rate: Sample rate used for both the resampler and the output stream.

    Returns:
        None.  Any exception is caught and reported on stdout as
        ``exception type, file name, line number``; nothing is re-raised.
    """
    try:
        # set input/output locations
        inp = av.open(inputstreamfile)
        out = av.open(outputstream, 'w')
        out_stream = out.add_stream(codec_name=codec, rate=rate)

        # resampler object details how we want to change frame information
        resampler = av.AudioResampler(
            format=av.AudioFormat(format).packed,
            layout=channel_layout,
            rate=rate
        )

        # decode frames and start re-encoding into new file
        for frame in inp.decode(audio=0):
            frame.pts = None  # pts is presentation time-stamp. Not relevant here.

            frame = resampler.resample(frame)  # re-sample the frame for encoding

            for p in out_stream.encode(frame):  # encode the re-sampled frame
                out.mux(p)

        # Flush the encoder: packets still buffered inside it are only
        # returned when encode() is called with None.  Without this the end
        # of the audio is missing from the output.
        for p in out_stream.encode(None):
            out.mux(p)

        out.close()
        inp.close()

    except Exception:
        exc_type, _, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
Ejemplo n.º 3
0
    def __init__(self,
                 video_path,
                 sampling_rate=1,
                 decode_lossy=False,
                 audio_resample_rate=None):
        """
        Open ``video_path`` and prepare optional audio resampling.

        Arguments:
            video_path (str): path of the video to be loaded
            sampling_rate: stored unchanged on ``self.sampling_rate``
            decode_lossy (bool): when true and a video stream exists, a
                ``RuntimeWarning`` is issued and the first video stream's
                ``thread_type`` is set to ``'AUTO'``
            audio_resample_rate: when not ``None``, rate of the
                ``av.AudioResampler`` stored on ``self.resampler``
        """
        self.container = av.open(video_path)
        self.sampling_rate = sampling_rate

        self.resampler = None
        if audio_resample_rate is not None:
            self.resampler = av.AudioResampler(rate=audio_resample_rate)

        video_streams = self.container.streams.video
        if not video_streams:
            self.video_stream = None
            return

        first_video = video_streams[0]
        if decode_lossy:
            # enable multi-threaded video decoding
            warnings.warn(
                'VideoReader| thread_type==AUTO can yield potential frame dropping!',
                RuntimeWarning)
            first_video.thread_type = 'AUTO'
        self.video_stream = first_video
Ejemplo n.º 4
0
def player_worker(loop, container, audio_track, video_track, quit_event, throttle_playback):
    """Decode frames from ``container`` and queue them on the given tracks.

    Runs until ``quit_event`` is set or decoding stops.  Audio frames are
    resampled to s16 / 48000 Hz when their format or rate differs,
    re-timestamped with a running sample counter, and re-chunked through an
    ``av.AudioFifo`` before being queued.  Video frames are queued with their
    pts shifted by the first pts seen.  Frames are handed to each track's
    ``_queue`` via ``asyncio.run_coroutine_threadsafe`` on ``loop``.

    Args:
        loop: Event loop on which the queue ``put`` coroutines are scheduled.
        container: Object whose ``decode()`` yields audio/video frames.
        audio_track: Track whose ``_queue`` receives audio frames; falsy to
            ignore audio.
        video_track: Track whose ``_queue`` receives video frames; falsy to
            ignore video.
        quit_event: Object with ``is_set()``; the loop exits once it is true.
        throttle_playback: When truthy, sleep 0.1 s whenever the last queued
            frame time is more than one second ahead of elapsed wall time.
    """
    audio_fifo = av.AudioFifo()
    audio_format = av.AudioFormat('s16')
    audio_sample_rate = 48000
    # Running count of samples written so far; used as the pts of audio frames.
    audio_samples = 0
    # AUDIO_PTIME is defined elsewhere in the module.
    audio_samples_per_frame = int(audio_sample_rate * AUDIO_PTIME)
    audio_resampler = av.AudioResampler(
        format=audio_format,
        rate=audio_sample_rate)

    video_first_pts = None

    # Time of the most recently queued frame (audio or video).
    frame_time = None
    start_time = time.time()

    while not quit_event.is_set():
        try:
            # NOTE(review): a new decode() generator is created on every
            # iteration and only its first frame is taken - confirm that no
            # frames are lost when a packet decodes to several frames.
            frame = next(container.decode())
        except (av.AVError, StopIteration):
            # Decoding failed or ended: queue None on each track and stop
            # (presumably an end-of-stream marker for the consumer).
            if audio_track:
                asyncio.run_coroutine_threadsafe(audio_track._queue.put(None), loop)
            if video_track:
                asyncio.run_coroutine_threadsafe(video_track._queue.put(None), loop)
            break

        # read up to 1 second ahead
        if throttle_playback:
            elapsed_time = (time.time() - start_time)
            if frame_time and frame_time > elapsed_time + 1:
                time.sleep(0.1)

        if isinstance(frame, AudioFrame) and audio_track:
            if frame.format != audio_format or frame.sample_rate != audio_sample_rate:
                # pts is cleared before the frame is handed to the resampler.
                frame.pts = None
                frame = audio_resampler.resample(frame)

            # fix timestamps
            frame.pts = audio_samples
            frame.time_base = fractions.Fraction(1, audio_sample_rate)
            audio_samples += frame.samples

            # Re-chunk into frames of audio_samples_per_frame samples; read()
            # returning a falsy value ends the drain loop.
            audio_fifo.write(frame)
            while True:
                frame = audio_fifo.read(audio_samples_per_frame)
                if frame:
                    frame_time = frame.time
                    asyncio.run_coroutine_threadsafe(audio_track._queue.put(frame), loop)
                else:
                    break
        elif isinstance(frame, VideoFrame) and video_track:
            # video from a webcam doesn't start at pts 0, cancel out offset
            if frame.pts is not None:
                if video_first_pts is None:
                    video_first_pts = frame.pts
                frame.pts -= video_first_pts

            frame_time = frame.time
            asyncio.run_coroutine_threadsafe(video_track._queue.put(frame), loop)
Ejemplo n.º 5
0
def load_audio(file,
               sr,
               offset,
               duration,
               resample=True,
               approx=False,
               time_base='samples',
               check_duration=True):
    """Load a window of the first audio stream of ``file`` as float32 stereo.

    Args:
        file: Path or file-like object accepted by ``av.open``.
        sr: Target sample rate.  With ``resample=False`` it must equal the
            stream's own sample rate.
        offset: Start of the window, in samples at ``sr`` (or in seconds when
            ``time_base == 'sec'``).
        duration: Length of the window, in the same unit as ``offset``.
        resample: Resample every frame to ``fltp`` / stereo / ``sr``.
        approx: If the window runs past the end of the stream, move the
            offset back instead of failing.
        time_base: ``'samples'`` (default) or ``'sec'``.
        check_duration: When ``approx`` is false, assert that the window fits
            inside the stream.

    Returns:
        ``(sig, sr)`` where ``sig`` is a ``(2, duration)`` float32 array.
        Positions for which no samples were decoded remain zero.

    Raises:
        AssertionError: If the window exceeds the stream duration (with
            ``approx=False`` and ``check_duration=True``) or if
            ``resample=False`` and ``sr`` differs from the stream rate.
    """
    if time_base == 'sec':
        offset = offset * sr
        duration = duration * sr
    # Loads at target sr, stereo channels, seeks from offset, and stops after duration
    container = av.open(file)
    audio = container.streams.get(audio=0)[0]  # Only first audio stream
    audio_duration = audio.duration * float(audio.time_base)
    if approx:
        if offset + duration > audio_duration * sr:
            # Move back one window. Cap at audio_duration.
            # The builtin min() is required here: np.min(a, b) would treat
            # the second value as the ``axis`` argument rather than compare
            # the two scalars.
            offset = min(audio_duration * sr - duration, offset - duration)
    elif check_duration:
        assert offset + duration <= audio_duration * sr, f'End {offset + duration} beyond duration {audio_duration*sr}'
    if resample:
        resampler = av.AudioResampler(format='fltp', layout='stereo', rate=sr)
    else:
        assert sr == audio.sample_rate
    # Use units of the stream's time_base for seeking.
    offset = int(offset / sr / float(audio.time_base))
    # The returned signal is measured in samples at ``sr``.
    duration = int(duration)
    sig = np.zeros((2, duration), dtype=np.float32)
    container.seek(offset, stream=audio)
    total_read = 0
    for frame in container.decode(audio=0):  # Only first audio stream
        if resample:
            frame.pts = None
            frame = resampler.resample(frame)
        if frame is None:
            break
        frame = frame.to_ndarray(
            format='fltp')  # Convert to floats and not int16
        read = frame.shape[-1]
        if total_read + read > duration:
            # Clip the last frame so the window is not overrun.
            read = duration - total_read
        sig[:, total_read:total_read + read] = frame[:, :read]
        total_read += read
        if total_read == duration:
            break
    assert total_read <= duration, f'Expected {duration} frames, got {total_read}'
    return sig, sr
Ejemplo n.º 6
0
def decode_audio(data):
    """Decode an in-memory audio file into an int16 sample array.

    The first audio stream found in ``data`` is demuxed and every decoded
    frame is passed through an ``av.AudioResampler('s16', 'mono', 16000)``.
    The bytes of each resampled frame's first plane are concatenated.

    Args:
        data: Bytes of an audio container readable by ``av.open``.

    Returns:
        A 1-D ``numpy`` array with ``dtype=int16`` viewing the concatenated
        sample bytes.

    Raises:
        StopIteration: If the container has no audio stream.
    """
    container = av.open(BytesIO(data))
    resampler = av.AudioResampler('s16', 'mono', 16000)

    audio_stream = next(s for s in container.streams if s.type == 'audio')

    # Collect the per-frame chunks and join once at the end; repeatedly
    # extending a bytes object copies everything accumulated so far on each
    # frame, which is quadratic in the length of the audio.
    chunks = []
    for packet in container.demux(audio_stream):
        for frame in packet.decode():
            frame = resampler.resample(frame)
            chunks.append(frame.planes[0].to_bytes())

    return np.frombuffer(b''.join(chunks), dtype=np.int16)
Ejemplo n.º 7
0
    def open(self, path, mono=False, sample_rate=None):
        """Open the audio resource at ``path`` and set up lazy decoding.

        Stores the container, the first audio stream, a generator of
        resampled frames (``self.decode_iter``), and the derived
        ``duration`` / ``num_channels`` / ``sample_rate`` on the instance.
        Frames are resampled to packed s16; the layout is ``'mono'`` when
        ``mono`` is true, otherwise the stream's own layout; the rate is
        ``sample_rate``, else the stream rate, else 44100.
        """
        self.path = path
        self.open_kargs = {'mono': mono, 'sample_rate': sample_rate}

        av_options = {
            # 'usetoc' is set to enable fast seek (see also
            # ffmpeg commit c43bd08 for a 'fastseek' option)
            'usetoc': '1',
            # Timeouts of I/O operations, in microseconds and milliseconds
            'timeout': '5000000',
            'listen_timeout': '5000',
        }
        container = av.open(path, options=av_options)
        self.container = container
        log.debug('container: %s', container)

        stream = next(s for s in container.streams if s.type == 'audio')
        self.stream = stream
        log.debug('stream: %s', stream)

        resampler = av.AudioResampler(
            format=av.AudioFormat('s16').packed,
            layout='mono' if mono else stream.layout,
            rate=sample_rate or stream.rate or 44100)

        def decode_iter():
            """Generator reading and decoding the audio stream."""
            for packet in container.demux(stream):
                for decoded in packet.decode():
                    # Remember the original timestamp before wiping it.
                    self.last_frame_pts = decoded.pts
                    # frame pts must be set to None
                    # (see https://github.com/mikeboers/PyAV/issues/281)
                    decoded.pts = None
                    yield resampler.resample(decoded)

        self.decode_iter = decode_iter()
        self.pos = 0

        # Duration in seconds
        if not stream.duration:
            # It is certainly a web file
            log.info("No duration")
            self.duration = None
        else:
            self.duration = int(stream.duration * stream.time_base)

        self.num_channels = 1 if mono else stream.channels
        self.sample_rate = resampler.rate
Ejemplo n.º 8
0
    def __init__(self,
                 path,
                 output_chunk_size,
                 output_rate,
                 realtime=True,
                 time_limit=None,
                 output_format='s16',
                 output_layout='mono'):
        """
        Prepare chunked, resampled audio output for ``path``.

        Only ``output_format='s16'`` and ``output_layout='mono'`` are
        accepted; any other value raises ``NotImplementedError``.

        :type path: str
        :type output_chunk_size: int
        :type output_rate: int
        :type realtime: bool
        :type time_limit: float
        """
        if output_format != 's16':
            raise NotImplementedError(
                'output_format {} is not supported.'.format(output_format))
        if output_layout != 'mono':
            raise NotImplementedError(
                'output_layout {} is not supported.'.format(output_layout))

        # 16 bits per sample at ``output_rate`` samples per second.
        bit_rate = output_rate * 16

        self._realtime = realtime
        self._chunk_size = output_chunk_size
        self._time_limit = time_limit
        self._bit_rate = bit_rate
        # The chunk size is multiplied by 8 (bytes to bits, presumably)
        # before dividing by the bit rate.
        self._chunk_duration = output_chunk_size * 8 / bit_rate

        self._afi = AudioFrameIterable(path)
        target_format = av.AudioFormat(output_format).packed
        self._resampler = av.AudioResampler(
            format=target_format,
            layout=output_layout,
            rate=output_rate,
        )

        self._buffer = b''
        self._timestamp = self._duration_processed = 0
Ejemplo n.º 9
0
    def demultiplexer(self, container):
        """Demux ``container`` and push ``[video, audio]`` frame pairs to the FIFO.

        Video frames get ``self.new_vid_pts`` as pts (advanced by 512 per
        frame) and are reformatted to ``self.w`` x ``self.h`` ``yuv420p``.
        Audio frames have their pts cleared and are resampled to ``s16`` /
        ``layout=2`` / ``self.audio_rate``.  For every decoded frame a
        two-item list is put on ``self.fifo``; the slot that does not match
        the packet's stream type is ``None``.
        """
        # resample audio line to the given format
        resampler = av.AudioResampler(
            format=av.AudioFormat('s16'),
            layout=2,
            rate=self.audio_rate,
        )

        # loop over the container
        for packet in container.demux():
            kind = packet.stream.type

            for frame in packet.decode():
                video_frame = audio_frame = None

                if kind == 'video':
                    frame.pts = self.new_vid_pts
                    video_frame = frame.reformat(self.w, self.h, 'yuv420p')
                    self.new_vid_pts += 512
                elif kind == 'audio':
                    frame.pts = None
                    audio_frame = resampler.resample(frame)

                # push to fifo buffer
                self.fifo.put([video_frame, audio_frame])
Ejemplo n.º 10
0
 def reloadResampler(self):
     """Rebuild ``self.Resampler`` for packed s16 output at ``SAMPLING_RATE``.

     The layout is ``'stereo'`` when ``CHANNELS >= 2`` and ``'mono'``
     otherwise.
     """
     sample_format = av.AudioFormat('s16').packed
     if CHANNELS >= 2:
         channel_layout = 'stereo'
     else:
         channel_layout = 'mono'
     self.Resampler = av.AudioResampler(
         format=sample_format,
         layout=channel_layout,
         rate=SAMPLING_RATE)
Ejemplo n.º 11
0
    def init_audio_sink(self):
        """Open the PyAudio output and prepare decoding and resampling objects.

        Creates ``self.pa`` and ``self.sink`` (2-byte samples, 2 channels,
        44100 Hz, output only).  Depending on ``self.audio_format`` a decoder
        codec context is stored on ``self.codecContext`` (ALAC with explicit
        extradata, or AAC).  For any other value no codec context is
        created.  A packed s16 / stereo / 44100 Hz resampler is always
        stored on ``self.resampler``.
        """
        print("audit")
        self.pa = pyaudio.PyAudio()
        self.sink = self.pa.open(format=self.pa.get_format_from_width(2),
                                 channels=2,
                                 rate=44100,
                                 output=True)

        codec = None
        extradata = None
        selected_format = self.audio_format
        if selected_format == Audio.AudioFormat.ALAC_44100_16_2.value:
            # 36 bytes of decoder extradata.  Bytes 4-7 spell ASCII "alac"
            # and the last four bytes are 0x0000AC44 (= 44100).
            # NOTE(review): the original comment gave the span as
            # "Offset 0x00000000 to 0x00000035", which does not match the
            # 36 bytes listed - confirm against the source of this blob.
            extradata = bytes([
                0x00, 0x00, 0x00, 0x24,
                0x61, 0x6c, 0x61, 0x63,
                0x00, 0x00, 0x00, 0x00,
                0x00, 0x00, 0x01, 0x60,
                0x00, 0x10, 0x28, 0x0a,
                0x0e, 0x02, 0x00, 0xff,
                0x00, 0x00, 0x00, 0x00,
                0x00, 0x00, 0x00, 0x00,
                0x00, 0x00, 0xac, 0x44,
            ])
            codec = av.codec.Codec('alac', 'r')
        elif selected_format == Audio.AudioFormat.AAC_LC_44100_2.value:
            codec = av.codec.Codec('aac', 'r')

        if codec is not None:
            self.codecContext = context = av.codec.CodecContext.create(codec)
            context.sample_rate = 44100
            context.channels = 2
            context.format = AudioFormat('s16p')
        if extradata is not None:
            self.codecContext.extradata = extradata

        packed_s16 = av.AudioFormat('s16').packed
        self.resampler = av.AudioResampler(
            format=packed_s16,
            layout='stereo',
            rate=44100,
        )
Ejemplo n.º 12
0
    def _enqueue(self, run, finished, filepath, vid_q, aud_q, vid_info,
                 *stindex):
        """Decode ``filepath`` and feed raw audio / BGR video frames to queues.

        Arguments:
            run: object with ``wait()``; called before every packet is
                decoded (presumably an event used to pause decoding).
            finished: object with ``set()``; called once decoding returns.
            filepath: media file passed to ``av.open``.
            vid_q: queue receiving ``(frame, frame_ts, next_ts)`` tuples.
            aud_q: queue receiving the raw bytes of resampled audio frames.
            vid_info: mapping; ``vid_info['rotate']`` (90 / 180 / 270)
                selects how each video frame is rotated.
            *stindex: indices into ``container.streams`` of the streams to
                demux.
        """
        aud_resampler = av.AudioResampler(
            format=av.AudioFormat(
                's16p').packed,  # signed 16-bit; .packed is taken from the planar 's16p' format
            layout='stereo',
        )

        def decode():
            """Run the demux/decode loop until the container is exhausted."""
            print 'started decoding and queueing'
            container = av.open(filepath)
            streams = [container.streams[indx] for indx in stindex]
            # Video frames are queued one step late: each put() carries the
            # previous frame, its timestamp, and the current timestamp.
            prev_video_frame = None
            prev_video_ts = None

            v_stream = container.streams.video[0]

            # Scale down to keep things fast.
            out_longest_side = max(self._vwidth, self._vheight)
            if v_stream.height > v_stream.width:
                scale_args = "w=min(%d,iw):h=-1:flags=area" % (
                    out_longest_side, )
            else:
                scale_args = "w=-1:h=min(%d,ih):flags=area" % (
                    out_longest_side, )

            # Filter chain: buffer source -> format(bgr24) -> scale -> sink.
            filtergraph = av.filter.Graph()
            v_src = filtergraph.add_buffer(template=v_stream)
            v_bgr = filtergraph.add("format", "pix_fmts=bgr24")
            v_scale = filtergraph.add("scale", scale_args)
            v_snk = filtergraph.add("buffersink")
            v_src.link_to(v_bgr)
            v_bgr.link_to(v_scale)
            v_scale.link_to(v_snk)

            for packet in container.demux(streams):
                run.wait()
                for frame in packet.decode():
                    # A missing or zero pts gives None here; the video path
                    # below substitutes 0 for it.
                    play_at = float(frame.time_base *
                                    frame.pts) if frame.pts else None
                    if isinstance(frame, av.AudioFrame):
                        frame_r = aud_resampler.resample(frame)
                        raw_audio = frame_r.planes[0].to_bytes()
                        aud_q.put(raw_audio)
                    elif isinstance(frame, av.VideoFrame):
                        # NOTE: use filtergraph to convert to bgr24 instead of
                        # frame.reformat(format='bgr24').
                        #
                        # For a yuv420p frame, with SIMD optimizations on,
                        # frame.reformat(format='bgr24') will fail to convert
                        # the last width%8 pixels on each row, leaving a
                        # stripe of uninitialized data down the right side.
                        #
                        # The problem is VideoFrame allocates buffers with
                        # align=1 instead of align=SIMD_width_of_cpu.
                        #
                        # libavfilter allocates buffers with align=32 so a
                        # doing the bgr24 conversion via a filtergraph works.
                        v_src.push(frame)
                        frame_bgr = v_snk.pull()

                        # frame.to_nd_array() expects buffers to be align=1 so
                        # we have to do this by hand
                        plane = frame_bgr.planes[0]
                        dtype = numpy.uint8
                        bytes_per_pixel = 3
                        frame_h, frame_w = frame_bgr.height, frame_bgr.width
                        # Row width in pixels including padding (integer
                        # division under Python 2).
                        buffer_w = plane.line_size / bytes_per_pixel
                        # Reshape to the padded width, then crop the padding.
                        frame_bgr = numpy.frombuffer(plane, dtype).reshape(
                            frame_h, buffer_w, -1)[:frame_h, :frame_w]

                        # The first tuple carries None for the frame and its
                        # timestamp.  NOTE(review): the last decoded frame is
                        # never put on vid_q in this function - confirm that
                        # is intended.
                        vid_q.put((prev_video_frame, prev_video_ts, play_at
                                   or 0))
                        # Copies are taken because frame_bgr is a view on the
                        # plane's buffer.
                        if vid_info['rotate'] == 90:
                            prev_video_frame = numpy.rot90(frame_bgr.copy(),
                                                           k=-1)
                        elif vid_info['rotate'] == 180:
                            prev_video_frame = numpy.fliplr(
                                numpy.flipud(frame_bgr.copy()))
                        elif vid_info['rotate'] == 270:
                            prev_video_frame = numpy.rot90(frame_bgr.copy())
                        else:
                            prev_video_frame = frame_bgr.copy()
                        prev_video_ts = play_at or 0
                    else:
                        print 'unknown frame', frame
            print 'finished decoding and queueing'

        decode()
        finished.set()
Ejemplo n.º 13
0
def worker(player, loop, container, streams, tracks, lock_tracks, quit_event,
           throttle_playback, audio_effect, video_effect):
    """Decode ``streams`` from ``container`` and fan frames out to live tracks.

    Runs until ``quit_event`` is set or decoding stops.  Audio frames are
    resampled to s16 / stereo / 48000 Hz when they differ, re-timestamped
    with a running sample counter, optionally passed through
    ``audio_effect``, and re-chunked through an ``av.AudioFifo``.  Video
    frames have their pts shifted by the first pts seen, may be dropped when
    late, and are optionally passed through ``video_effect``.  Frames are put
    on each matching track's ``_queue`` via
    ``asyncio.run_coroutine_threadsafe`` on ``loop``.

    Args:
        player: Object whose ``always_running`` flag keeps decoding active
            even when no track of a kind exists.
        loop: Event loop on which queue coroutines are scheduled.
        container: Object whose ``decode(*streams)`` yields frames.
        streams: Streams passed to ``container.decode``.
        tracks: Set-like collection of callables returning a track or
            ``None`` (presumably weak references); dead entries are pruned
            on every iteration.
        lock_tracks: Context-manager lock guarding ``tracks``.
        quit_event: Object with ``is_set()``; the loop exits once it is true.
        throttle_playback: When truthy, sleep 0.1 s if audio is more than one
            second ahead of wall time, and drop video frames more than 0.1 s
            late.
        audio_effect: Optional callable ``(loop, frame) -> frame``.
        video_effect: Optional callable ``(loop, frame) -> frame``.
    """
    import fractions
    import time

    audio_fifo = av.AudioFifo()
    audio_format_name = "s16"
    audio_layout_name = "stereo"
    audio_sample_rate = 48000
    # Running count of samples produced so far; used as the audio pts.
    audio_samples = 0
    # AUDIO_PTIME is defined elsewhere in the module.
    audio_samples_per_frame = int(audio_sample_rate * AUDIO_PTIME)
    audio_resampler = av.AudioResampler(format=audio_format_name,
                                        layout=audio_layout_name,
                                        rate=audio_sample_rate)

    video_first_pts = None
    audio_frame_time = None
    start_time = time.time()

    # A failing effect is logged once, then silenced until it succeeds again.
    audio_print_warning = True
    video_print_warning = True

    def iter_tracks(kind=None):
        """Yield live tracks, optionally only those whose ``kind`` matches."""
        with lock_tracks:
            for track in tracks:
                track = track()
                if track is not None:
                    if kind is None or kind == track.kind:
                        yield track

    def cleanup_tracks():
        """Discard entries of ``tracks`` that no longer resolve to a track."""
        with lock_tracks:
            to_remove = {track for track in tracks if track() is None}
            for track in to_remove:
                tracks.discard(track)

    def run_threadsafe(coro):
        """Schedule ``coro`` on ``loop`` from this worker thread."""
        asyncio.run_coroutine_threadsafe(coro, loop)

    def append_frame(frame, kind=None, force=True):
        """Queue ``frame`` on every live track of ``kind``.

        When a track's queue is full the frame is skipped unless ``force``
        is true, in which case one queued item is removed first.
        """
        for track in iter_tracks(kind=kind):
            if track._queue.full():

                # remove one frame and append the new frame
                if force:
                    run_threadsafe(track._queue.get())
                    run_threadsafe(track._queue.put(frame))

            else:
                run_threadsafe(track._queue.put(frame))

    while not quit_event.is_set():

        # clean invalid ref
        cleanup_tracks()

        # decode frame
        try:
            frame = next(container.decode(*streams))
        except (av.AVError, StopIteration):
            # Send the end-of-stream marker (None) once.  append_frame()
            # already fans out to every live track; wrapping it in another
            # iter_tracks() loop would queue the marker once per track on
            # every track and would re-enter ``lock_tracks`` while the
            # suspended outer generator still holds it.
            append_frame(None, force=True)
            break

        # read up to 1 second ahead
        if throttle_playback:
            elapsed_time = time.time() - start_time
            if audio_frame_time and audio_frame_time > elapsed_time + 1:
                time.sleep(0.1)

        # audio
        if isinstance(frame, av.AudioFrame) and (set(iter_tracks('audio'))
                                                 or player.always_running):

            if (frame.format.name != audio_format_name
                    or frame.layout.name != audio_layout_name
                    or frame.sample_rate != audio_sample_rate):
                # pts is cleared before the frame is handed to the resampler.
                frame.pts = None
                frame = audio_resampler.resample(frame)

            # fix timestamps
            frame.pts = audio_samples
            frame.time_base = fractions.Fraction(1, audio_sample_rate)
            audio_samples += frame.samples

            # apply audio effect
            if audio_effect is not None:

                try:
                    frame = audio_effect(loop, frame)
                    audio_print_warning = True
                except BaseException:
                    # On failure the unmodified frame is used.
                    if audio_print_warning:
                        logger.exception('Failed to apply audio effect')
                        audio_print_warning = False

            # Re-chunk into frames of audio_samples_per_frame samples; a
            # falsy read() result ends the drain loop and leaves ``frame``
            # falsy, so the video branch below is skipped.
            audio_fifo.write(frame)
            while True:
                frame = audio_fifo.read(audio_samples_per_frame)
                if frame:
                    audio_frame_time = frame.time
                    append_frame(frame, 'audio')
                else:
                    break

        # video
        if isinstance(frame, av.VideoFrame) and (set(iter_tracks('video'))
                                                 or player.always_running):

            if frame.pts is None:  # pragma: no cover
                logger.warning("Skipping video frame with no pts")
                continue

            # video from a webcam doesn't start at pts 0, cancel out offset
            if video_first_pts is None:
                video_first_pts = frame.pts
            frame.pts -= video_first_pts

            # drop frame if too late
            if throttle_playback:
                elapsed_time = time.time() - start_time
                if elapsed_time - frame.time > 0.1:
                    continue

            # apply video effect
            if video_effect is not None:

                try:
                    frame = video_effect(loop, frame)
                    video_print_warning = True
                except BaseException:
                    # On failure the unmodified frame is used.
                    if video_print_warning:
                        logger.exception('Failed to apply video effect')
                        video_print_warning = False

            append_frame(frame, 'video')
def live():
    '''
    Transcribe the audio of the stream at ``FLAGS.url`` as it is decoded.

    Requirements noted by the original author: ``youtube-dl`` (invoked as a
    command) and ``pip install av``.

    ``youtube-dl -f 91 -g`` resolves the media URL.  Its audio is resampled
    to s16p / layout 1 / 16 kHz and fed in sliding windows to the decoder
    selected by ``FLAGS.stream_decoder``.  Decoded text is printed as it
    arrives; after 35 consecutive empty results ``' [Background]'`` is
    printed and the decoder is reset.  With the hard-coded settings below
    (``infinite = True``) nothing is written to disk and the time limit is
    not applied.
    '''
    filepath = './youtube_live.mp3'
    save_stream = True
    infinite = True
    duration = 50  # seconds
    write_output = not infinite and save_stream

    command = ['youtube-dl', '-f', '91', '-g', FLAGS.url]
    stdout_data, _ = subprocess.Popen(
        command, stdout=subprocess.PIPE, bufsize=10**8).communicate()
    videolink = stdout_data.decode("utf-8").strip()
    resampler = av.AudioResampler("s16p", layout=1, rate=16 * 1000)

    if write_output:
        output_container = av.open(filepath, 'w')
        output_stream = output_container.add_stream('mp3')

    input_container = av.open(videolink)
    input_stream = input_container.streams.get(audio=0)[0]

    frames_per_step = FLAGS.downsample * FLAGS.step_n_frame
    win_size = FLAGS.win_length + FLAGS.hop_length * (frames_per_step - 1)
    hop_size = FLAGS.hop_length * frames_per_step

    decoder_cls = (PytorchStreamDecoder
                   if FLAGS.stream_decoder == 'torch'
                   else OpenVINOStreamDecoder)
    stream_decoder = decoder_cls(FLAGS)

    begin_time = datetime.now()
    buffer = torch.empty(1, 0)
    blank_counter = 0
    for frame in input_container.decode(input_stream):
        frame.pts = None
        resample_frame = resampler.resample(frame)

        # The samples are divided by 32768 after conversion to float.
        samples = torch.tensor(resample_frame.to_ndarray().copy())
        samples = samples.float() / 32768

        if torch.isnan(samples).any():
            print("[NAN]", flush=True, end=" ")

        if buffer.shape[1] < win_size:
            buffer = torch.cat([buffer, samples], dim=-1)

        # Consume every full window currently buffered, advancing by hop_size.
        while buffer.shape[1] >= win_size:
            window = buffer[:, :win_size]
            buffer = buffer[:, hop_size:]
            if torch.isnan(window).any():
                print("[NAN] waveform", flush=True, end=" ")
                continue
            seq = stream_decoder.decode(window)
            if seq != "":
                blank_counter = 0
                print(seq, end='', flush=True)
                continue
            blank_counter += 1
            if blank_counter == 35:
                print(' [Background]')
                stream_decoder.reset()

        if write_output:
            for packet in output_stream.encode(resample_frame):
                output_container.mux(packet)

        if not infinite:
            if (datetime.now() - begin_time).total_seconds() > duration:
                break

    if write_output:
        # Flush the encoder before closing the output.
        for packet in output_stream.encode(None):
            output_container.mux(packet)
        output_container.close()
Ejemplo n.º 15
0
def convert_pyav(input, output, file_name, extension, a_codec, v_codec,
                 sample_rate, sample_fmt, channels):
    """Re-encode the first audio stream of ``input`` into ``output`` with PyAV.

    ``a_codec`` and ``sample_rate`` configure the output stream;
    ``sample_fmt`` and ``channels`` configure the resampler.  ``file_name``,
    ``extension`` and ``v_codec`` are only printed.  Any exception is
    counted on ``settings.exception_counter`` and logged; nothing is
    re-raised.
    """
    try:
        print('trying PyAV Method...')

        print('variables: ', input, output, file_name, extension, a_codec,
              v_codec, sample_rate, sample_fmt, channels)

        # I/O VARIABLES
        source = av.open(input, 'r')
        sink = av.open(output, 'w')
        audio_out = sink.add_stream(a_codec, rate=int(sample_rate))

        # RESAMPLER OBJECT (WARNING: ONLY SET RATE ON AUDIO-STREAM --
        # FORMATTING ISSUES)
        resampler = av.AudioResampler(format=sample_fmt, layout=channels)

        # NOTE: a filter-graph stage (abuffer -> 'silenceremove' with
        # 'stop_periods=-1:stop_duration=1:stop_threshold=-90dB' ->
        # buffersink) was planned here, but add_abuffer is missing from
        # filter.Graph().  Wait for a stable release to implement it; use
        # FFmpeg for now.

        # DECODING/ENCODING
        for frame in source.decode(audio=0):
            # pts is presentation time-stamp. Not relevant here.
            frame.pts = None
            resampled = resampler.resample(frame)
            for packet in audio_out.encode(resampled):
                sink.mux(packet)

        # Flush the encoder.
        for packet in audio_out.encode(None):
            sink.mux(packet)

        sink.close()

    except Exception as e:
        settings.exception_counter += 1
        logger.error('admin_message',
                     msg='Could not convert the file',
                     exc_info=e)
Ejemplo n.º 16
0
from qtproxy import Q

import av


# Command line: a single positional argument, the media file to open.
parser = argparse.ArgumentParser()
parser.add_argument('path')
args = parser.parse_args()

# Open the file and pick its first audio stream (StopIteration if none).
container = av.open(args.path)
stream = next(s for s in container.streams if s.type == 'audio')

# Decoded audio is converted to packed s16 / stereo / 48000 Hz, matching the
# Qt audio format configured below.
fifo = av.AudioFifo()
resampler = av.AudioResampler(
    format=av.AudioFormat('s16').packed,
    layout='stereo',
    rate=48000,
)



# Qt output format: little-endian signed 16-bit PCM, 2 channels, 48000 Hz.
qformat = Q.AudioFormat()
qformat.setByteOrder(Q.AudioFormat.LittleEndian)
qformat.setChannelCount(2)
qformat.setCodec('audio/pcm')
qformat.setSampleRate(48000)
qformat.setSampleSize(16)
qformat.setSampleType(Q.AudioFormat.SignedInt)

output = Q.AudioOutput(qformat)
# presumably 2 bytes x 2 channels x 48000 samples, i.e. one second of audio
# - TODO confirm
output.setBufferSize(2 * 2 * 48000)
Ejemplo n.º 17
0
    def _do_run(self) -> None:
        """Decode the source's first audio stream into ``Source.AudioFifo``.

        Holds ``Source._loading`` (through ``withLock``) for the whole run.
        Opens the container if needed, then loops until ``Source._end`` is
        set or the frame generator is exhausted: each frame is optionally
        filtered, resampled to packed s16 / stereo / 48000 Hz, written to the
        FIFO, and ``Source._position`` is updated from the frame's original
        pts.
        """
        with withLock(self.Source._loading):
            if not self.Source.Container:
                self.Source.Container = av.open(
                    self.Source.Source, options=self.Source.AVOption
                )
            # Container.duration is divided by 1,000,000 (presumably
            # microseconds to seconds) and rounded to two decimals.
            self.Source.duration = round(self.Source.Container.duration / 1000000, 2)

            self.Source.selectAudioStream = self.Source.Container.streams.audio[0]
            self.Source.FrameGenerator = self.Source.Container.decode(
                self.Source.selectAudioStream
            )

            while not self.Source._end.is_set():
                # Rebuild the filter graph whenever the requested filter
                # differs from the one currently applied.
                if self.Source.filter != self.Filter:
                    self.Filter = self.Source.filter

                    if self.Source.filter:
                        self.FilterGraph = AudioFilter()
                        self.FilterGraph.selectAudioStream = (
                            self.Source.selectAudioStream
                        )
                        self.FilterGraph.setFilters(self.Filter)
                    else:
                        self.FilterGraph = None

                # (Re)create the resampler on first use or when a reload was
                # requested (see the ValueError handler below).
                if not self.Resampler or self.Source._haveToReloadResampler.is_set():
                    self.Resampler = av.AudioResampler(
                        format=av.AudioFormat("s16").packed, layout="stereo", rate=48000
                    )
                    self.Source._haveToReloadResampler.clear()

                # If a seek is in progress (lock held elsewhere), block until
                # it is released, then fetch the next frame while holding the
                # lock ourselves.
                _seek_locked = False
                if self.Source._seeking.locked():
                    self.Source._seeking.acquire()
                    _seek_locked = True

                Frame = next(self.Source.FrameGenerator, None)

                if _seek_locked:
                    # Release the lock and reset the FIFO after a seek.
                    self.Source._seeking.release()
                    self.Source.AudioFifo.reset()

                if not Frame:
                    # Generator exhausted: stop the source and leave the loop.
                    self.Source.stop()
                    break

                # Position is computed before the pts is cleared below.
                # NOTE(review): this fails if Frame.pts is None - confirm
                # decoded frames always carry a pts here.
                _current_position = float(Frame.pts * Frame.time_base)

                if self.FilterGraph:
                    self.FilterGraph.push(Frame)
                    Frame = self.FilterGraph.pull()

                    if not Frame:
                        continue

                Frame.pts = None
                try:
                    Frame = self.Resampler.resample(Frame)
                except ValueError:
                    # Request a fresh resampler on the next iteration; the
                    # current frame is skipped.
                    self.Source._haveToReloadResampler.set()
                    continue

                # Block until the FIFO signals that it wants more data.
                if not self.Source.AudioFifo.haveToFillBuffer.is_set():
                    self.Source.AudioFifo.haveToFillBuffer.wait()

                self.Source.AudioFifo.write(Frame)
                self.Source._position = _current_position

                # Release ``_waitforread`` if it is currently held.
                if self.Source._waitforread.locked():
                    self.Source._waitforread.release()
Ejemplo n.º 18
0
from pytgcalls import GroupCallFactory
import pyrogram
import telethon
import av

# Telegram API credentials; both are left as None placeholders here.
API_HASH = None
API_ID = None

CHAT_PEER = '@tgcallschat'  # chat or channel where the audio should be played
SOURCE = 'input.mp3'  # audio file path or stream URL, e.g. https://file-examples-com.github.io/uploads/2017/11/file_example_MP3_700KB.mp3
CLIENT_TYPE = GroupCallFactory.MTPROTO_CLIENT_TYPE.PYROGRAM
# To use Telethon instead, uncomment the line below.
#CLIENT_TYPE = GroupCallFactory.MTPROTO_CLIENT_TYPE.TELETHON

# Module-level FIFO that on_played_data() reads from.
fifo = av.AudioFifo(format='s16le')
# Resampler configured for signed 16-bit stereo at 48 kHz.
resampler = av.AudioResampler(format='s16', layout='stereo', rate=48000)


def on_played_data(gc, length):
    """Return queued audio from the module-level ``fifo`` as raw bytes.

    Args:
        gc: Unused by this function.
        length: Requested amount of data; it is divided by 4 to obtain the
            number of samples to read.  (Presumably a byte count for 16-bit
            stereo audio, i.e. 4 bytes per sample -- confirm against the
            caller.)

    Returns:
        The bytes of the frame read from the FIFO, or the falsy value
        returned by ``fifo.read`` when no frame is available.
    """
    # Floor division keeps the sample count an integer; true division would
    # hand a float to AudioFifo.read(), which expects an int.
    data = fifo.read(length // 4)
    if data:
        data = data.to_ndarray().tobytes()
    return data


async def main(client):
    """Start *client*, wait until it is connected, then build a call factory."""
    await client.start()

    # Poll once per second until the client reports that it is connected.
    while True:
        if client.is_connected:
            break
        await asyncio.sleep(1)

    group_call_factory = GroupCallFactory(client, CLIENT_TYPE)
Ejemplo n.º 19
0
# Remaining command-line options; the parser itself is created above this
# fragment.
arg_parser.add_argument('-l', '--layout')
arg_parser.add_argument('-r', '--rate', type=int)
arg_parser.add_argument('-s', '--size', type=int, default=1024)
arg_parser.add_argument('-c', '--count', type=int, default=5)
args = arg_parser.parse_args()

ffplay = None

# Open the input and pick its first audio stream (StopIteration if none).
container = av.open(args.path)
stream = next(s for s in container.streams if s.type == 'audio')

# A FIFO is created only when a non-zero --size was given.
fifo = av.AudioFifo() if args.size else None
# A resampler is created only when at least one of format/layout/rate was
# requested; otherwise it stays None.
resampler = av.AudioResampler(
    # Packed variant of the requested format, or None when no format was given.
    format=av.AudioFormat(args.format or stream.format.name).packed
    if args.format else None,
    # An all-digit layout string is converted to int; anything else is passed
    # through unchanged.
    layout=int(args.layout)
    if args.layout and args.layout.isdigit() else args.layout,
    rate=args.rate,
) if (args.format or args.layout or args.rate) else None

# Running counters.
read_count = 0
fifo_count = 0
sample_count = 0

for i, packet in enumerate(container.demux(stream)):

    for frame in packet.decode():

        read_count += 1
        # Print a numbered line for every decoded frame.
        print('>>>> %04d' % read_count, frame)
        # NOTE(review): the snippet is cut off after this condition.
        if args.data:
Ejemplo n.º 20
0
        # Cancel every task that is actually set (entries may be None), then
        # replace the mapping with an empty one.
        for task in self.__tracks.values():
            if task is not None:
                task.cancel()
        self.__tracks = {}


def player_worker(loop, container, streams,, audio_track, video_track, quit_event,
                  throttle_playback, copy_frame = False):
    audio_fifo = av.AudioFifo()
    audio_format_name = 's16'
    audio_layout_name = 'stereo'
    audio_sample_rate = 48000
    audio_samples = 0
    audio_samples_per_frame = int(audio_sample_rate * AUDIO_PTIME)
    audio_resampler = av.AudioResampler(
        format=audio_format_name,
        layout=audio_layout_name,
        rate=audio_sample_rate)

    video_first_pts = None

    frame_time = None
    start_time = time.time()

    while not quit_event.is_set():
        try:
            
            if not copy_frame:
                frame = next(container.decode(*streams))
            else:
                frame = next(container.demux(*streams))
            
import numpy as np
from stream import transforms, model, _tokenizer, test_wav, window_size, eval_args, lm_model
import torchaudio
import torch
import torch.nn.functional as F
import logging

# Initialise the language model's hidden state and run it once on a (1, 1)
# tensor of ones.
hidden_ = lm_model.init_hidden(1)
lm_logist, lm_hidden = lm_model(torch.ones(1).long().unsqueeze(0), hidden_)

av.logging.set_level(0)

# Buffer sizing: one chunk is derived from 16 * 1000 (the same value used as
# the resampler rate below) and window_size; the full buffer holds `frames`
# chunks plus (frames - 1) extra samples.
frames = 4
single_input_chunk = int(16 * 1000 * window_size * 3 - 1)
buffer_size = single_input_chunk * frames + (frames - 1)
# Resampler producing planar signed 16-bit audio at 16 kHz with layout=1.
resampler = av.AudioResampler("s16p", rate=16 * 1000, layout=1)
buffers = []

# Prime the decoder with a single token of value 1.
bos = torch.ones((1, 1)).long() * 1
h_pre, (h, c) = model.decoder(model.embed(bos))  # decode first zero
y_seq = []

# Module-level state; encoder_h is reset by reset_hidden_state().
encoder_h = None
buffer = []


def reset_hidden_state():
    """Reset the module-level ``encoder_h`` to ``None``."""
    # ``buffer`` and ``h_enc`` are declared global as well, but only
    # ``encoder_h`` is rebound in the visible body.
    global buffer, encoder_h, h_enc
    encoder_h = None
Ejemplo n.º 22
0
def stream_doom(yturl: str, speed=None, noise=None):
  """Return a generator of doomified mp3 frames.

  The audio stream at *yturl* is resampled to a stretched sample rate, mixed
  with samples read from a vinyl-noise WAV file, smoothed with a moving
  average and encoded with an mp3 codec context.

  Args:
    yturl: Location passed to ``av.open``.
    speed: Playback speed factor.  The resampler rate is the input rate times
      ``1 / speed``; when omitted, the input rate is multiplied by 1.4.
    noise: Weight of the vinyl noise in the mix; a falsy value selects 0.1.
      The input audio is weighted with ``1 - noise``.

  Yields:
    The bytes of each encoded packet.

  Raises:
    Exception: If the input stream has neither one nor two channels.
  """
  sample_bytes = np.dtype('i2').itemsize  # the noise file is read as int16

  in_file = av.open(yturl, options={'rtsp_transport': 'tcp'})
  # try/finally so the container is closed even when setup fails or the
  # consumer abandons the generator early.
  try:
    in_stream = in_file.streams.audio[0]
    in_codec = in_stream.codec_context

    out_codec = av.CodecContext.create('mp3', 'w')
    out_codec.rate = in_codec.rate
    out_codec.channels = in_codec.channels
    out_codec.format = in_codec.format

    # The factor is chosen first and then applied to the input rate.  The
    # original expression parsed as `(rate * 1.4) if speed is None else
    # (1 / speed)`, so an explicit speed produced a rate of 1 / speed Hz.
    stretch = 1.4 if speed is None else 1 / speed
    resampler = av.AudioResampler(
        format=av.AudioFormat('s16').packed,
        layout=in_codec.layout,
        rate=int(in_codec.rate * stretch),
    )

    if in_codec.channels == 2:
      nf = 'vinyl.wav'
    elif in_codec.channels == 1:
      nf = 'vinylmono.wav'
    else:
      # TODO: Support 5.1 and other configs
      raise Exception('Too many audio channels in stream')

    noise = noise or 0.1
    wet = 1 - noise

    def moving_average(a, n=3):
      # Running mean over a window of n; the result is n - 1 samples shorter.
      ret = np.cumsum(a, dtype=float)
      ret[n:] = ret[n:] - ret[:-n]
      return ret[n - 1:] / n

    with wave.open(nf, 'rb') as vinyl:
      # Lead-in: emit 1.5 * rate frames of the noise file on their own.
      vinbuf = vinyl.readframes(int(out_codec.rate * 1.5))
      b = np.frombuffer(vinbuf, dtype='i2').reshape((1, -1))
      newframe = av.audio.frame.AudioFrame.from_ndarray(b, format='s16', layout=in_codec.layout.name)
      newframe.rate = out_codec.rate
      for p in out_codec.encode(newframe):
        yield p.to_bytes()

      for packet in in_file.demux(in_stream):
        for frame in packet.decode():
          frame.pts = None
          buf = resampler.resample(frame).to_ndarray()[0]
          # Read a matching stretch of the noise file, rewinding at its end.
          n_frames = len(buf) // in_codec.channels
          vinbuf = vinyl.readframes(n_frames)
          # A full read holds one int16 (2 bytes) per element of buf, so the
          # short-read test must scale by the sample width, not the channel
          # count (the two only coincide for stereo).
          if len(vinbuf) < len(buf) * sample_bytes:
            vinyl.rewind()
            vinbuf = vinyl.readframes(n_frames)
          a = buf * wet
          b = np.frombuffer(vinbuf, dtype='i2') * noise
          mod = moving_average(a + b, n=7).astype('i2').reshape((1, -1))

          newframe = av.audio.frame.AudioFrame.from_ndarray(mod, format='s16', layout=in_codec.layout.name)
          newframe.rate = out_codec.rate
          for p in out_codec.encode(newframe):
            yield p.to_bytes()

      # Flush the encoder.  Passing None drains buffered packets; re-encoding
      # the last frame (as before) duplicated audio and left data buffered.
      for p in out_codec.encode(None):
        yield p.to_bytes()
  finally:
    in_file.close()
Ejemplo n.º 23
0
    def init_audio_sink(self):
        """Open the PyAudio output and prepare codec, resampler and delay.

        Reads ``self.channel_count``, ``self.sample_rate``,
        ``self.sample_size``, ``self.audio_format`` and ``self.af``.
        Sets ``self.pa``, ``self.sink``, ``self.resampler`` and
        ``self.sample_delay``; sets ``self.codec`` when ``self.af`` matches a
        known codec name and ``self.codecContext`` when a codec is available.
        Exits the process if the output stream could not be opened.
        """
        codecLatencySec = 0
        self.pa = pyaudio.PyAudio()
        self.sink = self.pa.open(format=self.pa.get_format_from_width(2),
                                 channels=self.channel_count,
                                 rate=self.sample_rate,
                                 output=True)
        # nice Python3 crash if we don't check self.sink is null. Not harmful, but should check.
        if not self.sink:
            exit()

        # ALAC variants need codec extradata built from rate / bit depth /
        # channel count.
        extradata = None
        if self.audio_format == AirplayAudFmt.ALAC_44100_16_2.value:
            extradata = self.set_alac_extradata(self, 44100, 16, 2)
        elif self.audio_format == AirplayAudFmt.ALAC_44100_24_2.value:
            extradata = self.set_alac_extradata(self, 44100, 24, 2)
        elif self.audio_format == AirplayAudFmt.ALAC_48000_16_2.value:
            extradata = self.set_alac_extradata(self, 48000, 16, 2)
        elif self.audio_format == AirplayAudFmt.ALAC_48000_24_2.value:
            extradata = self.set_alac_extradata(self, 48000, 24, 2)

        # NOTE(review): when no branch matches, self.codec keeps whatever
        # value it had before this call; it must already exist as an
        # attribute or the check further down raises AttributeError.
        if 'ALAC' in self.af:
            self.codec = av.codec.Codec('alac', 'r')
        elif 'AAC' in self.af:
            self.codec = av.codec.Codec('aac', 'r')
        elif 'OPUS' in self.af:
            self.codec = av.codec.Codec('opus', 'r')
        # PCM: both substrings must be present.  The previous
        # `'PCM' and '_16_' in self.af` only tested for '_16_' because the
        # non-empty literal 'PCM' is always truthy.
        elif 'PCM' in self.af and '_16_' in self.af:
            self.codec = av.codec.Codec('pcm_s16le_planar', 'r')
        elif 'PCM' in self.af and '_24_' in self.af:
            self.codec = av.codec.Codec('pcm_s24le', 'r')

        # Per-codec latency compensation (2017 samples for ELD, 2624 samples
        # for AAC_LC, each divided by the sample rate) seems not to be
        # required, so codecLatencySec stays at 0.

        if self.codec is not None:
            self.codecContext = av.codec.CodecContext.create(self.codec)
            self.codecContext.sample_rate = self.sample_rate
            self.codecContext.channels = self.channel_count
            self.codecContext.format = av.AudioFormat('s' +
                                                      str(self.sample_size) +
                                                      'p')
        if extradata is not None:
            self.codecContext.extradata = extradata

        # Resampler producing packed signed samples of the configured size in
        # stereo at the configured rate.
        self.resampler = av.AudioResampler(
            format=av.AudioFormat('s' + str(self.sample_size)).packed,
            layout='stereo',
            rate=self.sample_rate,
        )

        audioDevicelatency = \
            self.pa.get_default_output_device_info()['defaultHighOutputLatency']
        # defaultLowOutputLatency is also available
        print(f"audioDevicelatency (sec): {audioDevicelatency:0.5f}")
        pyAudioDelay = self.sink.get_output_latency()
        print(f"pyAudioDelay (sec): {pyAudioDelay:0.5f}")
        ptpDelay = 0.002
        # Total delay is the sum of all latency contributions.
        self.sample_delay = pyAudioDelay + audioDevicelatency + codecLatencySec + ptpDelay
        print(f"Total sample_delay (sec): {self.sample_delay:0.5f}")
Ejemplo n.º 24
0
def main(argv):
    """Stream the audio behind ``FLAGS.url`` through the streaming decoder.

    ``youtube-dl`` resolves the URL to a direct media link, PyAV decodes and
    resamples its audio (``"s16p"``, layout 1, 16 kHz), and windows of
    samples are passed to ``OpenVINOStreamDecoder``, whose output is printed.

    Requires the ``youtube-dl`` executable and ``pip install av``.

    Args:
        argv: Unused.
    """
    print(FLAGS.url)

    filepath = 'bloom.mp3'
    save_stream = False

    # Ask youtube-dl for the direct media URL of format 91.
    command = ['youtube-dl', '-f', '91', '-g', FLAGS.url]
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=10**8)
    out, _ = proc.communicate()
    videolink = out.decode("utf-8").strip()

    resampler = av.AudioResampler("s16p", layout=1, rate=16 * 1000)

    # Window and hop sizes, in samples, derived from the feature settings.
    win_size = (
        FLAGS.win_length +
        FLAGS.hop_length * (FLAGS.downsample * FLAGS.step_n_frame - 1))
    hop_size = (
        FLAGS.hop_length * (FLAGS.downsample * FLAGS.step_n_frame))

    input_container = None
    output_container = None
    output_stream = None
    # try/finally so both containers are closed even when opening, decoding
    # or inference raises.
    try:
        if save_stream:
            output_container = av.open(filepath, 'w')
            output_stream = output_container.add_stream('mp3')

        input_container = av.open(videolink)
        input_stream = input_container.streams.get(audio=0)[0]

        stream_decoder = OpenVINOStreamDecoder(FLAGS)

        track_counter = 0
        buffers = torch.empty(0)
        for frame in input_container.decode(input_stream):
            frame.pts = None
            resample_frame = resampler.resample(frame)

            # int16 samples -> float tensor scaled by 1 / 32768.
            waveform = np.frombuffer(
                resample_frame.planes[0].to_bytes(), dtype='int16')
            waveform = torch.tensor(waveform.copy())
            waveform = waveform.float() / 32768

            # waveform = waveform.clamp(-1, 1)
            # waveform[waveform != waveform] = 0
            if torch.isnan(waveform).any():
                print("[NAN]", flush=True, end=" ")

            # Append only while less than one window is buffered; otherwise
            # the new samples are dropped and an overflow is reported.
            if len(buffers) < win_size:
                buffers = torch.cat([buffers, waveform], dim=0)
            else:
                print("[BUFFER OVERFLOW]", flush=True, end=" ")

            if len(buffers) >= win_size:
                waveform = buffers[:win_size]
                buffers = buffers[hop_size:]
                if torch.isnan(waveform).any():
                    print("[NAN] waveform", flush=True, end=" ")
                    continue

                seq = stream_decoder.decode(waveform[None])
                print(seq, end='', flush=True)

                # Reset the decoder state every 200 decoded windows.
                track_counter += 1
                if track_counter % 200 == 0:
                    print('[reset state]')
                    stream_decoder.reset()

            if save_stream:
                for packet in output_stream.encode(resample_frame):
                    output_container.mux(packet)

        # Flush the encoder only after a complete, successful run.
        if save_stream:
            for packet in output_stream.encode(None):
                output_container.mux(packet)
    finally:
        if input_container is not None:
            input_container.close()
        if output_container is not None:
            output_container.close()
Ejemplo n.º 25
0
def player_worker(loop, container, streams, audio_track, video_track,
                  quit_event, throttle_playback):
    """Decode *container* and hand the frames to the track queues on *loop*.

    Runs until *quit_event* is set or decoding stops.  Audio frames are
    converted to s16 / stereo / 48 kHz when needed, re-timestamped by sample
    count and re-chunked through a FIFO; video frames have their pts shifted
    so that the first frame starts at 0.  When decoding ends, ``None`` is put
    on the queue of every track that was supplied.

    Args:
        loop: Event loop on which the queue ``put`` coroutines are scheduled.
        container: Opened media container to decode.
        streams: Streams passed to ``container.decode``.
        audio_track: Track receiving audio frames, or a falsy value.
        video_track: Track receiving video frames, or a falsy value.
        quit_event: Event whose ``is_set()`` stops the worker.
        throttle_playback: When true, sleep briefly whenever the last frame
            time is more than one second ahead of elapsed wall-clock time.
    """
    fifo = av.AudioFifo()
    target_format = "s16"
    target_layout = "stereo"
    target_rate = 48000
    samples_written = 0
    chunk_samples = int(target_rate * AUDIO_PTIME)
    resampler = av.AudioResampler(format=target_format,
                                  layout=target_layout,
                                  rate=target_rate)

    def enqueue(track, item):
        # Schedule the queue put on the event loop from this thread.
        asyncio.run_coroutine_threadsafe(track._queue.put(item), loop)

    first_video_pts = None
    last_frame_time = None
    started_at = time.time()

    while not quit_event.is_set():
        try:
            frame = next(container.decode(*streams))
        except (av.AVError, StopIteration) as exc:
            retryable = (isinstance(exc, av.FFmpegError)
                         and exc.errno == errno.EAGAIN)
            if retryable:
                time.sleep(0.01)
                continue
            # Decoding is over: signal end-of-stream to each supplied track.
            for track in (audio_track, video_track):
                if track:
                    enqueue(track, None)
            break

        # read up to 1 second ahead
        if throttle_playback:
            elapsed = time.time() - started_at
            if last_frame_time and last_frame_time > elapsed + 1:
                time.sleep(0.1)

        if isinstance(frame, AudioFrame) and audio_track:
            already_in_target = (frame.format.name == target_format
                                 and frame.layout.name == target_layout
                                 and frame.sample_rate == target_rate)
            if not already_in_target:
                frame.pts = None
                frame = resampler.resample(frame)

            # Rewrite timestamps as a running sample count.
            frame.pts = samples_written
            frame.time_base = fractions.Fraction(1, target_rate)
            samples_written += frame.samples

            # Re-chunk into fixed-size frames and forward every full chunk.
            fifo.write(frame)
            chunk = fifo.read(chunk_samples)
            while chunk:
                last_frame_time = chunk.time
                enqueue(audio_track, chunk)
                chunk = fifo.read(chunk_samples)
        elif isinstance(frame, VideoFrame) and video_track:
            if frame.pts is None:  # pragma: no cover
                logger.warning(
                    "MediaPlayer(%s) Skipping video frame with no pts",
                    container.name)
                continue

            # video from a webcam doesn't start at pts 0, cancel out offset
            if first_video_pts is None:
                first_video_pts = frame.pts
            frame.pts -= first_video_pts

            last_frame_time = frame.time
            enqueue(video_track, frame)
Ejemplo n.º 26
0
    def play_video(self, avi_file):
        """Play *avi_file* from the game directory on the real screen.

        The audio stream is first resampled to packed 16-bit stereo and
        written into an in-memory WAV that is loaded into the pygame mixer;
        the video stream is then decoded and blitted frame by frame until it
        ends or a key press is detected.

        Args:
            avi_file: File name relative to ``config['game_path']``.

        Returns:
            True once playback has run; False when PyAV cannot be imported,
            AVI playback is disabled in ``config`` or the file does not exist.
        """
        try:
            import av
        except ImportError:
            return False
        if not config['enable_avi_play']:
            return False
        avi_file = os.path.join(config['game_path'], avi_file)
        if not os.path.exists(avi_file):
            return False

        # Switch the display to 32 bits per pixel for playback.
        self.screen_real = pg.display.set_mode(
            self.screen_real.get_size(), self.screen_real.get_flags(), 32)

        # Pass 1: convert the audio stream into an in-memory WAV.
        container = av.open(avi_file,
                            metadata_encoding=encoding,
                            metadata_errors='replace')
        audio_stream = next(s for s in container.streams if s.type == 'audio')
        wav_buffer = BytesIO()
        wav_writer = wave.open(wav_buffer, 'wb')
        resampler = av.AudioResampler(
            format=av.AudioFormat('s16').packed,
            layout='stereo',
            rate=config['samplerate'],
        )
        wav_writer.setparams(
            (2, 2, config['samplerate'], 0, 'NONE', "not compressed"))
        for audio_packet in container.demux(audio_stream):
            for audio_frame in audio_packet.decode():
                resampled = resampler.resample(audio_frame)
                wav_writer.writeframes(resampled.planes[0].to_bytes())
        wav_writer.close()
        wav_buffer.seek(0)
        pg.mixer.music.load(wav_buffer)

        # Pass 2: reopen the file and play the video stream.
        container = av.open(avi_file,
                            metadata_encoding=encoding,
                            metadata_errors='replace')
        video_stream = next(s for s in container.streams if s.type == 'video')
        frame_interval = int(round(1000 / video_stream.rate))
        pg.mixer.music.play()
        self.clear_key_state()
        # Without pg.image.frombuffer, frames go through an in-memory BMP.
        use_bmp_fallback = not hasattr(pg.image, 'frombuffer')

        try:
            for video_packet in container.demux(video_stream):
                for video_frame in video_packet.decode():
                    target_size = self.screen_real.get_size()
                    frame_start = pg.time.get_ticks()
                    if use_bmp_fallback:
                        bmp_buffer = BytesIO()
                        video_frame.to_image().save(bmp_buffer, 'bmp')
                        bmp_buffer.seek(0)
                        surface = pg.image.load(bmp_buffer)
                    else:
                        rgb_bytes = video_frame.to_rgb().planes[0].to_bytes()
                        # The frame size is hard-coded to 288x180.
                        surface = pg.image.frombuffer(
                            rgb_bytes, (288, 180), 'RGB')
                    self.screen_real.blit(
                        pg.transform.smoothscale(surface, target_size),
                        (0, 0))
                    pg.display.flip()

                    self.delay_until(frame_start + frame_interval)
                    # A key press aborts playback via the handler below.
                    if self.input_state.key_press:
                        raise KeyboardInterrupt
        except KeyboardInterrupt:
            pass
        finally:
            self.clear_key_state()
            if pg.mixer.get_init():
                pg.mixer.music.pause()

        # Restore the 8-bit display mode and the palette.
        self.screen_real = pg.display.set_mode(
            self.screen_real.get_size(), self.screen_real.get_flags(), 8)
        self.set_palette(self.num_palette, self.night_palette)
        return True