def generate_audio_frame(pcm_mulaw=False):
    """Generate a silent (all-zero) mono audio frame.

    When *pcm_mulaw* is true, build a single-sample signed-16-bit frame;
    otherwise build a 1024-sample double-precision frame.
    """
    if pcm_mulaw:
        # One s16 sample = 2 zero bytes.
        frame = av.AudioFrame(format="s16", layout="mono", samples=1)
        silence = b"\x00" * 2
    else:
        # 1024 "dbl" samples, 8 zero bytes each.
        frame = av.AudioFrame(format="dbl", layout="mono", samples=1024)
        silence = b"\x00" * (8 * 1024)
    frame.planes[0].update(silence)
    frame.sample_rate = AUDIO_SAMPLE_RATE
    frame.time_base = Fraction(1, AUDIO_SAMPLE_RATE)
    return frame
def test_pts_simple(self):
    """Contiguous writes read back with correctly advancing pts.

    Writes a 1024-sample frame at pts=0, reads 512 samples, writes the
    contiguous follow-up frame, and checks the FIFO's pts/sample
    bookkeeping.  A write with a discontinuous pts must raise ValueError.
    """
    fifo = av.AudioFifo()

    iframe = av.AudioFrame(samples=1024)
    iframe.pts = 0
    iframe.sample_rate = 48000
    iframe.time_base = "1/48000"

    fifo.write(iframe)
    oframe = fifo.read(512)
    # Idiomatic unittest: assertIsNotNone instead of assertTrue(x is not None).
    self.assertIsNotNone(oframe)
    self.assertEqual(oframe.pts, 0)
    self.assertEqual(oframe.time_base, iframe.time_base)
    self.assertEqual(fifo.samples_written, 1024)
    self.assertEqual(fifo.samples_read, 512)
    self.assertEqual(fifo.pts_per_sample, 1.0)

    iframe.pts = 1024
    fifo.write(iframe)
    oframe = fifo.read(512)
    self.assertIsNotNone(oframe)
    self.assertEqual(oframe.pts, 512)
    self.assertEqual(oframe.time_base, iframe.time_base)

    iframe.pts = 9999  # Wrong!
    self.assertRaises(ValueError, fifo.write, iframe)
def transcode(self, in_frame: np.ndarray, time_info: TimeInfo) -> T.Tuple[av.AudioFrame, float]:
    """Wrap a raw PyAudio buffer into a timestamped av.AudioFrame.

    Returns the new frame together with the ADC capture timestamp taken
    from *time_info*.
    """
    # Step 1: interpret the raw bytes as a single row of samples.
    raw = np.frombuffer(in_frame, dtype=self.dtype)
    raw.shape = 1, -1

    # The buffer must hold a whole number of samples per channel.
    per_channel = raw.size / self.channels
    assert per_channel == int(per_channel)
    per_channel = int(per_channel)

    if av.AudioFormat(self.pyav_format).is_planar:
        # NOTE(review): raw was reshaped to one row above, so this branch
        # effectively requires channels == 1 for planar formats — confirm
        # that is the intent (compare the interleave-aware variant).
        assert raw.shape[0] == self.channels
        sample_count = raw.shape[1]
    else:
        assert raw.shape[0] == 1
        sample_count = raw.shape[1] // self.channels

    # Step 2: build the PyAV frame and copy each plane's data in.
    out_frame = av.AudioFrame(format=self.pyav_format, layout=self.pyav_layout, samples=sample_count)
    for idx, plane in enumerate(out_frame.planes):
        plane.update(raw[idx, :])

    out_frame.rate = int(self.frame_rate)
    out_frame.time_base = Fraction(1, int(self.frame_rate))
    # Monotonic pts: each frame advances by its own sample count.
    out_frame.pts = out_frame.samples * self.num_encoded_frames
    self.num_encoded_frames += 1
    return out_frame, time_info.input_buffer_adc_time
def _frame_from_ndarray(array, channels, format):
    """
    Construct an av.AudioFrame from a numpy array.

    :param array: 2-D sample data — one row per plane for planar formats,
        a single interleaved row otherwise.
    :param channels: number of audio channels.
    :param format: avcodec sample-format name (e.g. ``s16``, ``fltp``).
    :raises ValueError: if *format* has no known numpy dtype mapping.
    """
    format_dtypes = PyAVCodec._format_dtypes
    nb_channels = channels
    layout = PyAVCodec._channel_layout_names[channels]

    # Map the avcodec sample format to the matching numpy dtype.
    try:
        dtype = np.dtype(format_dtypes[format])
    except KeyError:
        # `from None` hides the internal KeyError from the caller's traceback.
        raise ValueError(
            'Conversion from numpy array with format `%s` is not yet supported'
            % format) from None

    assert array.dtype == dtype
    assert array.ndim == 2
    if av.AudioFormat(format).is_planar:
        assert array.shape[0] == nb_channels, \
            f"array.shape={array.shape}, nb_channels={nb_channels}"
        samples = array.shape[1]
    else:
        assert array.shape[0] == 1
        samples = array.shape[1] // nb_channels

    frame = av.AudioFrame(format=format, layout=layout, samples=samples)
    for i, plane in enumerate(frame.planes):
        plane.update(array[i, :])
    return frame
def generate_audio_frame():
    """Generate a silent (all-zero) 1024-sample mono audio frame."""
    audio_frame = av.AudioFrame(format="dbl", layout="mono", samples=1024)
    # Each "dbl" sample is 8 bytes; fill the single mono plane with zeros.
    audio_bytes = b"\x00\x00\x00\x00\x00\x00\x00\x00" * 1024
    audio_frame.planes[0].update(audio_bytes)
    audio_frame.sample_rate = AUDIO_SAMPLE_RATE
    audio_frame.time_base = Fraction(1, AUDIO_SAMPLE_RATE)
    return audio_frame
def f(frame_sample_size):
    """Return a silent AudioFrame holding *frame_sample_size* samples."""
    frame = av.AudioFrame(samples=frame_sample_size, format=av_format, layout=av_layout)
    frame.pts = None
    frame.sample_rate = sample_rate
    # Zero every plane in place through a numpy view of its buffer.
    for plane in frame.planes:
        np.frombuffer(plane, dtype=dtype)[:] = 0
    return frame
def audio_frame_to_avframe(frame):
    """
    Convert an aiortc.AudioFrame to av.AudioFrame.
    """
    assert frame.channels in [1, 2]
    assert frame.sample_width in [1, 2, 4]

    # Total samples = bytes / (bytes per interleaved sample group).
    bytes_per_group = frame.channels * frame.sample_width
    sample_count = len(frame.data) // bytes_per_group

    layout = 'stereo' if frame.channels == 2 else 'mono'
    av_frame = av.AudioFrame(
        format='s%d' % (8 * frame.sample_width),
        layout=layout,
        samples=sample_count)
    av_frame.planes[0].update(frame.data)
    av_frame.sample_rate = frame.sample_rate
    av_frame.time_base = frame.time_base
    return av_frame
def test_missing_time_base(self):
    """A frame written without a time_base reads back with pts/time_base unset."""
    fifo = av.AudioFifo()

    iframe = av.AudioFrame(samples=1024)
    iframe.pts = 0
    iframe.sample_rate = 48000
    # Deliberately no time_base set: the FIFO cannot compute an output pts.

    fifo.write(iframe)
    oframe = fifo.read(512)
    # Idiomatic unittest: assertIsNotNone instead of assertTrue(x is not None).
    self.assertIsNotNone(oframe)
    self.assertIsNone(oframe.pts)
    self.assertIsNone(oframe.time_base)
    self.assertEqual(oframe.sample_rate, iframe.sample_rate)
def test_pts_complex(self):
    """A time_base finer than the sample rate yields >1 pts tick per sample."""
    fifo = av.AudioFifo()

    frame_in = av.AudioFrame(samples=1024)
    frame_in.pts = 0
    frame_in.sample_rate = 48000
    frame_in.time_base = "1/96000"
    fifo.write(frame_in)

    frame_in.pts = 2048
    fifo.write(frame_in)

    last_out = fifo.read_many(1024)[-1]
    self.assertEqual(last_out.pts, 2048)
    self.assertEqual(fifo.pts_per_sample, 2.0)
def transcode(self, in_frame: np.ndarray, time_info: TimeInfo) -> T.Tuple[av.AudioFrame, float]:
    """Convert a raw interleaved PyAudio buffer into an av.AudioFrame.

    :param in_frame: raw interleaved sample bytes from PyAudio.
    :param time_info: PyAudio timing info for this buffer.
    :return: ``(frame, capture_timestamp)`` tuple.
    """
    # Step 1: Decode PyAudio input frame.
    # np.frombuffer replaces the deprecated (and since-removed for binary
    # data) np.fromstring — identical interpretation of the bytes.
    tmp_frame = np.frombuffer(in_frame, dtype=self.dtype)
    chunk_length = len(tmp_frame) / self.channels
    assert chunk_length == int(chunk_length)
    chunk_length = int(chunk_length)
    tmp_frame = np.reshape(tmp_frame, (chunk_length, self.channels))

    # Step 2: Encode PyAV output frame.
    # Flatten in column-major (Fortran-style) order, effectively
    # converting the interleaved buffer to a planar layout.  The flatten
    # also copies, so the (read-only) frombuffer view becomes writable.
    # The element count is unchanged, so chunk_length stays valid — the
    # original recomputation of it here was redundant and is removed.
    tmp_frame = tmp_frame.flatten(order="F")
    tmp_frame = np.reshape(tmp_frame, (self.channels, chunk_length))

    assert tmp_frame.ndim == 2
    if av.AudioFormat(self.pyav_format).is_planar:
        assert tmp_frame.shape[0] == self.channels
        samples = tmp_frame.shape[1]
    else:
        assert tmp_frame.shape[0] == 1
        samples = tmp_frame.shape[1] // self.channels

    out_frame = av.AudioFrame(format=self.pyav_format, layout=self.pyav_layout, samples=samples)
    for i, plane in enumerate(out_frame.planes):
        plane.update(tmp_frame[i, :])

    out_frame.rate = int(self.frame_rate)
    out_frame.time_base = Fraction(1, int(self.frame_rate))
    # Monotonic pts: each frame advances by its own sample count.
    out_frame.pts = out_frame.samples * self.num_encoded_frames
    self.num_encoded_frames += 1
    return out_frame, time_info.input_buffer_adc_time
def get_input(frame_num):
    """
    Manually construct and update an AudioFrame.

    Consider using AudioFrame.from_ndarray for most real-life
    numpy->AudioFrame conversions.

    :param frame_num: index of the frame being generated; used for the
        pts and as the phase offset of the generated sine wave.
    :return: an av.AudioFrame filled with per-channel sine data.
    """
    frame = av.AudioFrame(format=INPUT_FORMAT, layout=INPUT_CHANNEL_LAYOUT, samples=FRAME_SIZE)
    frame.sample_rate = INPUT_SAMPLE_RATE
    frame.pts = frame_num * FRAME_SIZE

    # Vectorized sine generation (replaces the per-sample Python loop):
    # channel i carries frequency multiplier (i + 1).
    sample_index = frame_num + np.arange(FRAME_SIZE)
    for i in range(len(frame.layout.channels)):
        data = np.sin(2 * np.pi * sample_index * (i + 1) / float(FRAME_SIZE))
        frame.planes[i].update(data.astype(af.format_dtypes[INPUT_FORMAT]))
    return frame