async def recv(self):
    """Receive the next audio frame from the internal queue.

    Pulls ``(indata, _)`` tuples off ``self.__queue``, wraps the samples in a
    packed-mono s16 :class:`AudioFrame`, and stamps it with a wall-clock-derived
    pts. Raises ``MediaStreamError`` when the track is not live, when the queue
    yields a falsy end-of-stream sentinel, or when frame construction fails.
    """
    if self.readyState != "live":
        raise MediaStreamError
    data = await self.__queue.get()
    if not data:
        # Falsy sentinel marks end of stream: stop the track and signal it.
        self.stop()
        raise MediaStreamError
    try:
        indata, _ = data
        # (samples, 1) -> (1, samples): the packed layout from_ndarray
        # expects for format 's16' / layout 'mono'.
        frame = AudioFrame.from_ndarray(
            indata.reshape(indata.shape[::-1]), format='s16', layout='mono')
        # NOTE(review): shape[0] is this chunk's *sample count*; it only equals
        # the sample rate if each chunk holds exactly one second — confirm.
        sample_rate = indata.shape[0]
        if hasattr(self, "_timestamp"):
            # Advance pts by the wall-clock time elapsed since the first frame.
            samples = int((time.time() - self._start) * sample_rate)
            self._timestamp += samples
        else:
            self._start = time.time()
            self._timestamp = 0
        frame.pts = self._timestamp
        frame.sample_rate = sample_rate
        frame.time_base = fractions.Fraction(1, sample_rate)
        return frame
    except Exception:
        # Was a bare `except:`, which would also swallow
        # asyncio.CancelledError; narrowed to Exception.
        Logger.exception('Audio:')
        self.stop()
        raise MediaStreamError
async def recv(self) -> Frame:
    """
    Receive the next :class:`~av.audio.frame.AudioFrame`.

    Reads ``samples`` frames of mono int16 audio from ``self.stream`` (a
    sounddevice input stream) and paces delivery in real time by sleeping
    until each frame's ideal wall-clock deadline.
    """
    if self.readyState != "live":
        raise Exception("media stream error")  # MediaStreamError
    sample_rate = 16000  # 8000
    samples = int(AUDIO_PTIME * sample_rate)
    if hasattr(self, "_timestamp"):
        self._timestamp += samples
        # Sleep until the wall-clock instant this frame is due
        # (a negative wait is a no-op for asyncio.sleep).
        wait = self._start + (self._timestamp / sample_rate) - time.time()
        await asyncio.sleep(wait)
    else:
        self._start = time.time()
        self._timestamp = 0
    try:
        data, overflowed = self.stream.read(frames=samples)
        # sounddevice returns (samples, channels); PyAV's packed s16 layout
        # wants (channels, samples), hence the axis swap.
        fixed_data = np.swapaxes(data, 0, 1)
        frame = AudioFrame.from_ndarray(fixed_data, layout="mono", format='s16')
        frame.pts = self._timestamp
        frame.sample_rate = sample_rate
        frame.time_base = fractions.Fraction(1, sample_rate)
    except Exception as exc:
        # Previously the exception was only logged and control fell through
        # to `return frame`, raising UnboundLocalError when `frame` was
        # never assigned. Re-raise so the caller sees a stream error.
        logger.exception(exc)
        raise Exception("media stream error") from exc
    return frame
def test_ndarray_u8(self):
    """Round-trip random u8 data through from_ndarray/to_ndarray."""
    cases = (
        ('u8', 'mono', 'u1', (1, 160)),
        ('u8', 'stereo', 'u1', (1, 320)),
        ('u8p', 'mono', 'u1', (1, 160)),
        ('u8p', 'stereo', 'u1', (2, 160)),
    )
    for fmt, chan_layout, dt, shape in cases:
        samples_in = numpy.random.randint(0, 256, size=shape, dtype=dt)
        frame = AudioFrame.from_ndarray(samples_in, format=fmt, layout=chan_layout)
        self.assertEqual(frame.format.name, fmt)
        self.assertEqual(frame.layout.name, chan_layout)
        self.assertEqual(frame.samples, 160)
        self.assertTrue((frame.to_ndarray() == samples_in).all())
def test_ndarray_u8(self):
    """Round-trip random u8 data through from_ndarray/to_ndarray."""
    for fmt, chan_layout, dt, shape in (
        ("u8", "mono", "u1", (1, 160)),
        ("u8", "stereo", "u1", (1, 320)),
        ("u8p", "mono", "u1", (1, 160)),
        ("u8p", "stereo", "u1", (2, 160)),
    ):
        samples_in = numpy.random.randint(0, 256, size=shape, dtype=dt)
        frame = AudioFrame.from_ndarray(samples_in, format=fmt, layout=chan_layout)
        self.assertEqual(frame.format.name, fmt)
        self.assertEqual(frame.layout.name, chan_layout)
        self.assertEqual(frame.samples, 160)
        self.assertNdarraysEqual(frame.to_ndarray(), samples_in)
def test_ndarray_dbl(self):
    """Round-trip random double-precision data through from_ndarray/to_ndarray."""
    cases = (
        ('dbl', 'mono', '<f8', (1, 160)),
        ('dbl', 'stereo', '<f8', (1, 320)),
        ('dblp', 'mono', '<f8', (1, 160)),
        ('dblp', 'stereo', '<f8', (2, 160)),
    )
    for fmt, chan_layout, dt, shape in cases:
        samples_in = numpy.ndarray(shape=shape, dtype=dt)
        for row in range(shape[0]):
            samples_in[row][:] = numpy.random.rand(shape[1])
        frame = AudioFrame.from_ndarray(samples_in, format=fmt, layout=chan_layout)
        self.assertEqual(frame.format.name, fmt)
        self.assertEqual(frame.layout.name, chan_layout)
        self.assertEqual(frame.samples, 160)
        self.assertTrue((frame.to_ndarray() == samples_in).all())
def process_session(loop, session, track, quit_event):
    """Pump fixed-size chunks of the 'Joe' track into the track's asyncio queue.

    Runs on a worker thread: waits until 200 new samples are available past
    ``track.last_idx``, wraps them in a packed-mono s16 AudioFrame at 22050 Hz,
    and schedules ``track.q.put(frame)`` onto ``loop``. Returns when
    ``quit_event`` is set.
    """
    CHUNK = 200
    while not quit_event.is_set():
        # Wait for enough new data. Previously this spun on a bare
        # `continue`, pegging a CPU core and never checking quit_event
        # while starved (so shutdown could hang forever).
        while len(session.tracks['Joe']) - track.last_idx < CHUNK:
            if quit_event.wait(0.001):
                return
        curr_len = track.last_idx + CHUNK
        # Reinterpret the slice as int16, shaped (1, N) for the packed
        # mono layout. NOTE(review): .view(np.int16) assumes the backing
        # buffer is already 16-bit PCM — confirm the track's dtype.
        curr_nd = np.array([session.tracks['Joe'][track.last_idx:curr_len]]).view(np.int16)
        frame = AudioFrame.from_ndarray(curr_nd, format='s16', layout='mono')
        frame.sample_rate = 22050
        # pts counts samples, matching a clock rate equal to the sample rate.
        frame.pts = track.last_idx
        track.last_idx = curr_len
        asyncio.run_coroutine_threadsafe(track.q.put(frame), loop)
def test_ndarray_dbl(self):
    """Round-trip random double-precision data through from_ndarray/to_ndarray."""
    for fmt, chan_layout, dt, shape in (
        ("dbl", "mono", "f8", (1, 160)),
        ("dbl", "stereo", "f8", (1, 320)),
        ("dblp", "mono", "f8", (1, 160)),
        ("dblp", "stereo", "f8", (2, 160)),
    ):
        samples_in = numpy.ndarray(shape=shape, dtype=dt)
        for row in range(shape[0]):
            samples_in[row][:] = numpy.random.rand(shape[1])
        frame = AudioFrame.from_ndarray(samples_in, format=fmt, layout=chan_layout)
        self.assertEqual(frame.format.name, fmt)
        self.assertEqual(frame.layout.name, chan_layout)
        self.assertEqual(frame.samples, 160)
        self.assertNdarraysEqual(frame.to_ndarray(), samples_in)
def test_from_ndarray_value_error(self):
    """from_ndarray must reject arrays with the wrong dtype, ndim, or shape."""
    bad_inputs = [
        # (shape, dtype, expected error message)
        (
            (1, 160),
            "f2",
            "Expected numpy array with dtype `float32` but got `float16`",
        ),
        (
            (1, 160, 2),
            "f4",
            "Expected numpy array with ndim `2` but got `3`",
        ),
        (
            (2, 160),
            "f4",
            "Unexpected numpy array shape `(2, 160)`",
        ),
    ]
    for shape, dtype, message in bad_inputs:
        array = numpy.ndarray(shape=shape, dtype=dtype)
        with self.assertRaises(ValueError) as cm:
            AudioFrame.from_ndarray(array, format="flt", layout="mono")
        self.assertEqual(str(cm.exception), message)
async def recv(self):
    """Receive the next audio frame from the subscription.

    Converts float samples in [-1, 1] to packed s16, stamps the frame with a
    sample-count clock, and — when ``self._canSkip`` — jumps the pts forward
    if the stream falls over a second behind real time. Raises
    ``MediaStreamError`` when the subscription closes or errors.
    """
    if self._startTime is None and self._startedCallback is not None:
        self._startedCallback()
    try:
        data = await self._audioSubscription.get()
    except SubscriptionClosed:
        self._log.debug(
            "Audio track finished. raising MediaStreamError to shut down connection"
        )
        self.stop()
        raise MediaStreamError
    except Exception:
        # Was a bare `except:`, which would also swallow
        # asyncio.CancelledError; narrowed to Exception.
        self._log.exception("Got unknown error. Crashing audio stream")
        self.stop()
        raise MediaStreamError

    # https://trac.ffmpeg.org/wiki/audio%20types
    # https://github.com/mikeboers/PyAV/blob/develop/av/audio/frame.pyx
    # https://ffmpeg.org/doxygen/3.0/group__channel__mask__c.html
    #
    # It looks like at the time of writing, audio samples are only accepted as
    # s16, and not as float (flt) in aiortc. We therefore use s16 as format
    # instead of flt, and convert the data:
    # https://github.com/jlaine/aiortc/blob/master/aiortc/codecs/opus.py
    #
    # Scale by 32767, not 32768: 1.0 * 32768 overflows int16 and wraps to
    # -32768 (a full-scale positive sample became full-scale negative).
    data = (np.clip(data, -1, 1) * 32767).astype(np.int16)
    # assumes data is (samples, channels) — TODO confirm against the
    # subscription producer. reshape(1, -1) interleaves into the single
    # packed plane that format="s16" expects.
    new_frame = AudioFrame.from_ndarray(
        data.reshape(1, -1), format="s16", layout=str(data.shape[1]) + "c")

    # Use the sample rate for the base clock
    new_frame.sample_rate = self._sampleRate
    new_frame.time_base = fractions.Fraction(1, self._sampleRate)

    if self._startTime is None:
        self._startTime = time.time()

    # Since our clock rate is simply our sample rate, the timestamp is the
    # number of samples we should have seen so far
    # https://en.wikipedia.org/wiki/Presentation_timestamp
    new_frame.pts = self._sampleNumber
    self._sampleNumber += data.shape[0]

    # The sample count a perfectly real-time stream would have reached.
    perfectSampleNumber = (
        int((time.time() - self._startTime) * self._sampleRate) + data.shape[0]
    )

    if self._canSkip:
        if perfectSampleNumber - self._sampleRate * 1 > self._sampleNumber:
            # The audio is over 1 second behind where it is supposed to be.
            # Adjust the sample number to the "corrected" version.
            # (logger.warn is a deprecated alias of logger.warning.)
            self._log.warning(
                "Received audio is over 1 second behind optimal timestamp! Skipping audio forward! Use canSkip=False to disable this correction"
            )
            new_frame.pts = perfectSampleNumber - data.shape[0]

    if perfectSampleNumber + self._sampleRate * 2 < self._sampleNumber:
        # If the audio stream is over 2 seconds ahead, wait 1 second before continuing.
        self._log.debug(
            "Stream is over 2 seconds ahead. Sleeping for 1 second.")
        await asyncio.sleep(1)

    self._log.debug("Writing frame %s", new_frame)
    return new_frame
async def recv(self):
    """Receive the next audio frame from the subscription.

    Converts float samples in [-1, 1] to packed s16, stamps the frame with a
    sample-count clock, and — when ``self._canSkip`` — jumps the pts forward
    if the stream falls over a second behind real time. Raises
    ``MediaStreamError`` when the subscription closes or errors.
    """
    if self._startTime is None and self._startedCallback is not None:
        self._startedCallback()
    try:
        data = await self._audioSubscription.get()
    except SubscriptionClosed:
        self._log.debug(
            "Audio track finished. raising MediaStreamError to shut down connection"
        )
        self.stop()
        raise MediaStreamError
    except Exception:
        # Was a bare `except:` — narrowed so asyncio.CancelledError still
        # propagates. Message fixed: this is the audio stream, not video.
        self._log.exception("Got unknown error. Crashing audio stream")
        self.stop()
        raise MediaStreamError

    # https://trac.ffmpeg.org/wiki/audio%20types
    # https://github.com/mikeboers/PyAV/blob/develop/av/audio/frame.pyx
    # https://ffmpeg.org/doxygen/3.0/group__channel__mask__c.html
    #
    # It looks like at the time of writing, audio samples are only accepted as
    # s16, and not as float (flt) in aiortc. We therefore use s16 as format
    # instead of flt, and convert the data:
    # https://github.com/jlaine/aiortc/blob/master/aiortc/codecs/opus.py
    #
    # Scale by 32767, not 32768: 1.0 * 32768 overflows int16 and wraps to
    # -32768 (a full-scale positive sample became full-scale negative).
    data = (np.clip(data, -1, 1) * 32767).astype(np.int16)
    # assumes data is (channels, samples) — TODO confirm against the
    # subscription producer (the pts below advances by data.shape[1]).
    new_frame = AudioFrame.from_ndarray(
        data, format="s16", layout=str(data.shape[0]) + "c"
    )

    # Use the sample rate for the base clock
    new_frame.sample_rate = self._sampleRate
    new_frame.time_base = fractions.Fraction(1, self._sampleRate)

    if self._startTime is None:
        self._startTime = time.time()

    # Since our clock rate is simply our sample rate, the timestamp is the
    # number of samples we should have seen so far
    # https://en.wikipedia.org/wiki/Presentation_timestamp
    new_frame.pts = self._sampleNumber
    self._sampleNumber += data.shape[1]

    # The sample count a perfectly real-time stream would have reached.
    perfectSampleNumber = (
        int((time.time() - self._startTime) * self._sampleRate) + data.shape[1]
    )

    if self._canSkip:
        if perfectSampleNumber - self._sampleRate * 1 > self._sampleNumber:
            # The audio is over 1 second behind where it is supposed to be.
            # Adjust the sample number to the "corrected" version.
            # (logger.warn is a deprecated alias of logger.warning.)
            self._log.warning(
                "Received audio is over 1 second behind optimal timestamp! Skipping audio forward! Use canSkip=False to disable this correction"
            )
            new_frame.pts = perfectSampleNumber - data.shape[1]

    if perfectSampleNumber + self._sampleRate * 2 < self._sampleNumber:
        # If the audio stream is over 2 seconds ahead, wait 1 second before continuing.
        self._log.debug("Stream is over 2 seconds ahead. Sleeping for 1 second.")
        await asyncio.sleep(1)

    self._log.debug("Writing frame %s", new_frame)
    return new_frame