def __init__(self) -> None:
    """Create one Opus encoder and one Opus decoder and configure both.

    Channel count and sample rate are read from ``consts``; the encoder is
    additionally put into the 'voip' application mode.
    """
    encoder = OpusEncoder()
    decoder = OpusDecoder()

    # Encoder configuration.
    encoder.set_channels(consts.CHANNELS)
    encoder.set_sampling_frequency(consts.SAMPLE_RATE)
    encoder.set_application('voip')

    # Decoder configuration mirrors the encoder's audio format.
    decoder.set_channels(consts.CHANNELS)
    decoder.set_sampling_frequency(consts.SAMPLE_RATE)

    self._encoder = encoder
    self._decoder = decoder
def __init__(self, host, port, out_filename, echo=False):
    """Open the output wave file and create the Opus decoder.

    Args:
        host: Forwarded unchanged to the base-class initialiser.
        port: Forwarded unchanged to the base-class initialiser.
        out_filename: Path of the wave file opened for writing.
        echo: Accepted for interface compatibility; not used in this method.
    """
    super().__init__(host, port)

    # Have we started receiving data from the jitter buffer
    self._started = False

    # One definition of the audio format, used for both the wave header and
    # the decoder so the two cannot drift apart.
    samples_per_second = 48000  # FIXME
    channels = 1  # FIXME
    bytes_per_sample = 2  # FIXME

    # Output
    print(f"Opening file '{out_filename}' for writing as wave file")
    self._wave_write = wave.open(out_filename, "wb")
    self._wave_write.setnchannels(channels)
    self._wave_write.setsampwidth(bytes_per_sample)
    self._wave_write.setframerate(samples_per_second)

    self._opus_decoder = OpusDecoder()
    self._opus_decoder.set_sampling_frequency(samples_per_second)
    self._opus_decoder.set_channels(channels)
class Encoder:
    """Bundle an Opus encoder and decoder configured from ``consts``.

    Both ``encode`` and ``decode`` are best-effort: any exception raised by
    the underlying codec is printed and ``None`` is returned instead of the
    converted frame, so callers must be prepared for a ``None`` result.
    """

    def __init__(self) -> None:
        """Create and configure the encoder ('voip' mode) and the decoder."""
        self._encoder = OpusEncoder()
        self._decoder = OpusDecoder()

        self._encoder.set_channels(consts.CHANNELS)
        self._encoder.set_sampling_frequency(consts.SAMPLE_RATE)
        self._encoder.set_application('voip')

        self._decoder.set_channels(consts.CHANNELS)
        self._decoder.set_sampling_frequency(consts.SAMPLE_RATE)

    def decode(self, frame: bytes) -> "bytes | None":
        """Decode one frame.

        Args:
            frame: The encoded frame; it is copied into a ``bytearray``
                before being handed to the decoder.

        Returns:
            The decoder's output converted with ``tobytes()``, or ``None``
            if decoding raised an exception (the error is printed).
        """
        try:
            return self._decoder.decode(bytearray(frame)).tobytes()
        except Exception as e:
            # Deliberately best-effort: report the problem and carry on.
            print("Decode error: ", str(e))
            return None

    def encode(self, frame: bytes) -> "bytes | None":
        """Encode one frame.

        Args:
            frame: The frame handed to the encoder as-is.

        Returns:
            The encoder's output converted with ``tobytes()``, or ``None``
            if encoding raised an exception (the error is printed).
        """
        try:
            return self._encoder.encode(frame).tobytes()
        except Exception as e:
            # Deliberately best-effort: report the problem and carry on.
            print("Encode error: ", str(e))
            return None
# Fragment of a larger script: configures an Opus encoder/decoder pair and an
# output wav file.  `wave_read`, `channels`, `samples_per_second` and
# `filename` are defined before this fragment (not visible here).
print("Sampling frequency:", samples_per_second)
bytes_per_sample = wave_read.getsampwidth()

# Create an Opus encoder
opus_encoder = OpusBufferedEncoder()
opus_encoder.set_application("audio")
opus_encoder.set_sampling_frequency(samples_per_second)
opus_encoder.set_channels(channels)
desired_frame_duration = 20 # ms
opus_encoder.set_frame_size(desired_frame_duration)

# Setup decoding
# ==============

# Create an Opus decoder (same channel count and sample rate as the encoder)
opus_decoder = OpusDecoder()
opus_decoder.set_channels(channels)
opus_decoder.set_sampling_frequency(samples_per_second)

# Open an output wav for the decoded PCM
output_filename = "output-" + filename
wave_write = wave.open(output_filename, "wb")
print("Writing wav into file '{:s}'".format(output_filename))

# Save the wav's specification (same channels, rate and sample width as
# the values used above)
wave_write.setnchannels(channels)
wave_write.setframerate(samples_per_second)
wave_write.setsampwidth(bytes_per_sample)

# Execute encode-decode
# =====================
def process_audio_frame(self, count):
    """Mix one frame from every client and send each client the others' audio.

    For each cycle, one packet is taken from every connection's jitter
    buffer and decoded to float PCM.  All signals are scaled and summed,
    and every client that contributed a signal is sent the sum minus its
    own signal, re-encoded with that connection's Opus encoder.

    Decoder, encoder and the ``started`` flag are created lazily and cached
    in the connection dict.

    Args:
        count: Number of cycles to process in this call.  A warning is
            printed when it is not 1.
    """
    start_time = time.time()

    if count != 1:
        print("WARNING in process_audio_frame(), count:", count)

    frame_duration_ms = 20  # ms FIXME

    # The number of people who may simultaneously speak
    # without the volume decreasing
    simultaneous_voices = 2  # FIXME

    # Repeat count times
    for _ in range(count):
        pcms = {}

        # For each jitter buffer, get the next packet and decode it.
        for address, connection in self._connections.items():
            jitter_buffer = connection["jitter_buffer"]
            encoded_packet = jitter_buffer.get_packet()

            # Get decoder (created on first use for this connection)
            try:
                opus_decoder = connection["opus_decoder"]
                started = connection["started"]
            except KeyError:
                opus_decoder = OpusDecoder()
                opus_decoder.set_sampling_frequency(48000)  # FIXME
                opus_decoder.set_channels(1)  # FIXME
                started = False
                connection["opus_decoder"] = opus_decoder
                connection["started"] = started

            # Decode encoded packet to PCM
            if encoded_packet is None:
                if not started:
                    # We haven't started yet, so ignore this connection
                    pcm = None
                else:
                    # We have started, so this means we've lost a packet
                    pcm = opus_decoder.decode_missing_packet(
                        frame_duration_ms)
            else:
                # We've got a valid packet, decode it
                pcm = opus_decoder.decode(encoded_packet)
                # Remember that this connection has delivered audio.
                # Without this the flag stays False forever and a lost
                # packet is never concealed.
                if not started:
                    connection["started"] = True

            # Convert the PCM to floating point, one column per channel
            if pcm is not None:
                pcm_int16 = numpy.frombuffer(pcm, dtype=numpy.int16)
                pcm_float = pcm_int16.astype(numpy.float32)
                pcm_float /= 2**15
                pcm = numpy.reshape(pcm_float, (len(pcm_float), 1))

            pcms[address] = pcm

        # Loop through all the pcms and mix them together
        combined_pcm = None
        for address, pcm in pcms.items():
            if pcm is None:
                continue
            pcm /= simultaneous_voices
            pcms[address] = pcm
            if combined_pcm is None:
                combined_pcm = pcm.copy()
            else:
                combined_pcm += pcm

        if combined_pcm is None:
            # Nobody produced audio in this cycle
            continue

        # Prepare and send each individual client's PCM
        for address, connection in self._connections.items():
            client_signal = pcms[address]
            if client_signal is None:
                continue

            # Remove their signal from the audio
            client_pcm = combined_pcm - client_signal

            # With more than simultaneous_voices + 1 loud clients the sum
            # can exceed full scale; clip so the int16 conversion below
            # saturates instead of overflowing.
            numpy.clip(client_pcm, -1.0, 1.0, out=client_pcm)

            # Convert from float32 to int16
            pcm_int16 = (client_pcm * (2**15 - 1)).astype(numpy.int16)

            # Obtain encoder (created on first use for this connection)
            try:
                opus_encoder = connection["opus_encoder"]
            except KeyError:
                opus_encoder = OpusEncoder()
                opus_encoder.set_application("audio")
                opus_encoder.set_sampling_frequency(48000)
                opus_encoder.set_channels(1)
                connection["opus_encoder"] = opus_encoder

            # Encode the PCM
            encoded_packet = opus_encoder.encode(pcm_int16.tobytes())

            # Send encoded packet
            udp_packetizer = connection["udp_packetizer"]
            if encoded_packet is not None:
                udp_packetizer.write(encoded_packet)

    end_time = time.time()
    elapsed_ms = (end_time - start_time) * 1000
    if elapsed_ms > 5:
        print(f"process_audio_frame() duration: {round(elapsed_ms)} ms")
def process_audio_frame(self, count):
    """Mix client audio (plus optional playback audio) and send it out.

    For each cycle, one packet is taken from every connection's jitter
    buffer and decoded to float PCM; all signals are scaled and summed.
    If a playback stream is active, one frame of it is converted to mono,
    halved in volume and added to the mix.  The resulting mix is encoded
    with each connection's Opus encoder and sent to every client that
    contributed a signal in this cycle.

    Decoder, encoder and the ``started`` flag are created lazily and cached
    in the connection dict.

    Args:
        count: Number of cycles to process in this call.  A warning is
            printed when it is not 1.
    """
    start_time = time.time()

    if count != 1:
        print(f"WARNING in process_audio_frame(), catching up on {count} missed cycles")

    samples_per_second = 48000  # FIXME
    frame_duration_ms = 20  # FIXME
    samples_per_frame = samples_per_second // 1000 * frame_duration_ms  # FIXME

    # The number of people who may simultaneously speak
    # without the volume decreasing
    simultaneous_voices = 2  # FIXME

    # Repeat count times
    for _ in range(count):
        pcms = {}

        # For each jitter buffer, get the next packet and decode it.
        for address, connection in self._connections_by_address.items():
            jitter_buffer = connection["jitter_buffer"]
            encoded_packet = jitter_buffer.get_packet()

            # Get decoder (created on first use for this connection)
            try:
                opus_decoder = connection["opus_decoder"]
                started = connection["started"]
            except KeyError:
                opus_decoder = OpusDecoder()
                opus_decoder.set_sampling_frequency(48000)  # FIXME
                opus_decoder.set_channels(1)  # FIXME
                started = False
                connection["opus_decoder"] = opus_decoder
                connection["started"] = started

            # Decode encoded packet to PCM
            if encoded_packet is None:
                if not started:
                    # We haven't started yet, so ignore this connection
                    pcm = None
                else:
                    # We have started, so this means we've lost a packet
                    pcm = opus_decoder.decode_missing_packet(
                        frame_duration_ms)
            else:
                # We've got a valid packet, decode it
                pcm = opus_decoder.decode(encoded_packet)
                # Remember that this connection has delivered audio.
                # Without this the flag stays False forever and a lost
                # packet is never concealed.
                if not started:
                    connection["started"] = True

            # Convert the PCM to floating point, one column per channel
            if pcm is not None:
                pcm_int16 = numpy.frombuffer(pcm, dtype=numpy.int16)
                pcm_float = pcm_int16.astype(numpy.float32)
                pcm_float /= 2**15
                pcm = numpy.reshape(pcm_float, (len(pcm_float), 1))

            # NOTE: automatic gain control was prototyped at this point in
            # the original code but is currently disabled.

            # Store the PCM
            pcms[address] = pcm

        # Loop through all the pcms and mix them together
        combined_pcm = None
        for address, pcm in pcms.items():
            if pcm is None:
                continue
            pcm /= simultaneous_voices
            pcms[address] = pcm
            if combined_pcm is None:
                combined_pcm = pcm.copy()
            else:
                combined_pcm += pcm

        # Mix in playback audio
        if self._stream is not None or self._stream_buffer is not None:
            # Read the next part of the stream until either we come to the
            # end or we've got sufficient for a full frame.  The stream is
            # only consulted while it still exists.
            while self._stream is not None and (
                    self._stream_buffer is None
                    or len(self._stream_buffer) < samples_per_frame):
                next_ = self._stream.get_buffer_as_array()
                if next_ is None:
                    # We've come to the end
                    self._stream = None
                    break
                # Join what we just read to what we've read so far
                if self._stream_buffer is None:
                    self._stream_buffer = numpy.copy(next_)
                else:
                    self._stream_buffer = numpy.concatenate(
                        (self._stream_buffer, next_))

            # A stream that ended without ever yielding data leaves the
            # buffer as None; there is then nothing to mix in.
            if self._stream_buffer is not None:
                if len(self._stream_buffer) >= samples_per_frame:
                    # Take what we need to fill a frame, leave the rest
                    pcm = self._stream_buffer[:samples_per_frame]
                    self._stream_buffer = (
                        self._stream_buffer[samples_per_frame:])
                else:
                    # Take whatever's left
                    pcm = self._stream_buffer
                    self._stream_buffer = None

                # Convert the int16 data to float
                pcm_float = pcm.astype(numpy.float32)
                pcm_float /= 2**15

                # Average the columns to get mono
                # NOTE(review): assumes the stream yields 2-D
                # (samples, channels) arrays -- confirm
                pcm_float = numpy.mean(pcm_float, axis=1)
                pcm_float = pcm_float.reshape((len(pcm_float), 1))

                # Fill with zeros if it's not long enough
                if len(pcm_float) < samples_per_frame:
                    padding = numpy.zeros(
                        (samples_per_frame - len(pcm_float), 1),
                        dtype=numpy.float32)
                    pcm_float = numpy.concatenate((pcm_float, padding))

                # Halve the volume
                pcm_float /= 2

                # Add it into the combined pcm
                if combined_pcm is None:
                    combined_pcm = pcm_float
                else:
                    combined_pcm += pcm_float

        if combined_pcm is None:
            # Nothing to send in this cycle
            continue

        # In this version the client's own signal is NOT removed from the
        # mix (the subtraction was commented out in the original), so
        # every client receives the same PCM.  Convert it once, clipping
        # to full scale so the int16 conversion cannot overflow.
        mixed = numpy.clip(combined_pcm, -1.0, 1.0)
        mixed_bytes = (mixed * (2**15 - 1)).astype(numpy.int16).tobytes()

        # Send the encoded packet to all the clients.  The same mapping
        # that produced the keys of `pcms` is iterated, so the lookup
        # below cannot miss.
        for address, connection in self._connections_by_address.items():
            if pcms[address] is None:
                continue

            # Obtain encoder (created on first use for this connection)
            try:
                opus_encoder = connection["opus_encoder"]
            except KeyError:
                opus_encoder = OpusEncoder()
                opus_encoder.set_application("audio")
                opus_encoder.set_sampling_frequency(48000)
                opus_encoder.set_channels(1)
                connection["opus_encoder"] = opus_encoder

            # Encode the PCM
            encoded_packet = opus_encoder.encode(mixed_bytes)

            # Send encoded packet
            udp_packetizer = connection["udp_packetizer"]
            if encoded_packet is not None:
                udp_packetizer.write(encoded_packet)

    end_time = time.time()
    elapsed_ms = (end_time - start_time) * 1000
    if elapsed_ms > 5:
        print(f"process_audio_frame() duration: {round(elapsed_ms)} ms")
def _make_callback(self):
    """Build the audio callback that sends captured audio and plays received audio.

    The returned callback:

    * applies a fixed gain (``self._context["discussion_gain"]``, default 1)
      to the captured audio, encodes it with a dedicated Opus encoder and
      hands every resulting packet to ``self._udp_packetizer.write`` via
      ``reactor.callFromThread``;
    * fills the output buffer with audio decoded from the jitter buffer,
      concealing missing packets once playback has started and producing
      silence before that;
    * mixes a notification sound into the output once, starting with the
      first invocation.

    Returns:
        A function ``callback(indata, outdata, frames, time, status)``.
    """
    # Jitter buffer is thread safe (per the original author's note)
    jitter_buffer = self._jitter_buffer

    samples_per_second = 48000  # FIXME
    frame_size_ms = 20  # ms FIXME
    samples_per_frame = frame_size_ms * samples_per_second // 1000

    # Sound to indicate to user session has started
    filename = pkg_resources.resource_filename("singtclient",
                                               "sounds/discussion.opus")
    sound = OpusFile(filename).as_array()
    sound = numpy.mean(sound, axis=1)  # Make mono
    sound = numpy.reshape(sound, (len(sound), 1))
    sound /= 2**16
    sound_played = False
    sound_pos = 0

    # Fixed gain
    try:
        gain = self._context["discussion_gain"]
    except KeyError:
        gain = 1

    # OpusDecoder dedicated to callback
    opus_decoder = OpusDecoder()
    opus_decoder.set_sampling_frequency(samples_per_second)
    opus_decoder.set_channels(1)  # FIXME

    # OpusBufferedEncoder dedicated to callback
    opus_encoder = OpusBufferedEncoder()
    opus_encoder.set_application("audio")
    opus_encoder.set_sampling_frequency(samples_per_second)
    opus_encoder.set_channels(1)  # FIXME
    opus_encoder.set_frame_size(frame_size_ms)  # ms

    # PCM buffer dedicated to callback
    buf = None

    # Started flag dedicated to callback
    started = False

    def decode_next_packet():
        """Return the next frame as a float32 array with a single column."""
        nonlocal started
        encoded_packet = jitter_buffer.get_packet()

        if encoded_packet is not None:
            # Decode the encoded packet
            pcm = opus_decoder.decode(encoded_packet)
            started = True
        elif started:
            # Accept that we're missing the packet
            print("WARNING Missing packet")
            pcm = opus_decoder.decode_missing_packet(frame_size_ms)
        else:
            # We haven't even started, just output silence.  The decoder
            # is mono, so the silence is one frame of mono samples
            # regardless of the number of output channels.
            pcm = numpy.zeros((samples_per_frame, 1), dtype=numpy.int16)

        # Convert the data to floating point
        pcm_int16 = numpy.frombuffer(pcm, dtype=numpy.int16)
        pcm_float = pcm_int16.astype(numpy.float32)
        pcm_float /= 2**15
        pcm_float = numpy.reshape(pcm_float, (len(pcm_float), 1))

        # DEBUG
        if pcm_float.shape[0] != samples_per_frame:
            print(
                "FAIL Frame size isn't the desired duration ***********************************************"
            )
            print(f"It's first dimension is {pcm_float.shape[0]}.")
        if pcm_float.shape[1] != 1:  # channels
            print("FAIL Frame size isn't the correct number of channels")

        return pcm_float

    def callback(indata, outdata, frames, time, status):
        nonlocal buf
        nonlocal sound_pos, sound_played

        if status:
            print(status)

        # Input
        # =====

        # Apply fixed gain, then clip to full scale so that the int16
        # conversion below saturates instead of overflowing.
        numpy.multiply(indata, gain, out=indata)
        numpy.clip(indata, -1.0, 1.0, out=indata)

        # Convert from float32 to int16
        indata_int16 = (indata * (2**15 - 1)).astype(numpy.int16)
        encoded_packets = opus_encoder.encode(indata_int16.tobytes())

        # Send the encoded packets.  Make sure not to call from
        # this thread.
        for encoded_packet in encoded_packets:
            reactor.callFromThread(self._udp_packetizer.write,
                                   encoded_packet)

        # Output
        # ======

        # If there's insufficient data in buf attempt to obtain it
        # from the jitter buffer
        while buf is None or len(buf) < frames:
            decoded = decode_next_packet()
            if buf is None:
                buf = decoded
            else:
                buf = numpy.concatenate((buf, decoded))

        # Copy the data out of the buffer and drop the part we used.
        outdata[:] = buf[:frames]
        buf = buf[frames:]  # This is INEFFICIENT and could be improved

        # If we haven't finished playing the sound, mix it in.
        # TODO: This could be improved by fading in the audio signal
        if not sound_played:
            if sound_pos + frames < len(sound):
                outdata[:] += sound[sound_pos:sound_pos + frames]
            else:
                # Final (possibly partial) chunk of the sound
                outdata[:len(sound) - sound_pos] += sound[sound_pos:len(sound)]
                sound_played = True
            sound_pos += frames

    return callback
class UDPClientRecorder(UDPClientBase):
    """UDP client that decodes received Opus packets into a wave file.

    Every processing cycle takes one packet from ``self._jitter_buffer``,
    decodes it (concealing a missing packet once audio has started, or
    producing silence before that) and appends the PCM to the wave file.
    """

    def __init__(self, host, port, out_filename, echo=False):
        """Open the output wave file and create the Opus decoder.

        Args:
            host: Forwarded unchanged to the base-class initialiser.
            port: Forwarded unchanged to the base-class initialiser.
            out_filename: Path of the wave file opened for writing.
            echo: Accepted for interface compatibility; not used here.
        """
        super().__init__(host, port)

        # Have we started receiving data from the jitter buffer
        self._started = False

        # One definition of the audio format, used for both the wave
        # header and the decoder so the two cannot drift apart.
        samples_per_second = 48000  # FIXME
        channels = 1  # FIXME
        bytes_per_sample = 2  # FIXME

        # Output
        print(f"Opening file '{out_filename}' for writing as wave file")
        self._wave_write = wave.open(out_filename, "wb")
        self._wave_write.setnchannels(channels)
        self._wave_write.setsampwidth(bytes_per_sample)
        self._wave_write.setframerate(samples_per_second)

        self._opus_decoder = OpusDecoder()
        self._opus_decoder.set_sampling_frequency(samples_per_second)
        self._opus_decoder.set_channels(channels)

    def startProtocol(self):
        """Start the base protocol, then the 20 ms audio processing loop."""
        super().startProtocol()

        assert self.transport is not None

        interval = 20 / 1000
        self.start_audio_processing_loop(interval)

    def stopProtocol(self):
        """Close the wave file when the protocol is stopped.

        NOTE(review): assumes UDPClientBase derives from Twisted's
        DatagramProtocol, which calls stopProtocol() on shutdown -- confirm.
        """
        try:
            super().stopProtocol()
        finally:
            if self._wave_write is not None:
                # Closing finalises the wave header with the real length
                self._wave_write.close()
                self._wave_write = None

    def process_audio_frame(self, count):
        """Decode ``count`` frames from the jitter buffer into the wave file.

        Args:
            count: Number of cycles to process; logged when greater than 1.
        """
        if count > 1:
            log.info(f"In process_audio_frame() count: {count}")

        if self._wave_write is None:
            # The output file has already been closed; nothing to record
            return

        samples_per_second = 48000  # FIXME
        frame_size_ms = 20  # FIXME

        # DEBUG
        assert self.transport is not None

        # Repeat count times
        for _ in range(count):
            # Get next packet from jitter buffer
            encoded_packet = self._jitter_buffer.get_packet()

            # Decode packet
            if encoded_packet is not None:
                # Decode the encoded packet
                pcm = self._opus_decoder.decode(encoded_packet)
                self._started = True
            elif self._started:
                # Accept that we're missing the packet
                print("WARNING Missing packet")
                pcm = self._opus_decoder.decode_missing_packet(
                    frame_size_ms)
            else:
                # We haven't even started, just output silence
                channels = 1  # FIXME
                samples = frame_size_ms * samples_per_second // 1000
                pcm = numpy.zeros((samples, channels), dtype=numpy.int16)

            # Save PCM into wave file
            self._wave_write.writeframes(pcm)

    def start_audio_processing_loop(self, interval=20 / 1000):
        """Run ``process_audio_frame`` periodically via a LoopingCall.

        Args:
            interval: Seconds between calls.

        Returns:
            The Deferred returned by ``LoopingCall.start``, with callbacks
            attached that report the loop stopping or failing.
        """
        # withCount passes the number of intervals elapsed since the last
        # call, so missed cycles can be caught up.
        looping_call = LoopingCall.withCount(self.process_audio_frame)

        d = looping_call.start(interval)

        def on_stop(data):
            print("The audio processing loop was stopped")

        def on_error(data):
            print("ERROR: An error occurred during the audio processing loop:",
                  data)
            raise Exception(
                "An error occurred during the audio processing loop:" +
                str(data))

        d.addCallback(on_stop)
        d.addErrback(on_error)

        return d
class UDPClientTester(UDPClientBase):
    """UDP client that sends a wave file as Opus packets and records the reply.

    Every processing cycle sends one frame read from the input wave file
    and writes one decoded frame from ``self._jitter_buffer`` to the output
    wave file.  When the input file is exhausted the transport is stopped
    and the reactor is shut down.
    """

    def __init__(self, host, port, in_filename, out_filename):
        """Open the output wave file, create the decoder and the sender.

        Args:
            host: Forwarded unchanged to the base-class initialiser.
            port: Forwarded unchanged to the base-class initialiser.
            in_filename: Path of the wave file whose audio is sent.
            out_filename: Path of the wave file opened for writing.
        """
        super().__init__(host, port)

        # Have we started receiving data from the jitter buffer
        self._started = False

        # One definition of the audio format, used for both the wave
        # header and the decoder so the two cannot drift apart.
        samples_per_second = 48000  # FIXME
        channels = 1  # FIXME
        bytes_per_sample = 2  # FIXME

        # Output
        print(f"Opening file '{out_filename}' for writing as wave file")
        self._wave_write = wave.open(out_filename, "wb")
        self._wave_write.setnchannels(channels)
        self._wave_write.setsampwidth(bytes_per_sample)
        self._wave_write.setframerate(samples_per_second)

        self._opus_decoder = OpusDecoder()
        self._opus_decoder.set_sampling_frequency(samples_per_second)
        self._opus_decoder.set_channels(channels)

        # Input
        self._send_packet_generator = self._get_send_packet_generator(
            in_filename)

    def startProtocol(self):
        """Start the base protocol, then the 20 ms audio processing loop."""
        super().startProtocol()

        assert self.transport is not None

        interval = 20 / 1000
        self.start_audio_processing_loop(interval)

    def stopProtocol(self):
        """Close the output wave file when the protocol is stopped.

        NOTE(review): assumes UDPClientBase derives from Twisted's
        DatagramProtocol, which calls stopProtocol() on shutdown -- confirm.
        """
        try:
            super().stopProtocol()
        finally:
            if self._wave_write is not None:
                # Closing finalises the wave header with the real length
                self._wave_write.close()
                self._wave_write = None

    def process_audio_frame(self, count):
        """Send and record ``count`` frames.

        Args:
            count: Number of cycles to process; logged when greater than 1.

        Returns:
            The Deferred from ``transport.stopListening()`` when the input
            file has just been exhausted (it may be None); otherwise None.
        """
        start_time = time.time()

        if count > 1:
            log.info(f"In process_audio_frame() count: {count}")

        if self._wave_write is None:
            # The output file has already been closed; nothing to record
            return None

        samples_per_second = 48000  # FIXME
        frame_size_ms = 20  # FIXME

        # DEBUG
        assert self.transport is not None

        # Repeat count times
        for _ in range(count):
            # Send packets
            # ============
            try:
                next(self._send_packet_generator)
            except StopIteration:
                d = self.transport.stopListening()
                if d is None:
                    log.error("STOPPING CLIENT")
                    reactor.stop()
                else:
                    def on_success(data):
                        print("WARNING: In on_success.  data:", str(data))
                        log.error("STOPPING CLIENT")
                        reactor.stop()

                    def on_error(data):
                        print("ERROR Failed to stop listening:" + str(data))

                    d.addCallback(on_success)
                    d.addErrback(on_error)
                # Either way the client is shutting down: stop processing
                # rather than continuing with a stopped transport.
                return d

            # Received Packets
            # ================

            # Get next packet from jitter buffer
            encoded_packet = self._jitter_buffer.get_packet()

            # Decode packet
            if encoded_packet is not None:
                # Decode the encoded packet
                pcm = self._opus_decoder.decode(encoded_packet)
                self._started = True
            elif self._started:
                # Accept that we're missing the packet
                print("WARNING Missing packet")
                pcm = self._opus_decoder.decode_missing_packet(
                    frame_size_ms)
            else:
                # We haven't even started, just output silence
                channels = 1  # FIXME
                samples = frame_size_ms * samples_per_second // 1000
                pcm = numpy.zeros((samples, channels), dtype=numpy.int16)

            # Save PCM into wave file
            self._wave_write.writeframes(pcm)

        end_time = time.time()
        duration = (end_time - start_time) * 1000
        if duration > 5:
            print(f"process_audio_frame() duration: {duration:0.0f} ms")
        return None

    def start_audio_processing_loop(self, interval=20 / 1000):
        """Run ``process_audio_frame`` periodically via a LoopingCall.

        Args:
            interval: Seconds between calls.

        Returns:
            The Deferred returned by ``LoopingCall.start``, with callbacks
            attached that report the loop stopping or failing.
        """
        # withCount passes the number of intervals elapsed since the last
        # call, so missed cycles can be caught up.
        looping_call = LoopingCall.withCount(self.process_audio_frame)

        d = looping_call.start(interval)

        def on_stop(data):
            print("The audio processing loop was stopped")

        def on_error(data):
            print("ERROR: An error occurred during the audio processing loop:",
                  data)
            raise Exception(
                "An error occurred during the audio processing loop:" +
                str(data))

        d.addCallback(on_stop)
        d.addErrback(on_error)

        return d

    def _get_send_packet_generator(self, filename):
        """Return a generator that sends one frame of ``filename`` per step.

        The wave file is opened immediately.  Each ``next()`` reads 20 ms of
        PCM, pads a final partial frame with silence, encodes it and writes
        the resulting packets to ``self._udp_packetizer``.

        Args:
            filename: Path of the wave file to send.

        Returns:
            A generator yielding a placeholder string after every frame
            sent; it is exhausted when the file has been read completely.
        """
        # Open wav file
        wave_read = wave.open(filename, "rb")

        # Extract the wav's specification
        channels = wave_read.getnchannels()
        print("Number of channels:", channels)
        samples_per_second = wave_read.getframerate()
        print("Sampling frequency:", samples_per_second)
        bytes_per_sample = wave_read.getsampwidth()

        frame_duration_ms = 20

        # Create an Opus encoder
        opus_encoder = OpusBufferedEncoder()
        opus_encoder.set_application("audio")
        opus_encoder.set_sampling_frequency(samples_per_second)
        opus_encoder.set_channels(channels)
        opus_encoder.set_frame_size(frame_duration_ms)

        # Calculate the desired frame size (in samples per channel)
        desired_frame_size = samples_per_second * frame_duration_ms // 1000
        bytes_per_frame_sample = bytes_per_sample * channels

        # Loop through the wav file converting its PCM to Opus-encoded packets
        def send_packets():
            try:
                while True:
                    # Get data from the wav file
                    pcm = wave_read.readframes(desired_frame_size)

                    # Check if we've finished reading the wav file
                    if len(pcm) == 0:
                        print("client_udp: finished reading wave file")
                        break

                    # Calculate the effective frame size from the number
                    # of bytes read
                    effective_frame_size = len(pcm) // bytes_per_frame_sample

                    # Check if we've received enough data
                    if effective_frame_size < desired_frame_size:
                        # We haven't read a full frame from the wav file,
                        # so this is most likely a final partial frame
                        # before the end of the file.  We'll pad the end
                        # of this frame with silence.
                        missing = desired_frame_size - effective_frame_size
                        pcm += b"\x00" * (missing * bytes_per_frame_sample)

                    # Encode the PCM data and send the packets
                    encoded_packets = opus_encoder.encode(pcm)
                    for encoded_packet in encoded_packets:
                        self._udp_packetizer.write(encoded_packet)

                    yield "Not done yet"

                print(f"Finished sending '{filename}'")
            finally:
                # Release the input file once sending ends for any reason
                wave_read.close()

        return send_packets()
def _make_callback(self):
    """Build the audio callback that sends captured audio and plays received audio.

    The returned callback encodes the captured audio with a dedicated Opus
    encoder and hands every resulting packet to
    ``self._udp_packetizer.write`` via ``reactor.callFromThread``.  It then
    fills the output buffer with audio decoded from the jitter buffer,
    concealing missing packets once playback has started and producing
    silence before that.

    Returns:
        A function ``callback(indata, outdata, frames, time, status)``.
    """
    # Jitter buffer is thread safe (per the original author's note)
    jitter_buffer = self._jitter_buffer

    samples_per_second = 48000  # FIXME
    frame_size_ms = 20  # ms FIXME
    samples_per_frame = frame_size_ms * samples_per_second // 1000

    # OpusDecoder dedicated to callback
    opus_decoder = OpusDecoder()
    opus_decoder.set_sampling_frequency(samples_per_second)
    opus_decoder.set_channels(1)  # FIXME

    # OpusBufferedEncoder dedicated to callback
    opus_encoder = OpusBufferedEncoder()
    opus_encoder.set_application("audio")
    opus_encoder.set_sampling_frequency(samples_per_second)
    opus_encoder.set_channels(1)  # FIXME
    opus_encoder.set_frame_size(frame_size_ms)  # ms

    # PCM buffer dedicated to callback
    buf = None

    # Started flag dedicated to callback
    started = False

    def decode_next_packet():
        """Return the next frame as a float32 array with a single column."""
        nonlocal started
        encoded_packet = jitter_buffer.get_packet()

        if encoded_packet is not None:
            # Decode the encoded packet
            pcm = opus_decoder.decode(encoded_packet)
            started = True
        elif started:
            # Accept that we're missing the packet
            print("WARNING Missing packet")
            pcm = opus_decoder.decode_missing_packet(frame_size_ms)
        else:
            # We haven't even started, just output silence.  The decoder
            # is mono, so the silence is one frame of mono samples
            # regardless of the number of output channels.
            pcm = numpy.zeros((samples_per_frame, 1), dtype=numpy.int16)

        # Convert the data to floating point
        pcm_int16 = numpy.frombuffer(pcm, dtype=numpy.int16)
        pcm_float = pcm_int16.astype(numpy.float32)
        pcm_float /= 2**15
        pcm_float = numpy.reshape(pcm_float, (len(pcm_float), 1))

        # DEBUG
        if pcm_float.shape[0] != samples_per_frame:
            print(
                "FAIL Frame size isn't the desired duration ***********************************************"
            )
            print(f"It's first dimension is {pcm_float.shape[0]}.")
        if pcm_float.shape[1] != 1:  # channels
            print("FAIL Frame size isn't the correct number of channels")

        return pcm_float

    def callback(indata, outdata, frames, time, status):
        nonlocal buf

        if status:
            print(status)

        # Input
        # =====
        if indata is None:
            # Nothing was captured; report it and skip encoding instead
            # of failing on the arithmetic below.
            print(
                "************************ indata is None ***************")
        else:
            # Convert from float32 to int16, clipping to full scale so
            # the conversion saturates instead of overflowing.
            clipped = numpy.clip(indata, -1.0, 1.0)
            indata_int16 = (clipped * (2**15 - 1)).astype(numpy.int16)
            encoded_packets = opus_encoder.encode(indata_int16.tobytes())

            # Send the encoded packets.  Make sure not to call from
            # this thread.
            for encoded_packet in encoded_packets:
                reactor.callFromThread(self._udp_packetizer.write,
                                       encoded_packet)

        # Output
        # ======

        # If there's insufficient data in buf attempt to obtain it
        # from the jitter buffer
        while buf is None or len(buf) < frames:
            decoded = decode_next_packet()
            if buf is None:
                buf = decoded
            else:
                buf = numpy.concatenate((buf, decoded))

        # Copy the data out of the buffer and drop the part we used.
        outdata[:] = buf[:frames]
        buf = buf[frames:]  # This is INEFFICIENT and could be improved

    return callback