def read_opus(opus_file):
    """Decode a length-prefixed Opus stream from ``opus_file`` into PCM.

    The header (parsed by ``read_opus_header``) yields the expected PCM size
    and an ``audio_format`` triple whose first two entries are the sample
    rate and the channel count.  After the header, the stream is a sequence
    of chunks, each preceded by a length field of ``OPUS_CHUNK_LEN_SIZE``
    bytes.

    Returns:
        A ``(audio_format, audio_data)`` pair, where ``audio_data`` is a
        ``bytearray`` of exactly ``pcm_buffer_size`` bytes (or fewer only if
        decoding produced fewer, which the loop condition rules out).
    """
    expected_size, audio_format = read_opus_header(opus_file)
    sample_rate, n_channels, _ = audio_format
    samples_per_frame = get_opus_frame_size(sample_rate)

    # Imported lazily so the dependency is only needed when Opus is read.
    import opuslib
    opus_decoder = opuslib.Decoder(sample_rate, n_channels)

    pcm = bytearray()
    while len(pcm) < expected_size:
        length_field = opus_file.read(OPUS_CHUNK_LEN_SIZE)
        packet = opus_file.read(unpack_number(length_field))
        pcm.extend(opus_decoder.decode(packet, samples_per_frame))

    # The last frame may overshoot the declared size; drop the excess.
    del pcm[expected_size:]
    return audio_format, pcm
def __init__(
    self, source: str, volume=0.5, guild_id: int = 0, *args, **kwargs
):
    """Set up an audio source with its own Opus decoder/encoder pair.

    Args:
        source: Forwarded to the parent initializer as the first argument.
        volume: Initial volume stored on the instance (default ``0.5``).
        guild_id: Identifier stored on the instance (default ``0``).
        *args, **kwargs: Forwarded unchanged to the parent initializer.
    """
    # The volume is assigned before the parent initializer runs; keep this
    # order (presumably the parent may consult it -- TODO confirm).
    self.volume = volume
    super().__init__(source, *args, **kwargs)

    self.guild_id = guild_id
    self.bytes_read = 0

    # Both codec objects share the encoder's sampling rate and channel count.
    rate = OpusEncoder.SAMPLING_RATE
    channels = OpusEncoder.CHANNELS
    self.decoder = opuslib.Decoder(rate, channels)
    self.encoder = opuslib.Encoder(rate, channels, "voip")
def __init__(self, mumble_object):
    """Create the per-user receive state: packet queue, timing and decoders.

    Args:
        mumble_object: Stored as ``self.mumble_object`` for later use.
    """
    self.mumble_object = mumble_object
    self.queue = deque()
    self.lock = Lock()

    # Timing reference points; unknown until set elsewhere.
    self.start_sequence = self.start_time = None
    self.receive_sound = True

    # To be safe, create a decoder for every supported codec for each user:
    # clients sometimes keep using a codec for a while after the server has
    # requested another one.
    opus_decoder = opuslib.Decoder(PYMUMBLE_SAMPLERATE, 1)
    self.decoders = {PYMUMBLE_AUDIO_TYPE_OPUS: opus_decoder}
def __init__(self):
    """Initialise playback state and start the decoder daemon thread.

    Sets up the rate estimates, a 24 kHz mono Opus decoder with its lock,
    the duplicate checker, the packet heap with its lock, and the wake
    event/lock, then starts ``self.run_decoder`` on a daemon thread.

    The thread is started last so that ``run_decoder`` never observes a
    partially initialised instance (previously ``last_missing`` and
    ``last_played`` were assigned only after the thread was running).
    """
    self.accept_rate = 1.0
    self.ready_rate = 1.0
    self.ready_next_rate = 0.0

    self.decoded = None
    self.decoder = opuslib.Decoder(24000, 1)
    self.decoder_lock = threading.Lock()

    self.dupe_check = util.DupeCheck()
    self.heap = []
    self.heap_lock = threading.Lock()
    self.last_packet_time = None

    self.wake_event = threading.Event()
    self.wake_lock = threading.Lock()

    self.last_missing = False
    self.last_played = None

    # Must stay the final statement: every attribute above has to exist
    # before the background thread can touch ``self``.
    self.decoder_thread = util.start_daemon(self.run_decoder)
def test_gain(self):
    """Check the decoder gain default, its range validation and round trip.

    A fresh decoder reports gain 0.  Values just outside the signed 16-bit
    range (-32769 and 32768) must raise ``opuslib.OpusError`` with code
    ``opuslib.BAD_ARG``; a valid value must be stored and read back.
    """
    decoder = opuslib.Decoder(48000, 2)
    self.assertEqual(decoder.gain, 0)

    # assertRaises makes the test fail when no error is raised at all; the
    # earlier try/except form silently passed in that case.
    for out_of_range in (-32769, 32768):
        with self.assertRaises(opuslib.OpusError) as caught:
            decoder.gain = out_of_range
        self.assertEqual(caught.exception.code, opuslib.BAD_ARG)

    decoder.gain = -15
    self.assertEqual(decoder.gain, -15)
def __init__(self, remote_host_address, remote_host_port, host_address, rtp_port):
    """Wire up microphone capture, RTP send/receive, Opus codecs and output.

    Args:
        remote_host_address: Address passed to the RTP sender.
        remote_host_port: Port passed to the RTP sender.
        host_address: Local address passed to the RTP receiver.
        rtp_port: Local port passed to the RTP receiver.

    ``self.SAMPLE_RATE`` is read here but not assigned; it is expected to be
    defined elsewhere (e.g. on the class -- not visible from this block).
    """
    # 640 and 320 appear as commented-out alternatives in the original.
    self.CHUNK_SIZE = 960
    rate = self.SAMPLE_RATE

    self.mic = MicDevice(sr=rate, chunk_size=self.CHUNK_SIZE)
    self.sender = RTPSendClient(
        remote_host_address=remote_host_address,
        remote_host_port=remote_host_port,
    )
    self.receiver = RTPReceiveClient(host_address=host_address, rtp_port=rtp_port)

    # Mono VoIP encoder and matching mono decoder.
    self.opus_encode = opuslib.Encoder(rate, 1, opuslib.APPLICATION_VOIP)
    self.opus_decode = opuslib.Decoder(rate, 1)

    self.aout = AudioOutput()
    self.frame_count = 0
    self.start_ts = -1.0
    print('init client')
def __enter__(self):
    """Open the PyAudio output stream and register its cleanup callbacks.

    When the encoding hint is ``tts_pb2.RAW_OPUS`` an Opus decoder is
    created and the hint is rewritten to ``tts_pb2.LINEAR16``; otherwise
    ``self._opus_decoder`` is ``None``.  Returns whatever the parent
    ``__enter__`` returns.
    """
    opus_decoder = None
    if self._encoding_hint == tts_pb2.RAW_OPUS:
        import opuslib
        # From here on the stream is treated as LINEAR16 output.
        self._encoding_hint = tts_pb2.LINEAR16
        opus_decoder = opuslib.Decoder(self._sample_rate_hint, 1)
    self._opus_decoder = opus_decoder

    import pyaudio
    audio = pyaudio.PyAudio()
    self.callback(audio.terminate)

    sample_format, self._width = _encoding_to_pyaudio_format_and_width(
        self._encoding_hint)
    self._stream = audio.open(
        output=True,
        channels=1,
        format=sample_format,
        rate=self._sample_rate_hint,
    )

    # Registered as close, then stop_stream.  Assuming ``self.callback`` is
    # ExitStack-style (LIFO), teardown runs stop_stream -> close -> terminate
    # -- TODO confirm against the base class.
    for cleanup in (self._stream.close, self._stream.stop_stream):
        self.callback(cleanup)
    return super().__enter__()
def worker(): # worker thread nonlocal loop, im, event, f pcm = im.tobytes() if len(pcm) % 2: pcm = b'\0' + pcm encoder = opuslib.Encoder(48000, 1, 'restricted_lowdelay') frames = [] for x in range(0, len(pcm), 960 * 2): frames.append(encoder.encode(pcm[x:x + 960 * 2], 960)) opcmlen = len(pcm) decoder = opuslib.Decoder(48000, 1) data = b'' for frame in frames: data += decoder.decode(frame, 960) im = Image.frombytes(im.mode, im.size, data[len(data) - opcmlen:]) im = im.convert("RGB") f = io.BytesIO() im.save(f, "JPEG") f.seek(0) loop.call_soon_threadsafe(event.set)
def test_reset_state(cls):
    """Smoke test: ``reset_state`` on a fresh 48 kHz stereo decoder runs without raising."""
    opuslib.Decoder(48000, 2).reset_state()
def test_get_bandwidth(self):
    """A freshly created 48 kHz stereo decoder must report a bandwidth of 0."""
    fresh_decoder = opuslib.Decoder(48000, 2)
    reported = fresh_decoder.bandwidth
    self.assertEqual(reported, 0)
def __init__(self, samples=48000, frames=960, channels=1):
    """Create a matching VoIP Opus encoder/decoder pair.

    Args:
        samples: Sample rate handed to both codec objects (default 48000).
        frames: Stored as ``self.frames`` (default 960).
        channels: Channel count handed to both codec objects (default 1).
    """
    self.frames = frames
    codec_args = (samples, channels)
    self.encoder = opuslib.Encoder(*codec_args, opuslib.APPLICATION_VOIP)
    self.decoder = opuslib.Decoder(*codec_args)
def handle_post(in_data_raw, query_params, headers):
    """Handle one client audio request: mix in its audio, return mixed audio.

    Args:
        in_data_raw: Request body as a bytes-like object.  It is viewed as
            uint8 and, when non-empty, split into Opus packets with
            ``unpack_multi``.
        query_params: Mapping of parameter name to a list of string values
            (values are read with ``[0]`` or single-element unpacking).
            Keys read here: ``write_clock``, ``read_clock``, ``n_samples``,
            ``userid``, ``username``, ``volume``, ``chat``, ``mic_volume``,
            ``request_lead``, ``mark_start_singing``, ``mark_stop_singing``,
            ``loopback``.
        headers: Only used in the debug log line for new sessions.

    Returns:
        ``(data, x_audio_metadata)``: the packed, Opus-encoded response audio
        as bytes, and a JSON string describing clocks, users and chats.

    Raises:
        ValueError: missing read clock, missing/empty username or userid,
            neither data nor ``n_samples`` supplied, sample-count mismatch,
            write clock too far in the past, or a read/write clock desync.

    Side effects:
        Rebinds the module globals ``last_request_clock``, ``global_volume``,
        ``song_start_clock`` and ``song_end_clock``; mutates ``users``,
        ``audio_queue`` and ``n_people_queue``.
    """
    global last_request_clock
    global first_client_write_clock
    global first_client_total_samples
    global first_client_value
    global global_volume
    global song_end_clock
    global song_start_clock

    # NOTE NOTE NOTE:
    # * All `clock` variables are measured in samples.
    # * All `clock` variables represent the END of an interval, NOT the
    #   beginning. It's arbitrary which one to use, but you have to be
    #   consistent, and trust me that it's slightly nicer this way.

    # Note: This will eventually create a precision problem for the JS
    #   clients, which are using floats. Specifically, at 44100 Hz, it will
    #   fail on February 17, 5206.
    server_clock = int(time.time() * SAMPLE_RATE)

    # The write clock is optional (absent on a new session); the read clock
    # is mandatory.
    client_write_clock = query_params.get("write_clock", None)
    if client_write_clock is not None:
        client_write_clock = int(client_write_clock[0])
    client_read_clock = query_params.get("read_clock", None)
    if client_read_clock is not None:
        client_read_clock = int(client_read_clock[0])
    else:
        raise ValueError("no client read clock")
    n_samples = query_params.get("n_samples", None)
    if n_samples is not None:
        n_samples = int(n_samples[0])

    userid = None
    userids = query_params.get("userid", None)

    username = None
    usernames = query_params.get("username", None)
    if not userids or not usernames:
        raise ValueError("missing username/id")

    # Single-element unpacking: raises ValueError if a parameter was
    # supplied more than once.
    userid, = userids
    username, = usernames
    if not userid or not username:
        raise ValueError("missing username/id")

    if client_write_clock is None:
        # New session, write some debug info to disk
        logging.debug("*** New client:" + str(headers) + str(query_params) + "\n\n")

    # A missing write clock indicates a new session, so flush everything for
    # this user, remembering only their previous write clock.
    # (There's probably a better way to handle this.)
    prev_last_write_clock = None
    if (client_write_clock is None) and (userid in users):
        prev_last_write_clock = users[userid].last_write_clock
        del users[userid]

    update_users(userid, username, server_clock, client_read_clock)
    user = users[userid]
    if user.last_write_clock is None:
        user.last_write_clock = prev_last_write_clock

    volumes = query_params.get("volume", None)
    if volumes:
        volume, = volumes
        # 6.908 ~= ln(1000): exponential curve giving 0.001 at 0 and 1.0 at 1.
        global_volume = math.exp(6.908 * float(volume)) / 1000

    msg_chats = query_params.get("chat", None)
    if msg_chats:
        msg_chats, = msg_chats
        msg_chats = json.loads(msg_chats)
        # Queue each chat message for every user except the sender.
        for other_userid, other_user in users.items():
            if other_userid != userid:
                for msg_chat in msg_chats:
                    other_user.chats_to_send.append((username, msg_chat))

    mic_volumes = query_params.get("mic_volume", None)
    if mic_volumes:
        mic_volume, = mic_volumes
        for other_userid, new_mic_volume in json.loads(mic_volume):
            if other_userid in users:
                # Clamp to [0, 2].
                if new_mic_volume > 2:
                    new_mic_volume = 2
                elif new_mic_volume < 0:
                    new_mic_volume = 0

                users[other_userid].mic_volume = new_mic_volume

                # https://www.dr-lex.be/info-stuff/volumecontrols.html
                # Make 1 be unity
                users[other_userid].scaled_mic_volume = math.exp(
                    6.908 * new_mic_volume * .5) / math.exp(6.908 * 0.5)

    if query_params.get("request_lead", None):
        assign_delays(userid)
        song_start_clock = None
        song_end_clock = 0

    if query_params.get("mark_start_singing", None):
        song_start_clock = user.last_write_clock
        song_end_clock = 0

    if query_params.get("mark_stop_singing", None):
        song_end_clock = user.last_write_clock

        # They're done singing, send them to the end.
        user.delay_to_send = max_position

    in_data = np.frombuffer(in_data_raw, dtype=np.uint8)

    # Audio from clients is summed, so we need to clear the circular
    # buffer ahead of them. The range we are clearing was "in the
    # future" as of the last request, and we never touch the future,
    # so nothing has touched it yet "this time around".
    if last_request_clock is not None:
        clear_samples = min(server_clock - last_request_clock, QUEUE_LENGTH)
        wrap_assign(audio_queue, last_request_clock, np.zeros(clear_samples, np.float32))
        wrap_assign(n_people_queue, last_request_clock, np.zeros(clear_samples, np.int16))

    # Keep the previous value for the response metadata before advancing.
    saved_last_request_clock = last_request_clock
    last_request_clock = server_clock

    if not user.opus_state:
        # initialize: one (encoder, decoder) pair per user
        user.opus_state = (opuslib.Encoder(SAMPLE_RATE, CHANNELS, opuslib.APPLICATION_AUDIO), opuslib.Decoder(SAMPLE_RATE, CHANNELS))
    (enc, dec) = user.opus_state

    # If the user does not send us any data, we will treat it as silence of
    # length n_samples. This is useful if they are just starting up.
    if len(in_data) == 0:
        if n_samples is None:
            raise ValueError("Must provide either n_samples or data")
        in_data = np.zeros(n_samples, np.float32)
    else:
        # Decode every packed Opus packet to float32 samples and concatenate.
        packets = unpack_multi(in_data)
        decoded = []
        for p in packets:
            d = dec.decode_float(p.tobytes(), OPUS_FRAME_SAMPLES, decode_fec=False)
            decoded.append(np.frombuffer(d, np.float32))
        in_data = np.concatenate(decoded)

    # Sending n_samples is optional if data is sent, but in case of both they
    # must match
    if n_samples is None:
        n_samples = len(in_data)
    if n_samples != len(in_data):
        raise ValueError("Client is confused about how many samples it sent")

    if client_write_clock is None:
        # New session: nothing is written into the queue this time.
        pass
    elif client_write_clock - n_samples < server_clock - QUEUE_LENGTH:
        # Client is too far behind and going to wrap the buffer. :-(
        raise ValueError("Client's write clock is too far in the past")
    else:
        if user.last_seen_write_clock is not None:
            # For debugging purposes only
            if client_write_clock - n_samples != user.last_seen_write_clock:
                raise ValueError(
                    f'Client write clock desync ('
                    f'{client_write_clock - n_samples} - '
                    f'{user.last_seen_write_clock} = '
                    f'{client_write_clock - n_samples - user.last_seen_write_clock})'
                )
            # The song end fell inside this user's latest write interval.
            if user.last_write_clock <= song_end_clock <= client_write_clock:
                user.delay_to_send = max_position

        user.last_seen_write_clock = client_write_clock
        if client_write_clock is not None:
            user.last_write_clock = client_write_clock

        # In-place scaling by the per-user microphone volume.
        in_data *= user.scaled_mic_volume

        # Don't keep any input unless a song is in progress.
        if (song_start_clock and client_write_clock > song_start_clock and (not song_end_clock or client_write_clock - n_samples < song_end_clock)):
            # Sum this user's audio into the interval ending at their write
            # clock, and count them as one more contributor there.
            old_audio = wrap_get(audio_queue, client_write_clock - n_samples, n_samples)
            new_audio = old_audio + in_data
            wrap_assign(audio_queue, client_write_clock - n_samples, new_audio)

            old_n_people = wrap_get(n_people_queue, client_write_clock - n_samples, n_samples)
            new_n_people = old_n_people + np.ones(n_samples, np.int16)
            wrap_assign(n_people_queue, client_write_clock - n_samples, new_n_people)

    # Why subtract n_samples above and below? Because the future is to the
    # right. So when a client asks for n samples at time t, what they
    # actually want is "the time interval ending at t", i.e. [t-n, t). Since
    # the latest possible time they can ask for is "now", this means that
    # the latest possible time interval they can get is "the recent past"
    # instead of "the near future".
    # This doesn't matter to the clients if they all always use the same
    # value of n_samples, but it matters if n_samples changes, and it
    # matters for the server's zeroing.

    # For debugging purposes only
    if user.last_seen_read_clock is not None:
        if client_read_clock - n_samples != user.last_seen_read_clock:
            raise ValueError(
                f'Client read clock desync ('
                f'{client_read_clock - n_samples} - '
                f'{user.last_seen_read_clock} = '
                f'{client_read_clock - n_samples - user.last_seen_read_clock})'
            )
    user.last_seen_read_clock = client_read_clock

    if query_params.get("loopback", [None])[0] == "true":
        # Loopback: echo the client's own (volume-scaled) audio back.
        data = in_data
    else:
        data = wrap_get(audio_queue, client_read_clock - n_samples, n_samples)
        n_people = wrap_get(n_people_queue, client_read_clock - n_samples, n_samples)

        # We could scale volume by having n_people be the number of
        # earlier people and then scale by a simple 1/n_people. But a
        # curve of (1 + X) / (n_people + X) falls a bit less
        # dramatically and should sound better.
        #
        # Compare:
        # https://www.wolframalpha.com/input/?i=graph+%281%29+%2F+%28x%29+from+1+to+10
        # https://www.wolframalpha.com/input/?i=graph+%281%2B3%29+%2F+%28x%2B3%29+from+1+to+10
        data = data * (1 + N_PHANTOM_PEOPLE) / (n_people + N_PHANTOM_PEOPLE)

    data *= global_volume

    # Re-encode the outgoing audio one Opus frame at a time.  The reshape
    # requires the sample count to be a multiple of OPUS_FRAME_SAMPLES.
    packets = data.reshape([-1, OPUS_FRAME_SAMPLES])
    encoded = []
    for p in packets:
        e = np.frombuffer(enc.encode_float(p.tobytes(), OPUS_FRAME_SAMPLES), np.uint8)
        encoded.append(e)
    data = pack_multi(encoded).tobytes()

    x_audio_metadata = json.dumps({
        "server_clock": server_clock,
        "server_sample_rate": SAMPLE_RATE,
        "last_request_clock": saved_last_request_clock,
        "client_read_clock": client_read_clock,
        "client_write_clock": client_write_clock,
        "user_summary": user_summary(),
        "chats": user.chats_to_send,
        "delay_seconds": user.delay_to_send,
        "song_start_clock": song_start_clock,
        # queue_size is in frames of FRAME_SIZE samples (the original note
        # says these are 128-sample frames).
        "queue_size": QUEUE_LENGTH / FRAME_SIZE,
    })

    # Pending chats and the delay instruction are delivered exactly once.
    user.chats_to_send.clear()
    user.delay_to_send = None

    return data, x_audio_metadata
text= "И мысли тоже тяжелые и медлительные, падают неторопливо и редко одна за другой, точно песчинки " "в разленившихся песочных часах."), audio_config=tts_pb2.AudioConfig( audio_encoding=tts_pb2.RAW_OPUS, sample_rate_hertz=sample_rate, ), ) pyaudio_lib = pyaudio.PyAudio() f = pyaudio_lib.open(output=True, channels=1, format=pyaudio.paInt16, rate=sample_rate) opus_decoder = opuslib.Decoder(sample_rate, 1) stub = tts_pb2_grpc.TextToSpeechStub( grpc.secure_channel(endpoint, grpc.ssl_channel_credentials())) request = build_request() metadata = authorization_metadata(api_key, secret_key, "tinkoff.cloud.tts") responses = stub.StreamingSynthesize(request, metadata=metadata) for key, value in responses.initial_metadata(): if key == "x-audio-duration-seconds": print("Estimated audio duration is {:.2f} seconds".format( float(value))) break for stream_response in responses: f.write( opus_decoder.decode(stream_response.audio_chunk, MAX_ALLOWED_FRAME_RATE))
# Write the source PCM as a reference WAV (stereo, 16-bit, 48 kHz).  The
# context manager closes the file even if a later call raises.
with wave.open('wav_out1.wav', 'w') as wave_out:
    wave_out.setnchannels(2)
    wave_out.setsampwidth(2)
    wave_out.setframerate(48000)
    wave_out.writeframes(pcm)

# frame_size is samples per channel, but the maximum is a 60 ms frame,
# i.e. 2880 samples at 48 kHz.  Not particularly good for storage, since the
# audio has to be broken up into many frames; only a single frame of `pcm`
# is encoded here.
encoder = opuslib.Encoder(48000, 2, opuslib.APPLICATION_AUDIO)
opus_encode = encoder.encode(pcm_data=pcm, frame_size=2880)
print(len(opus_encode))

decoder = opuslib.Decoder(48000, 2)
opus_decode = decoder.decode(opus_data=opus_encode, frame_size=2880)
print(len(opus_decode))

# Disabled experiment: unpack to 16-bit samples and keep one channel.
#raw = struct.unpack('h' * (len(opus_decode)//2), opus_decode)
#raw = raw[0::2]
#print(len(raw))

# Write the decoded frame back out for comparison with the reference WAV.
with wave.open('wav_out2.wav', 'w') as out:
    out.setnchannels(2)
    out.setsampwidth(2)
    out.setframerate(48000)
    out.writeframes(opus_decode)
def handle_post(userid, n_samples, in_data_raw, query_string, print_status=True, client_address=None) -> Tuple[Any, str]:
    """Opus-decode a client's upload, forward it, and Opus-encode the reply.

    Args:
        userid: String of digits identifying the client; also used as the
            per-user log file name under ``LOG_DIR``.
        n_samples: Number of samples the client claims to have sent; ``0``
            means "not specified".
        in_data_raw: Bytes-like request body holding packed Opus packets, or
            empty to mean ``n_samples`` of silence.
        query_string: Query string forwarded to ``handle_json_post`` with an
            ``rms_volume`` parameter appended.
        print_status: Forwarded to ``handle_json_post``.
        client_address: Forwarded to ``handle_json_post``.

    Returns:
        ``(data, x_audio_metadata)``: the user summary followed by the packed
        Opus-encoded audio as bytes, plus the metadata returned by
        ``handle_json_post``.

    Raises:
        ValueError: non-numeric ``userid``, neither data nor ``n_samples``
            given, or ``n_samples`` disagreeing with the decoded length.
    """
    if not userid.isdigit():
        raise ValueError("UserID must be numeric; got: %r"%userid)

    # Look up this user's (encoder, decoder) pair, creating it on first use.
    try:
        codec_pair = users[userid]
    except KeyError:
        codec_pair = (
            opuslib.Encoder(server.SAMPLE_RATE, CHANNELS, opuslib.APPLICATION_AUDIO),
            opuslib.Decoder(server.SAMPLE_RATE, CHANNELS),
        )
        users[userid] = codec_pair
    enc, dec = codec_pair

    in_data = np.frombuffer(in_data_raw, dtype=np.uint8)

    # If the user does not send us any data, we treat it as silence of
    # length n_samples.  This is useful if they are just starting up.
    client_no_data = in_data.size == 0
    if not client_no_data:
        in_data = np.concatenate([
            np.frombuffer(
                dec.decode_float(pkt.tobytes(), OPUS_FRAME_SAMPLES, decode_fec=False),
                np.float32)
            for pkt in unpack_multi(in_data)
        ])
    elif n_samples == 0:
        raise ValueError("Must provide either n_samples or data")
    else:
        in_data = np.zeros(n_samples, np.float32)

    # Sending n_samples is optional if data is sent, but when both are
    # present they must agree.
    if n_samples == 0:
        n_samples = len(in_data)
    elif n_samples != len(in_data):
        raise ValueError("Client is confused about how many samples it sent (got %s expected %s" % (n_samples, len(in_data)))

    rms_volume = calculate_volume(in_data)
    # Appending with '&' is only safe because query_string is guaranteed to
    # already contain at least the userid parameter.
    query_string += '&rms_volume=%s'%rms_volume

    data, x_audio_metadata = handle_json_post(
        in_data, query_string, print_status, client_address=client_address)

    # The reply starts with a user summary whose length is derived from a
    # big-endian uint16 user count; float32 audio follows it.
    n_users_in_summary, = struct.unpack(">H", data[:2])
    split_at = server.summary_length(n_users_in_summary)
    user_summary = data[:split_at]
    raw_audio = data[split_at:].view(np.float32)

    # Encode the audio one Opus frame at a time and pack the packets.
    compressed_audio = pack_multi([
        np.frombuffer(enc.encode_float(frame.tobytes(), OPUS_FRAME_SAMPLES), np.uint8)
        for frame in raw_audio.reshape([-1, OPUS_FRAME_SAMPLES])
    ])

    # Combine user_summary and compressed audio data.
    data = np.append(user_summary, compressed_audio)

    # Per-user volume log: timestamp and RMS volume (-1 when no data came in).
    logged_volume = -1 if client_no_data else rms_volume
    with open(os.path.join(LOG_DIR, userid), "a") as log_file:
        log_file.write("%d %.8f\n"%(time.time(), logged_volume))

    return data.tobytes(), x_audio_metadata