예제 #1
0
파일: audio.py 프로젝트: tnellesen/DSAlign
def read_opus(opus_file):
    """Decode a length-prefixed Opus stream into raw PCM bytes.

    The header is parsed by ``read_opus_header``, which yields the expected
    PCM size in bytes and the audio format. The rest of the file is read as
    repeated ``[chunk length][chunk]`` records until enough PCM is decoded.

    Args:
        opus_file: Binary file-like object positioned at the stream header.

    Returns:
        ``(audio_format, audio_data)`` where ``audio_data`` is a ``bytearray``
        truncated to the PCM size announced by the header.
    """
    pcm_buffer_size, audio_format = read_opus_header(opus_file)
    sample_rate, n_channels, _ = audio_format
    samples_per_frame = get_opus_frame_size(sample_rate)

    # Imported here rather than at module level, as in the original.
    import opuslib
    opus_decoder = opuslib.Decoder(sample_rate, n_channels)

    pcm = bytearray()
    while True:
        if len(pcm) >= pcm_buffer_size:
            break
        packet_size = unpack_number(opus_file.read(OPUS_CHUNK_LEN_SIZE))
        packet = opus_file.read(packet_size)
        pcm += opus_decoder.decode(packet, samples_per_frame)

    # The last frame may decode past the announced size; drop the excess.
    return audio_format, pcm[:pcm_buffer_size]
예제 #2
0
    def __init__(
        self, source: str, volume=0.5, guild_id: int = 0, *args, **kwargs
    ):
        """Initialise the audio source with its own Opus decoder and encoder.

        Args:
            source: Forwarded to the parent initialiser.
            volume: Stored on the instance before the parent initialiser runs.
            guild_id: Identifier stored on the instance.
            *args: Extra positional arguments for the parent initialiser.
            **kwargs: Extra keyword arguments for the parent initialiser.
        """
        # Assigned before super().__init__, exactly as the original does.
        self.volume = volume
        super().__init__(source, *args, **kwargs)
        self.guild_id = guild_id
        self.bytes_read = 0

        # Decoder and encoder share the sampling rate and channel count.
        rate = OpusEncoder.SAMPLING_RATE
        channels = OpusEncoder.CHANNELS
        self.decoder = opuslib.Decoder(rate, channels)
        self.encoder = opuslib.Encoder(rate, channels, "voip")
예제 #3
0
    def __init__(self, mumble_object):
        """Create an empty receive queue and the decoders for incoming sound.

        Args:
            mumble_object: Stored on the instance as ``mumble_object``.
        """
        self.mumble_object = mumble_object

        # Nothing queued yet, so there is no reference sequence or time.
        self.queue = deque()
        self.start_sequence = self.start_time = None

        self.receive_sound = True
        self.lock = Lock()

        # Create a decoder for every supported codec up front: clients may
        # keep using a codec for a while after the server requests another.
        self.decoders = {
            PYMUMBLE_AUDIO_TYPE_OPUS: opuslib.Decoder(PYMUMBLE_SAMPLERATE, 1),
        }
예제 #4
0
파일: player.py 프로젝트: nanotone/opusjam
 def __init__(self):
     """Set up playback state, then start the decoder worker.

     ``util.start_daemon(self.run_decoder)`` presumably launches
     ``run_decoder`` on a daemon thread (helper not visible here). It is
     called last so that ``run_decoder`` can never see an instance whose
     attributes are only partly assigned.
     """
     self.accept_rate = 1.0
     self.ready_rate = 1.0
     self.ready_next_rate = 0.0
     self.decoded = None
     self.decoder = opuslib.Decoder(24000, 1)
     self.decoder_lock = threading.Lock()
     self.dupe_check = util.DupeCheck()
     self.heap = []
     self.heap_lock = threading.Lock()
     self.last_packet_time = None
     self.wake_event = threading.Event()
     self.wake_lock = threading.Lock()
     self.last_missing = False
     self.last_played = None
     # Start the worker only after every attribute above exists.
     self.decoder_thread = util.start_daemon(self.run_decoder)
예제 #5
0
    def test_gain(self):
        """Check the decoder gain default, range validation and round-trip."""
        decoder = opuslib.Decoder(48000, 2)

        self.assertEqual(decoder.gain, 0)

        # Values just outside the accepted range must be rejected.
        # assertRaises makes the test fail if no error is raised at all,
        # which the previous bare try/except silently allowed.
        for out_of_range in (-32769, 32768):
            with self.assertRaises(opuslib.OpusError) as raised:
                decoder.gain = out_of_range
            self.assertEqual(raised.exception.code, opuslib.BAD_ARG)

        decoder.gain = -15
        self.assertEqual(decoder.gain, -15)
예제 #6
0
파일: client.py 프로젝트: Doyeon-k/pyvoip
    def __init__(self, remote_host_address, remote_host_port, host_address,
                 rtp_port):
        """Wire up the microphone, RTP endpoints, Opus codec and audio output.

        Args:
            remote_host_address: Passed to ``RTPSendClient``.
            remote_host_port: Passed to ``RTPSendClient``.
            host_address: Passed to ``RTPReceiveClient``.
            rtp_port: Passed to ``RTPReceiveClient``.
        """
        self.CHUNK_SIZE = 960  # other values noted by the author: 640, 320
        sample_rate = self.SAMPLE_RATE

        self.mic = MicDevice(sr=sample_rate, chunk_size=self.CHUNK_SIZE)

        self.sender = RTPSendClient(
            remote_host_address=remote_host_address,
            remote_host_port=remote_host_port,
        )
        self.receiver = RTPReceiveClient(
            host_address=host_address,
            rtp_port=rtp_port,
        )

        # Mono Opus encoder/decoder pair at the class sample rate.
        self.opus_encode = opuslib.Encoder(
            sample_rate, 1, opuslib.APPLICATION_VOIP)
        self.opus_decode = opuslib.Decoder(sample_rate, 1)

        self.aout = AudioOutput()
        self.frame_count = 0
        self.start_ts = -1.0
        print('init client')
예제 #7
0
 def __enter__(self):
     """Open the PyAudio output stream and register its cleanup callbacks.

     When the encoding hint is ``RAW_OPUS``, an Opus decoder is created and
     the hint is switched to ``LINEAR16`` before the stream format is chosen.
     ``self.callback`` is presumably ``contextlib.ExitStack.callback`` --
     TODO confirm against the base class.
     """
     if self._encoding_hint != tts_pb2.RAW_OPUS:
         self._opus_decoder = None
     else:
         import opuslib
         self._encoding_hint = tts_pb2.LINEAR16
         self._opus_decoder = opuslib.Decoder(self._sample_rate_hint, 1)

     import pyaudio
     pyaudio_lib = pyaudio.PyAudio()
     self.callback(pyaudio_lib.terminate)

     sample_format, width = _encoding_to_pyaudio_format_and_width(
         self._encoding_hint)
     self._width = width

     stream = pyaudio_lib.open(
         output=True,
         channels=1,
         format=sample_format,
         rate=self._sample_rate_hint,
     )
     self._stream = stream
     # Registration order is kept: close first, then stop_stream.
     self.callback(stream.close)
     self.callback(stream.stop_stream)
     return super().__enter__()
예제 #8
0
        def worker():  # worker thread
            nonlocal loop, im, event, f
            pcm = im.tobytes()
            if len(pcm) % 2:
                pcm = b'\0' + pcm
            encoder = opuslib.Encoder(48000, 1, 'restricted_lowdelay')
            frames = []
            for x in range(0, len(pcm), 960 * 2):
                frames.append(encoder.encode(pcm[x:x + 960 * 2], 960))
            opcmlen = len(pcm)
            decoder = opuslib.Decoder(48000, 1)
            data = b''
            for frame in frames:
                data += decoder.decode(frame, 960)
            im = Image.frombytes(im.mode, im.size, data[len(data) - opcmlen:])
            im = im.convert("RGB")

            f = io.BytesIO()
            im.save(f, "JPEG")
            f.seek(0)
            loop.call_soon_threadsafe(event.set)
예제 #9
0
 def test_reset_state(cls):
     """Smoke test: resetting a fresh stereo decoder must not raise."""
     opuslib.Decoder(48000, 2).reset_state()
예제 #10
0
 def test_get_bandwidth(self):
     """A freshly created decoder reports a bandwidth of 0."""
     fresh_decoder = opuslib.Decoder(48000, 2)
     reported = fresh_decoder.bandwidth
     self.assertEqual(reported, 0)
예제 #11
0
 def __init__(self, samples=48000, frames=960, channels=1):
     """Create a matching Opus encoder/decoder pair.

     Args:
         samples: First argument to both the encoder and the decoder.
         frames: Stored on the instance as ``frames``.
         channels: Second argument to both the encoder and the decoder.
     """
     self.frames = frames
     codec_args = (samples, channels)
     self.encoder = opuslib.Encoder(*codec_args, opuslib.APPLICATION_VOIP)
     self.decoder = opuslib.Decoder(*codec_args)
예제 #12
0
def handle_post(in_data_raw, query_params, headers):
    """Handle one audio POST from a client and return the audio to play back.

    Parses the clocks and control parameters from ``query_params``, decodes
    the uploaded Opus packets, adds them into the shared circular
    ``audio_queue`` while a song is in progress, then reads, scales and
    re-encodes the samples for the interval ending at the client's read clock
    (or echoes the client's own audio when ``loopback`` is ``"true"``).

    Updates module-level state: ``users``, ``audio_queue``,
    ``n_people_queue`` and the clock/volume globals declared below.

    Args:
        in_data_raw: Request body; a buffer of packed Opus packets (unpacked
            with ``unpack_multi``). May be empty, in which case ``n_samples``
            samples of silence are assumed.
        query_params: Mapping from parameter name to a list of string values
            (presumably the output of ``urllib.parse.parse_qs`` -- confirm
            against the caller). ``read_clock``, ``userid`` and ``username``
            are required.
        headers: Request headers; only used in the new-session debug log.

    Returns:
        A ``(data, x_audio_metadata)`` tuple: the packed, Opus-encoded audio
        as ``bytes`` and a JSON string describing server/client clock state.

    Raises:
        ValueError: If a required parameter is missing, the sample count is
            inconsistent, the write clock is too far in the past, or a client
            clock did not advance by exactly ``n_samples`` since the previous
            request.
    """
    global last_request_clock
    global first_client_write_clock
    global first_client_total_samples
    global first_client_value
    global global_volume
    global song_end_clock
    global song_start_clock

    # NOTE NOTE NOTE:
    # * All `clock` variables are measured in samples.
    # * All `clock` variables represent the END of an interval, NOT the
    #   beginning. It's arbitrary which one to use, but you have to be
    #   consistent, and trust me that it's slightly nicer this way.

    # Note: This will eventually create a precision problem for the JS
    #   clients, which are using floats. Specifically, at 44100 Hz, it will
    #   fail on February 17, 5206.
    server_clock = int(time.time() * SAMPLE_RATE)

    # Each query parameter value is a list; only the first entry is used.
    client_write_clock = query_params.get("write_clock", None)
    if client_write_clock is not None:
        client_write_clock = int(client_write_clock[0])
    client_read_clock = query_params.get("read_clock", None)
    if client_read_clock is not None:
        client_read_clock = int(client_read_clock[0])
    else:
        raise ValueError("no client read clock")

    n_samples = query_params.get("n_samples", None)
    if n_samples is not None:
        n_samples = int(n_samples[0])

    userid = None
    userids = query_params.get("userid", None)

    username = None
    usernames = query_params.get("username", None)
    if not userids or not usernames:
        raise ValueError("missing username/id")

    # Single-element unpacking: raises ValueError if either parameter was
    # supplied more than once.
    userid, = userids
    username, = usernames
    if not userid or not username:
        raise ValueError("missing username/id")

    if client_write_clock is None:
        # New session, write some debug info to disk
        logging.debug("*** New client:" + str(headers) + str(query_params) +
                      "\n\n")

    # This indicates a new session, so flush everything. (There's probably a better way to handle this.)
    prev_last_write_clock = None
    if (client_write_clock is None) and (userid in users):
        prev_last_write_clock = users[userid].last_write_clock
        del users[userid]

    update_users(userid, username, server_clock, client_read_clock)
    user = users[userid]
    # Carry the previous session's write clock over to the new user record.
    if user.last_write_clock is None:
        user.last_write_clock = prev_last_write_clock

    volumes = query_params.get("volume", None)
    if volumes:
        volume, = volumes
        # Exponential curve: volume 0 -> 0.001 and volume 1 -> ~1.0
        # (exp(6.908) is approximately 1000).
        global_volume = math.exp(6.908 * float(volume)) / 1000

    msg_chats = query_params.get("chat", None)
    if msg_chats:
        msg_chats, = msg_chats
        msg_chats = json.loads(msg_chats)
        # Queue each chat message for every user except the sender.
        for other_userid, other_user in users.items():
            if other_userid != userid:
                for msg_chat in msg_chats:
                    other_user.chats_to_send.append((username, msg_chat))

    mic_volumes = query_params.get("mic_volume", None)
    if mic_volumes:
        mic_volume, = mic_volumes
        # The payload is a JSON list of (userid, volume) pairs.
        for other_userid, new_mic_volume in json.loads(mic_volume):
            if other_userid in users:
                # Clamp the requested volume to the range [0, 2].
                if new_mic_volume > 2:
                    new_mic_volume = 2
                elif new_mic_volume < 0:
                    new_mic_volume = 0

                users[other_userid].mic_volume = new_mic_volume

                # https://www.dr-lex.be/info-stuff/volumecontrols.html
                # Make 1 be unity
                users[other_userid].scaled_mic_volume = math.exp(
                    6.908 * new_mic_volume * .5) / math.exp(6.908 * 0.5)

    if query_params.get("request_lead", None):
        assign_delays(userid)
        song_start_clock = None
        song_end_clock = 0

    if query_params.get("mark_start_singing", None):
        song_start_clock = user.last_write_clock
        song_end_clock = 0

    if query_params.get("mark_stop_singing", None):
        song_end_clock = user.last_write_clock

        # They're done singing, send them to the end.
        user.delay_to_send = max_position

    # View the request body as raw bytes; it is decoded further below.
    in_data = np.frombuffer(in_data_raw, dtype=np.uint8)

    # Audio from clients is summed, so we need to clear the circular
    #   buffer ahead of them. The range we are clearing was "in the
    #   future" as of the last request, and we never touch the future,
    #   so nothing has touched it yet "this time around".
    if last_request_clock is not None:
        clear_samples = min(server_clock - last_request_clock, QUEUE_LENGTH)
        wrap_assign(audio_queue, last_request_clock,
                    np.zeros(clear_samples, np.float32))
        wrap_assign(n_people_queue, last_request_clock,
                    np.zeros(clear_samples, np.int16))

    # Remember the previous value for the response metadata before updating.
    saved_last_request_clock = last_request_clock
    last_request_clock = server_clock

    # Each user gets an (encoder, decoder) pair, created on first use.
    if not user.opus_state:
        # initialize
        user.opus_state = (opuslib.Encoder(SAMPLE_RATE, CHANNELS,
                                           opuslib.APPLICATION_AUDIO),
                           opuslib.Decoder(SAMPLE_RATE, CHANNELS))
    (enc, dec) = user.opus_state

    # If the user does not send us any data, we will treat it as silence of length n_samples. This is useful if they are just starting up.
    if len(in_data) == 0:
        if n_samples is None:
            raise ValueError("Must provide either n_samples or data")
        in_data = np.zeros(n_samples, np.float32)
    else:
        packets = unpack_multi(in_data)
        decoded = []
        for p in packets:
            d = dec.decode_float(p.tobytes(),
                                 OPUS_FRAME_SAMPLES,
                                 decode_fec=False)
            decoded.append(np.frombuffer(d, np.float32))
        in_data = np.concatenate(decoded)

    # Sending n_samples is optional if data is sent, but in case of both they must match
    if n_samples is None:
        n_samples = len(in_data)
    if n_samples != len(in_data):
        raise ValueError("Client is confused about how many samples it sent")

    if client_write_clock is None:
        # New session: nothing is written into the queue for this request.
        pass
    elif client_write_clock - n_samples < server_clock - QUEUE_LENGTH:
        # Client is too far behind and going to wrap the buffer. :-(
        raise ValueError("Client's write clock is too far in the past")
    else:
        if user.last_seen_write_clock is not None:
            # For debugging purposes only
            if client_write_clock - n_samples != user.last_seen_write_clock:
                raise ValueError(
                    f'Client write clock desync ('
                    f'{client_write_clock - n_samples} - '
                    f'{user.last_seen_write_clock} = '
                    f'{client_write_clock - n_samples - user.last_seen_write_clock})'
                )
            # The song's end falls inside the interval this request covers.
            if user.last_write_clock <= song_end_clock <= client_write_clock:
                user.delay_to_send = max_position

        user.last_seen_write_clock = client_write_clock
        # NOTE(review): client_write_clock cannot be None in this branch (the
        # first `if` above handles that case), so this check is always true.
        if client_write_clock is not None:
            user.last_write_clock = client_write_clock

        in_data *= user.scaled_mic_volume

        # Don't keep any input unless a song is in progress.
        if (song_start_clock and client_write_clock > song_start_clock
                and (not song_end_clock
                     or client_write_clock - n_samples < song_end_clock)):
            # Sum this client's audio into what is already in the queue.
            old_audio = wrap_get(audio_queue, client_write_clock - n_samples,
                                 n_samples)
            new_audio = old_audio + in_data
            wrap_assign(audio_queue, client_write_clock - n_samples, new_audio)

            # Count one more contributor for each sample in the interval.
            old_n_people = wrap_get(n_people_queue,
                                    client_write_clock - n_samples, n_samples)
            new_n_people = old_n_people + np.ones(n_samples, np.int16)
            wrap_assign(n_people_queue, client_write_clock - n_samples,
                        new_n_people)

    # Why subtract n_samples above and below? Because the future is to the
    #   right. So when a client asks for n samples at time t, what they
    #   actually want is "the time interval ending at t", i.e. [t-n, t). Since
    #   the latest possible time they can ask for is "now", this means that
    #   the latest possible time interval they can get is "the recent past"
    #   instead of "the near future".
    # This doesn't matter to the clients if they all always use the same value of
    #   n_samples, but it matters if n_samples changes, and it matters for
    #   the server's zeroing.

    # For debugging purposes only
    if user.last_seen_read_clock is not None:
        if client_read_clock - n_samples != user.last_seen_read_clock:
            raise ValueError(
                f'Client read clock desync ('
                f'{client_read_clock - n_samples} - '
                f'{user.last_seen_read_clock} = '
                f'{client_read_clock - n_samples - user.last_seen_read_clock})'
            )
    user.last_seen_read_clock = client_read_clock

    if query_params.get("loopback", [None])[0] == "true":
        # Loopback: return the client's own (decoded, scaled) audio.
        data = in_data
    else:
        data = wrap_get(audio_queue, client_read_clock - n_samples, n_samples)
        n_people = wrap_get(n_people_queue, client_read_clock - n_samples,
                            n_samples)

        # We could scale volume by having n_people be the number of
        # earlier people and then scale by a simple 1/n_people.  But a
        # curve of (1 + X) / (n_people + X) falls a bit less
        # dramatically and should sound better.
        #
        # Compare:
        #   https://www.wolframalpha.com/input/?i=graph+%281%29+%2F+%28x%29+from+1+to+10
        #   https://www.wolframalpha.com/input/?i=graph+%281%2B3%29+%2F+%28x%2B3%29+from+1+to+10
        data = data * (1 + N_PHANTOM_PEOPLE) / (n_people + N_PHANTOM_PEOPLE)

        data *= global_volume

    # Split into frames of OPUS_FRAME_SAMPLES samples; reshape raises
    # ValueError if the sample count is not a multiple of that.
    packets = data.reshape([-1, OPUS_FRAME_SAMPLES])
    encoded = []
    for p in packets:
        e = np.frombuffer(enc.encode_float(p.tobytes(), OPUS_FRAME_SAMPLES),
                          np.uint8)
        encoded.append(e)
    data = pack_multi(encoded).tobytes()

    x_audio_metadata = json.dumps({
        "server_clock": server_clock,
        "server_sample_rate": SAMPLE_RATE,
        "last_request_clock": saved_last_request_clock,
        "client_read_clock": client_read_clock,
        "client_write_clock": client_write_clock,
        "user_summary": user_summary(),
        "chats": user.chats_to_send,
        "delay_seconds": user.delay_to_send,
        "song_start_clock": song_start_clock,
        # Both the following uses units of 128-sample frames
        "queue_size": QUEUE_LENGTH / FRAME_SIZE,
    })

    # Chats and the delay were serialised above; reset them so they are
    # only delivered once.
    user.chats_to_send.clear()
    user.delay_to_send = None

    return data, x_audio_metadata
            text=
            "И мысли тоже тяжелые и медлительные, падают неторопливо и редко одна за другой, точно песчинки "
            "в разленившихся песочных часах."),
        audio_config=tts_pb2.AudioConfig(
            audio_encoding=tts_pb2.RAW_OPUS,
            sample_rate_hertz=sample_rate,
        ),
    )


# Stream synthesized speech from the TTS service and play it as it arrives.
# The output stream and the PyAudio instance are released in `finally`
# blocks so the audio device is freed even if the gRPC call fails, and
# stop_stream() runs before the stream is closed.
pyaudio_lib = pyaudio.PyAudio()
try:
    f = pyaudio_lib.open(output=True,
                         channels=1,
                         format=pyaudio.paInt16,
                         rate=sample_rate)
    try:
        opus_decoder = opuslib.Decoder(sample_rate, 1)

        stub = tts_pb2_grpc.TextToSpeechStub(
            grpc.secure_channel(endpoint, grpc.ssl_channel_credentials()))
        request = build_request()
        metadata = authorization_metadata(api_key, secret_key,
                                          "tinkoff.cloud.tts")
        responses = stub.StreamingSynthesize(request, metadata=metadata)

        # Report the estimated duration if the server sent it.
        for key, value in responses.initial_metadata():
            if key == "x-audio-duration-seconds":
                print("Estimated audio duration is {:.2f} seconds".format(
                    float(value)))
                break

        # Decode each Opus chunk to PCM and write it to the output stream.
        # NOTE(review): MAX_ALLOWED_FRAME_RATE is passed as the decoder's
        # frame-size argument despite its name -- confirm the intended value.
        for stream_response in responses:
            f.write(
                opus_decoder.decode(stream_response.audio_chunk,
                                    MAX_ALLOWED_FRAME_RATE))
    finally:
        f.stop_stream()
        f.close()
finally:
    pyaudio_lib.terminate()
예제 #14
0
    wave_out = wave.open('wav_out1.wav', 'w')
    wave_out.setnchannels(2)
    wave_out.setsampwidth(2)
    wave_out.setframerate(48000)
    wave_out.writeframes(pcm)
    wave_out.close()

    #framesize is per channel samples, but max is 60ms frame, ie 2880
    # not particularly good for storage, since we need to break it up into
    # many frames

    encoder = opuslib.Encoder(48000, 2, opuslib.APPLICATION_AUDIO)
    opus_encode = encoder.encode(pcm_data=pcm, frame_size=2880)
    print(len(opus_encode))

    decoder = opuslib.Decoder(48000, 2)
    opus_decode = decoder.decode(opus_data=opus_encode, frame_size=2880)
    print(len(opus_decode))

    #raw = struct.unpack('h' * (len(opus_decode)//2), opus_decode)
    #raw = raw[0::2]
    #print(len(raw))

    out = wave.open('wav_out2.wav', 'w')
    out.setnchannels(2)
    out.setsampwidth(2)
    out.setframerate(48000)
    out.writeframes(opus_decode)
    out.close()
예제 #15
0
def handle_post(userid, n_samples, in_data_raw,
                query_string, print_status=True, client_address=None) -> Tuple[Any, str]:
    """Opus-facing wrapper around ``handle_json_post``.

    Decodes the client's packed Opus packets to float samples, forwards them
    to ``handle_json_post`` with the RMS volume appended to the query string,
    then Opus-encodes the audio part of the reply. Also appends a
    ``"<unix time> <rms volume>"`` line to the per-user file in ``LOG_DIR``
    (volume is logged as ``-1`` when the client sent no data).

    Args:
        userid: Numeric user id as a string; keys the per-user codec pair.
        n_samples: Number of samples the client claims to send; ``0`` means
            "not specified".
        in_data_raw: Request body with packed Opus packets; may be empty.
        query_string: Raw query string; must already contain at least the
            ``userid`` parameter.
        print_status: Forwarded to ``handle_json_post``.
        client_address: Forwarded to ``handle_json_post``.

    Returns:
        ``(body, x_audio_metadata)`` where ``body`` is the user summary
        followed by the packed Opus audio, as ``bytes``.

    Raises:
        ValueError: If ``userid`` is not numeric, neither samples nor
            ``n_samples`` were provided, or ``n_samples`` disagrees with the
            decoded sample count.
    """
    if not userid.isdigit():
        raise ValueError("UserID must be numeric; got: %r"%userid)

    # Each user gets a dedicated encoder/decoder pair, created on first use.
    try:
        enc, dec = users[userid]
    except KeyError:
        enc = opuslib.Encoder(server.SAMPLE_RATE, CHANNELS,
                              opuslib.APPLICATION_AUDIO)
        dec = opuslib.Decoder(server.SAMPLE_RATE, CHANNELS)
        users[userid] = enc, dec

    in_data = np.frombuffer(in_data_raw, dtype=np.uint8)

    # If the user does not send us any data, we will treat it as silence of
    # length n_samples. This is useful if they are just starting up.
    client_no_data = len(in_data)==0
    if client_no_data:
        if n_samples == 0:
            raise ValueError("Must provide either n_samples or data")
        in_data = np.zeros(n_samples, np.float32)
    else:
        decoded = [
            np.frombuffer(
                dec.decode_float(p.tobytes(), OPUS_FRAME_SAMPLES,
                                 decode_fec=False),
                np.float32)
            for p in unpack_multi(in_data)
        ]
        in_data = np.concatenate(decoded)

    # Sending n_samples is optional if data is sent, but in case of both
    # they must match.
    if n_samples == 0:
        n_samples = len(in_data)
    if n_samples != len(in_data):
        # The closing parenthesis was missing from this message.
        raise ValueError("Client is confused about how many samples it sent (got %s expected %s)" % (n_samples, len(in_data)))

    rms_volume = calculate_volume(in_data)
    # This is only safe because query_string is guaranteed to already contain
    #   at least the userid parameter.
    query_string += '&rms_volume=%s'%rms_volume

    data, x_audio_metadata = handle_json_post(
        in_data, query_string, print_status, client_address=client_address)

    # Divide data into user_summary and raw audio data. The first two bytes
    # hold the big-endian count of users in the summary.
    n_users_in_summary, = struct.unpack(">H", data[:2])
    user_summary_n_bytes = server.summary_length(n_users_in_summary)

    user_summary = data[:user_summary_n_bytes]
    raw_audio = data[user_summary_n_bytes:].view(np.float32)

    # Encode raw audio, one Opus packet per frame of OPUS_FRAME_SAMPLES.
    encoded = [
        np.frombuffer(enc.encode_float(p.tobytes(), OPUS_FRAME_SAMPLES),
                      np.uint8)
        for p in raw_audio.reshape([-1, OPUS_FRAME_SAMPLES])
    ]
    compressed_audio = pack_multi(encoded)

    # Combine user_summary and compressed audio data
    data = np.append(user_summary, compressed_audio)

    # userid passed the isdigit() check above, so it contains only digit
    # characters when used as a file name here.
    with open(os.path.join(LOG_DIR, userid), "a") as log_file:
        log_file.write("%d %.8f\n"%(
            time.time(),
            -1 if client_no_data else rms_volume))

    return data.tobytes(), x_audio_metadata