def test_n_frames_audio() -> None:
    """Write ``n`` whole frames of silence and check the decoded length.

    The audio is mono, 16-bit, 48 kHz, encoded in 20 ms frames; the
    decoded buffer is expected to hold exactly the bytes written.
    """
    # How many complete frames the test writes.
    n = 2

    sample_rate_hz = 48000
    frame_duration_ms = 20
    sample_width_bytes = 2  # 16-bit samples
    samples_per_frame = frame_duration_ms * sample_rate_hz // 1000
    expected_bytes = sample_width_bytes * samples_per_frame * n

    # Configure a mono encoder.
    opus_encoder = pyogg.OpusBufferedEncoder()
    opus_encoder.set_application("audio")
    opus_encoder.set_sampling_frequency(sample_rate_hz)
    opus_encoder.set_channels(1)
    opus_encoder.set_frame_size(frame_duration_ms)

    # Write the silence through an OggOpusWriter and finish the file.
    out_path = f"test_ogg_opus_writer__test_{n}_frames_audio.opus"
    ogg_writer = pyogg.OggOpusWriter(out_path, opus_encoder)
    silence = bytearray(b"\x00" * expected_bytes)
    ogg_writer.write(memoryview(silence))
    ogg_writer.close()

    # Read the file back and compare lengths.
    decoded = pyogg.OpusFile(out_path)
    assert len(decoded.buffer) == expected_bytes
def text2audio(self, text):
    """Synthesize ``text`` and return the audio as Ogg Opus bytes.

    A streaming synthesis request is sent over a TLS-secured gRPC
    channel and every received audio chunk is encoded into an in-memory
    Ogg Opus stream (mono, 20 ms frames, ``self._sample_rate``).

    Args:
        text: The text handed to ``self._build_request``.

    Returns:
        The complete contents of the in-memory Ogg Opus stream.
    """
    credentials = grpc.ssl_channel_credentials()

    # Use the channel as a context manager so that it is closed when the
    # call finishes; previously a new channel was leaked on every call.
    with grpc.secure_channel(self._endpoint, credentials) as channel:
        stub = tts_pb2_grpc.TextToSpeechStub(channel)
        request = self._build_request(text)
        metadata = authorization_metadata(
            self._api_key, self._secret_key, "tinkoff.cloud.tts")
        responses = stub.StreamingSynthesize(request, metadata=metadata)

        # Report the estimated duration if the server announced the
        # number of samples in the initial metadata.
        for key, value in responses.initial_metadata():
            if key == "x-audio-num-samples":
                print("Estimated audio duration is "
                      + str(int(value) / self._sample_rate)
                      + " seconds")
                break

        f = io.BytesIO()
        ogg_opus_writer = pyogg.OggOpusWriter(f)
        ogg_opus_writer.set_application("audio")
        ogg_opus_writer.set_sampling_frequency(self._sample_rate)
        ogg_opus_writer.set_channels(1)
        ogg_opus_writer.set_frame_size(20)  # milliseconds

        for stream_response in responses:
            ogg_opus_writer.encode(stream_response.audio_chunk)

        # Close the writer before reading the buffer back.
        ogg_opus_writer.close()

    # getvalue() returns the whole buffer regardless of the stream
    # position, so no seek is required.
    return f.getvalue()
def test_custom_pre_skip() -> None:
    """Check that audio no longer than the custom pre-skip decodes to nothing.

    Exactly ``samples_of_pre_skip`` samples of silence are written to a
    writer created with ``custom_pre_skip=samples_of_pre_skip``; the
    decoded file is then expected to be empty.
    """
    # Use a filename unique to this test.  It previously reused the
    # name belonging to the zero-length-audio test, so the two tests
    # overwrote each other's output.
    filename = "test_ogg_opus_writer__test_custom_pre_skip.opus"
    samples_of_pre_skip = 500

    encoder = pyogg.OpusBufferedEncoder()
    encoder.set_application("audio")
    encoder.set_sampling_frequency(48000)
    channels = 1
    encoder.set_channels(channels)
    encoder.set_frame_size(20)  # milliseconds

    writer = pyogg.OggOpusWriter(
        filename,
        encoder,
        custom_pre_skip=samples_of_pre_skip,
    )

    # Create a buffer of silence that is exactly as long as the pre-skip
    bytes_per_sample = 2
    buf = bytearray(
        b"\x00" * bytes_per_sample * channels * samples_of_pre_skip
    )
    writer.write(memoryview(buf))

    # Close the file
    writer.close()

    # Test the length of the output is 0
    opus_file = pyogg.OpusFile(filename)
    assert len(opus_file.buffer) == 0
def test_close_twice():
    """Closing an already-closed writer must not raise."""
    sink = MockFile()

    ogg_writer = pyogg.OggOpusWriter(sink)
    ogg_writer.set_application("audio")
    ogg_writer.set_sampling_frequency(48000)
    ogg_writer.set_channels(2)
    ogg_writer.set_frame_size(20)  # milliseconds

    # The second iteration is the one under test.
    for _ in range(2):
        ogg_writer.close()
def test_error_after_close():
    """Encoding through a closed writer must raise ``PyOggError``."""
    sink = MockFile()

    closed_writer = pyogg.OggOpusWriter(sink)
    closed_writer.set_application("audio")
    closed_writer.set_sampling_frequency(48000)
    closed_writer.set_channels(2)
    closed_writer.set_frame_size(20)  # milliseconds
    closed_writer.close()

    # Any further encode call is expected to be rejected.
    with pytest.raises(pyogg.PyOggError):
        closed_writer.encode(None)
def test_duplicate_audio():
    """Re-encode the 5 s demonstration file through ``OggOpusWriter``.

    The test passes if loading, encoding and closing all complete
    without raising.
    """
    # Load the demonstration file that is exactly 5 seconds long
    filename = "../examples/left-right-demo-5s.opus"
    opus_file = pyogg.OpusFile(filename)

    # Save the audio using OggOpusWriter
    out_filename = "test_ogg_opus_writer__test_duplicate_audio.opus"
    writer = pyogg.OggOpusWriter(out_filename)
    writer.set_application("audio")
    writer.set_sampling_frequency(48000)
    writer.set_channels(2)
    writer.set_frame_size(20)  # milliseconds
    writer.encode(opus_file.buffer)

    # Close explicitly so the output is completed at a known point
    # instead of whenever the writer happens to be garbage-collected.
    writer.close()
def test_close_twice() -> None:
    """Closing an already-closed writer must not raise."""
    opus_encoder = pyogg.OpusBufferedEncoder()
    opus_encoder.set_application("audio")
    opus_encoder.set_sampling_frequency(48000)
    opus_encoder.set_channels(2)
    opus_encoder.set_frame_size(20)  # milliseconds

    sink = MockFile()
    # MockFile is not a type MyPy accepts here; the error is ignored
    # on purpose.
    ogg_writer = pyogg.OggOpusWriter(sink, opus_encoder)  # type: ignore

    # The second iteration is the one under test.
    for _ in range(2):
        ogg_writer.close()
def test_error_after_close() -> None:
    """Writing through a closed writer must raise ``PyOggError``."""
    opus_encoder = pyogg.OpusBufferedEncoder()
    opus_encoder.set_application("audio")
    opus_encoder.set_sampling_frequency(48000)
    opus_encoder.set_channels(2)
    opus_encoder.set_frame_size(20)  # milliseconds

    sink = MockFile()
    # MockFile is not a type MyPy accepts here; the error is ignored
    # on purpose.
    closed_writer = pyogg.OggOpusWriter(sink, opus_encoder)  # type: ignore
    closed_writer.close()

    # Even an empty write is expected to be rejected once closed.
    empty_pcm = memoryview(bytearray(b""))
    with pytest.raises(pyogg.PyOggError):
        closed_writer.write(empty_pcm)
def send_sample_opus(self):
    """Encode ``self.recording_buffer`` as Ogg Opus and send it.

    The buffer is encoded in memory (mono, 20 ms frames) with the
    configured application and sampling rate, the elapsed time is
    printed, and the bytes are passed to ``self.send_sample_payload``
    with the ``audio/ogg;codecs=opus`` content type.
    """
    started_ns = time.perf_counter_ns()

    opus_encoder = pyogg.OpusBufferedEncoder()
    opus_encoder.set_application(self.opus_application)
    opus_encoder.set_sampling_frequency(self.sampling_rate)
    opus_encoder.set_channels(1)
    opus_encoder.set_frame_size(20)  # 20ms is the opus default

    with io.BytesIO() as sink:
        ogg_writer = pyogg.OggOpusWriter(sink, opus_encoder)
        ogg_writer.write(self.recording_buffer)
        ogg_writer.close()

        # Convert the nanosecond delta to seconds for reporting.
        elapsed_s = (time.perf_counter_ns() - started_ns) / 1_000_000_000
        print("Encoding time: %.1f s" % elapsed_s)

        # The buffer must still be open when its contents are fetched.
        payload = sink.getvalue()
        self.send_sample_payload(payload, "audio/ogg;codecs=opus")
def test_already_loaded_file() -> None:
    """Write the 5 s demonstration audio to an already-open file object.

    The writer is given a file opened in binary write mode rather than a
    filename.  The test passes if writing and closing complete without
    raising.
    """
    # Load the demonstration file that is exactly 5 seconds long
    filename = "../examples/left-right-demo-5s.opus"
    opus_file = pyogg.OpusFile(filename)

    encoder = pyogg.OpusBufferedEncoder()
    encoder.set_application("audio")
    encoder.set_sampling_frequency(48000)
    encoder.set_channels(2)
    encoder.set_frame_size(20)  # milliseconds

    # Use a filename unique to this test; it previously reused the
    # output name of the duplicate-audio test.
    out_filename = "test_ogg_opus_writer__test_already_loaded_file.opus"

    # The context manager guarantees the file handle is released even
    # if the writer raises.
    with open(out_filename, "wb") as f:
        writer = pyogg.OggOpusWriter(f, encoder)
        writer.write(opus_file.buffer)

        # Close the writer while the underlying file is still open
        writer.close()
def test_zero_length_audio():
    """Encoding an empty buffer must produce a file with no audio."""
    out_path = "test_ogg_opus_writer__test_zero_length_audio.opus"
    channel_count = 1

    ogg_writer = pyogg.OggOpusWriter(out_path)
    ogg_writer.set_application("audio")
    ogg_writer.set_sampling_frequency(48000)
    ogg_writer.set_channels(channel_count)
    ogg_writer.set_frame_size(20)  # milliseconds

    # Encode nothing at all, then finish the file.
    ogg_writer.encode(b"")
    ogg_writer.close()

    # The decoded buffer is expected to be empty.
    decoded = pyogg.OpusFile(out_path)
    assert len(decoded.buffer) == 0
def test_zero_length_audio() -> None:
    """Writing an empty buffer must produce a file with no audio."""
    out_path = "test_ogg_opus_writer__test_zero_length_audio.opus"
    channel_count = 1

    opus_encoder = pyogg.OpusBufferedEncoder()
    opus_encoder.set_application("audio")
    opus_encoder.set_sampling_frequency(48000)
    opus_encoder.set_channels(channel_count)
    opus_encoder.set_frame_size(20)  # milliseconds

    # Write nothing at all, then finish the file.
    ogg_writer = pyogg.OggOpusWriter(out_path, opus_encoder)
    empty_pcm = memoryview(bytearray(b""))
    ogg_writer.write(empty_pcm)
    ogg_writer.close()

    # The decoded buffer is expected to be empty.
    decoded = pyogg.OpusFile(out_path)
    assert len(decoded.buffer) == 0
# Names of the two input files
filename_1 = "left-demo-1s.opus"
filename_2 = "right-demo-1s.opus"

# Load both inputs
file_1 = pyogg.OpusFile(filename_1)
file_2 = pyogg.OpusFile(filename_2)

# Stereo encoder at 48 kHz using 20 ms frames
encoder = pyogg.OpusBufferedEncoder()
encoder.set_application("audio")
encoder.set_sampling_frequency(48000)
encoder.set_channels(2)
encoder.set_frame_size(20)  # milliseconds

# The output file receives the audio of the first input followed by
# the audio of the second.
filename_out = "output-concat.opus"
file_out = pyogg.OggOpusWriter(filename_out, encoder)
for source in (file_1, file_2):
    file_out.write(source.buffer)

# Finish the output explicitly (dropping the last reference to
# file_out would also close it).
file_out.close()

print("Finished")
# Open the wav file
wave_read = wave.open(filename, "rb")
print("Reading wav from file '{:s}'".format(filename))

# Query the wav's specification, then report it
channels = wave_read.getnchannels()
samples_per_second = wave_read.getframerate()
bytes_per_sample = wave_read.getsampwidth()
original_length = wave_read.getnframes()
print("Number of channels:", channels)
print("Sampling frequency:", samples_per_second)
print("Length:", original_length)

# The output is written next to the input, with ".opus" appended
output_filename = filename + ".opus"
print("Writing OggOpus file to '{:s}'".format(output_filename))

# Configure the writer to match the wav's format
ogg_opus_writer = pyogg.OggOpusWriter(output_filename)
ogg_opus_writer.set_application("audio")
ogg_opus_writer.set_sampling_frequency(samples_per_second)
ogg_opus_writer.set_channels(channels)
ogg_opus_writer.set_frame_size(20)  # milliseconds

# Frame size in samples per channel: 20 ms expressed in seconds,
# multiplied by the sampling rate
desired_frame_duration = 20 / 1000  # seconds
desired_frame_size = int(desired_frame_duration * samples_per_second)

# Step through the wav file's PCM data.  readframes() takes a count
# of frames, so each chunk holds at most 1000 frames.
chunk_size = 1000  # frames
while True:
    # Fetch the next chunk of PCM from the wav file
    pcm = wave_read.readframes(chunk_size)
#!/usr/bin/env python3
"""Time the encoding of a tiny PCM buffer into an in-memory Ogg Opus stream."""
import io
import time

import pyogg

now = time.perf_counter_ns()

# 16 kHz mono "voip" encoder using 20 ms frames
encoder = pyogg.OpusBufferedEncoder()
encoder.set_application("voip")
encoder.set_sampling_frequency(16000)
encoder.set_channels(1)
encoder.set_frame_size(20)  # ms

with io.BytesIO() as f:
    ogg = pyogg.OggOpusWriter(f, encoder)
    ogg.write(bytearray(100))  # 100 zero bytes of PCM
    ogg.close()

# perf_counter_ns() counts nanoseconds and there are 1,000,000 ns in a
# millisecond.  Dividing by 1000 (as before) yields microseconds, which
# made the reported figure 1000 times too large.
dur = (time.perf_counter_ns() - now) / 1_000_000
print(f"Encoding time: {dur} ms")
bytes_per_sample = wave_read.getsampwidth()
original_length = wave_read.getnframes()
print("Length:", original_length)

# Configure a buffered encoder to match the wav's format
opus_buffered_encoder = pyogg.OpusBufferedEncoder()
opus_buffered_encoder.set_application("audio")
opus_buffered_encoder.set_sampling_frequency(samples_per_second)
opus_buffered_encoder.set_channels(channels)
opus_buffered_encoder.set_frame_size(20)  # milliseconds

# The output is written next to the input, with ".opus" appended
output_filename = filename + ".opus"
print("Writing OggOpus file to '{:s}'".format(output_filename))
ogg_opus_writer = pyogg.OggOpusWriter(output_filename, opus_buffered_encoder)

# Frame size in samples per channel: 20 ms expressed in seconds,
# multiplied by the sampling rate
desired_frame_duration = 20 / 1000  # seconds
desired_frame_size = int(desired_frame_duration * samples_per_second)

# Step through the wav file's PCM data.  readframes() takes a count
# of frames, so each chunk holds at most 1000 frames.
chunk_size = 1000  # frames
while True:
    # Fetch the next chunk of PCM from the wav file
    pcm = wave_read.readframes(chunk_size)

    # An empty chunk means the wav file is exhausted
    if not pcm:
        break
def record(self):
    """Play the intro and backing audio while capturing the input.

    The intro audio is played first.  Once it is exhausted, the backing
    tracks are mixed and played while the input blocks are placed on
    ``self._ring_buffer``.  Capture stops (by raising
    ``sd.CallbackStop``) once as many samples as the backing audio's
    length plus the recording latency have been stored.
    ``self._write_audio`` is scheduled every 20 ms through a
    ``LoopingCall`` once the reactor is running.

    Returns:
        The ``defer.Deferred`` stored in ``self._deferred``.
    """
    # Create deferred to return
    self._deferred = defer.Deferred()

    # Load the intro and backing audio
    intro_audio = self._load_intro_audio()
    backing_audio = self._load_backing_audio()

    # Get the recording latency and calculate the desired duration
    try:
        recording_latency_s = self._context["recording_latency"]
    except KeyError:
        log.error("Recording latency was not available in the current "
                  + "context. Assuming zero latency.")
        recording_latency_s = 0
    recording_latency_samples = int(
        recording_latency_s * self.samples_per_second
    )
    desired_samples = len(backing_audio[0]) + recording_latency_samples

    # Create an OggOpusWriter with the file_like given in the
    # constructor.  The latency is passed as the custom pre-skip.
    self._writer = pyogg.OggOpusWriter(
        self._file_like,
        custom_pre_skip=recording_latency_samples,
    )
    self._writer.set_application("audio")
    self._writer.set_sampling_frequency(self.samples_per_second)
    self._writer.set_channels(1)
    self._writer.set_frame_size(20)  # milliseconds

    # Mutable state shared with the audio callback
    class Variables:
        def __init__(self):
            self.state = RecordingMode.State.INTRO
            self.index = 0
            self.recorded_length = 0
            self.backing_length = len(backing_audio[0])

    v = Variables()

    # Start processing the audio once the reactor is running
    self._looping_call = LoopingCall(self._write_audio)
    reactor = self._context["reactor"]
    reactor.callWhenRunning(lambda: self._looping_call.start(20 / 1000))

    # Sounddevice callback for audio processing
    def callback(indata, outdata, frames, time, status):
        if status:
            print(status)

        if v.state == RecordingMode.State.INTRO:
            # Play the intro
            if v.index + frames <= len(intro_audio):
                outdata[:] = intro_audio[v.index:v.index + frames]
                v.index += frames
            else:
                # Final, partial block: play the tail of the intro.
                # (The slice used to start at 0, which replayed the
                # beginning of the intro instead of its end.)
                remaining = len(intro_audio) - v.index
                outdata[:remaining] = intro_audio[v.index:]
                outdata[remaining:].fill(0)

                # Transition to RECORD
                v.state = RecordingMode.State.RECORD
                v.index = 0

        elif v.state == RecordingMode.State.RECORD:
            # Mix the backing audio
            mixed_backing_audio = None
            for pcm in backing_audio:
                pcm_section = pcm[v.index:v.index + frames]
                if mixed_backing_audio is None:
                    mixed_backing_audio = pcm_section
                else:
                    # Build a new object rather than using "+=".  If
                    # the tracks are array slices that share memory
                    # with their source (as the slicing/fill usage
                    # suggests), "+=" would write the mix back into
                    # the first backing track.
                    mixed_backing_audio = mixed_backing_audio + pcm_section
            v.index += frames

            # Play the backing audio, padding a short final block
            # with silence
            if len(mixed_backing_audio) == frames:
                outdata[:] = mixed_backing_audio[:]
            else:
                outdata[:len(mixed_backing_audio)] = (
                    mixed_backing_audio[:])
                outdata[len(mixed_backing_audio):].fill(0)

            # Place the input into the ring buffer, from where it is
            # expected to be processed outside this time-critical
            # callback.  Only save the desired duration of audio.
            if v.recorded_length + frames < desired_samples:
                self._ring_buffer.put(indata)
                v.recorded_length += frames
            else:
                # This is the last section of audio we need
                remaining = desired_samples - v.recorded_length
                self._ring_buffer.put(indata[:remaining])
                v.recorded_length += remaining
                raise sd.CallbackStop

    def callback_finished():
        self._finished = True

    # Create Stream
    # NOTE(review): the sample rate is hard-coded to 48000 here while
    # the writer uses self.samples_per_second -- confirm they agree.
    self._stream = sd.Stream(samplerate=48000,
                             channels=1,
                             dtype=numpy.float32,
                             latency=200 / 1000,
                             callback=callback,
                             finished_callback=callback_finished)

    # Start the recording.  It is stopped by raising sd.CallbackStop
    # in the callback when sufficient audio has been recorded.
    self._stream.start()

    return self._deferred