def test_same_data_as_opus_file():
    """Check that streaming the demo file yields the same data as loading it whole."""
    # Demonstration file that is exactly 5 seconds long
    demo_path = "../examples/left-right-demo-5s.opus"

    # Reference: OpusFile reads the entire file into memory
    reference = pyogg.OpusFile(demo_path)

    # Candidate: OpusFileStream does not read the entire file immediately
    stream = pyogg.OpusFileStream(demo_path)

    # Drain the stream, appending every chunk to the running total.
    # Repeated concatenation is inefficient and only acceptable in a test.
    streamed = bytes()
    chunk = stream.get_buffer()
    while chunk is not None:  # None marks the end of the stream
        streamed += chunk
        chunk = stream.get_buffer()

    assert streamed == reference.buffer
def test_custom_pre_skip() -> None:
    """Check that audio no longer than a custom pre-skip decodes to nothing.

    Exactly ``samples_of_pre_skip`` samples of silence are written with
    ``custom_pre_skip`` set to the same number, and the decoded buffer is
    expected to be empty.
    """
    # Give this test its own output file.  It previously reused the name
    # belonging to the zero-length-audio test, so the two tests overwrote
    # (and then read back) each other's output.
    filename = "test_ogg_opus_writer__test_custom_pre_skip.opus"
    samples_of_pre_skip = 500
    channels = 1
    bytes_per_sample = 2

    encoder = pyogg.OpusBufferedEncoder()
    encoder.set_application("audio")
    encoder.set_sampling_frequency(48000)
    encoder.set_channels(channels)
    encoder.set_frame_size(20)  # milliseconds

    # Save the audio using OggOpusWriter
    writer = pyogg.OggOpusWriter(
        filename,
        encoder,
        custom_pre_skip=samples_of_pre_skip,
    )

    # A zero-filled buffer of silence, exactly as long as the pre-skip
    buf = bytearray(bytes_per_sample * channels * samples_of_pre_skip)
    writer.write(memoryview(buf))

    # Close the file
    writer.close()

    # Test the length of the output is 0
    opus_file = pyogg.OpusFile(filename)
    assert len(opus_file.buffer) == 0
def test_same_data_as_opus_file_using_as_array():
    """Check that streamed array chunks match the array of the whole file."""
    import numpy  # type: ignore

    # Demonstration file that is exactly 5 seconds long
    demo_path = "../examples/left-right-demo-5s.opus"

    # Reference: OpusFile reads the entire file into memory
    reference = pyogg.OpusFile(demo_path)

    # Candidate: OpusFileStream does not read the entire file immediately
    stream = pyogg.OpusFileStream(demo_path)

    # Drain the stream.  The first chunk seeds the result and later chunks
    # are concatenated onto it; inefficient, but fine for a test.
    combined = None
    chunk = stream.get_buffer_as_array()
    while chunk is not None:  # None marks the end of the stream
        if combined is None:
            combined = chunk
        else:
            combined = numpy.concatenate((combined, chunk))
        chunk = stream.get_buffer_as_array()

    # Every element must be identical in both arrays
    assert numpy.all(combined == reference.as_array())
def audio():
    """Transcribe and label an uploaded speech recording.

    Reads the ``speech`` upload from the current request.  An
    ``audio/ogg`` upload is decoded with pyogg and rewritten as a 16-bit
    WAV file; an ``audio/wav`` upload is saved as-is.  The WAV file is then
    passed to the speech recogniser and to ``model.label``.

    Returns:
        A JSON response with the keys ``label`` and ``speech``, or a JSON
        error with HTTP status 415 when the upload has any other mimetype.
    """
    speech = request.files['speech']
    OUTPUT_FILE = './speech.wav'

    if speech.mimetype == 'audio/ogg':
        speech.save('./speech.ogg')
        opus = pyogg.OpusFile('./speech.ogg')

        # buffer_length counts bytes while the buffer is indexed in 16-bit
        # samples, hence half as many samples as bytes.
        n_samples = opus.buffer_length // 2
        samples = array.array('h', opus.buffer[:n_samples])

        with wave.open(OUTPUT_FILE, 'wb') as writer:
            writer.setnchannels(opus.channels)
            writer.setframerate(opus.frequency)
            writer.setsampwidth(2)
            # writeframes() keeps the header's frame count correct, so no
            # explicit setnframes() call is needed.
            writer.writeframes(samples)
    elif speech.mimetype == 'audio/wav':
        speech.save(OUTPUT_FILE)
    else:
        # Without this branch an unsupported upload would be ignored and a
        # stale (or missing) speech.wav would be processed instead.
        return jsonify({'error': 'unsupported mimetype: {}'.format(speech.mimetype)}), 415

    recognizer = sr.Recognizer()
    with sr.AudioFile(OUTPUT_FILE) as source:
        recording = recognizer.record(source)
        recognized = recognizer.recognize_sphinx(recording)
    print(recognized)

    label = model.label(OUTPUT_FILE)
    return jsonify({'label': label, 'speech': recognized})
def test_n_frames_audio() -> None:
    """Check that n whole frames of silence decode to exactly n frames of PCM."""
    # Number of frames to write
    n = 2
    filename = f"test_ogg_opus_writer__test_{n}_frames_audio.opus"

    # Audio parameters
    samples_per_second = 48000
    channels = 1
    frame_size_ms = 20
    bytes_per_sample = 2
    frame_size_samples = frame_size_ms * samples_per_second // 1000
    expected_bytes = bytes_per_sample * frame_size_samples * n

    # Configure the encoder
    encoder = pyogg.OpusBufferedEncoder()
    encoder.set_application("audio")
    encoder.set_sampling_frequency(samples_per_second)
    encoder.set_channels(channels)
    encoder.set_frame_size(frame_size_ms)  # milliseconds

    # Save n frames of silence using OggOpusWriter
    writer = pyogg.OggOpusWriter(filename, encoder)
    silence = bytearray(b"\x00" * expected_bytes)
    writer.write(memoryview(silence))
    writer.close()

    # The decoded output must be as long as the input
    decoded = pyogg.OpusFile(filename)
    assert len(decoded.buffer) == expected_bytes
def read_opus(self, file_name):
    """Decode an Ogg/Opus file into 16-bit PCM samples.

    The backend named by ``self.default_ogg_module`` (``'librosa'`` or
    ``'pyogg'``) is tried first.  If it fails, the default is switched to
    the other backend and the read is retried; at most two attempts are
    made in total.  Audio longer than 19 seconds is truncated.

    Args:
        file_name: Path of the file to decode.

    Returns:
        A tuple ``(y_int, sr)`` of a numpy ``int16`` array and the sample
        rate that goes with it.

    Raises:
        Exception: If no attempt produced any samples.
    """
    max_try = 2
    y_int, sr = None, None
    last_error = None
    while max_try > 0:
        try:
            if self.default_ogg_module == 'librosa':
                if self.verbose:
                    print(' - read_opus, Leyendo archivo:', file_name,
                          'con librosa')
                sample_rate = 16000
                y, sr = librosa.load(file_name, sr=sample_rate)
                # Truncate files longer than 19 s
                max_len = 19 * sr
                if y.shape[0] > max_len:
                    y = y[:max_len]
                # Scale so the peak maps to the int16 maximum; silent
                # audio has no peak to scale by and stays all-zero.
                peak = np.abs(y).max()
                if peak > 0:
                    y_int = (np.iinfo(np.int16).max / peak * y).astype(np.int16)
                else:
                    y_int = np.zeros(y.shape, dtype=np.int16)
            elif self.default_ogg_module == 'pyogg':
                if self.verbose:
                    print(' - read_opus, Leyendo archivo:', file_name, 'pyogg')
                of = pyogg.OpusFile(file_name)
                sr = of.frequency
                # buffer_length is halved to get the number of 16-bit samples
                b_len = of.buffer_length // 2
                y_int = np.array(of.buffer[:b_len], dtype=np.int16)
                # Truncate files longer than 19 s.  (This used to test the
                # undefined name ``y``, so this branch always failed.)
                max_len = 19 * sr
                if y_int.shape[0] > max_len:
                    y_int = y_int[:max_len]
                # Keep every third sample and divide the rate to match.
                # NOTE(review): no low-pass filter is applied first and the
                # samples are treated as a single channel -- confirm that
                # the inputs are mono.
                sr = sr // 3
                y_int = y_int[::3].copy()
            max_try = 0
        except Exception as err:
            last_error = err
            # Discard any partial result from the failed attempt
            y_int, sr = None, None
            print(
                ' - ERROR, opus2wav: module failure {}, changing default module'
                .format(self.default_ogg_module), file=sys.stderr)
            if self.default_ogg_module == 'pyogg':
                self.default_ogg_module = 'librosa'
            else:
                self.default_ogg_module = 'pyogg'
            # Count the failed attempt so the loop ends once both
            # backends have been tried.
            max_try -= 1
    if y_int is None:
        raise Exception(
            ' - ERROR, read_opus: unable to read opus ogg.') from last_error
    return y_int, sr
def check_play_audio():
    """Load the packaged warm-up sound and hand it to ``sd.play``."""
    sound_path = pkg_resources.resource_filename(
        "singtclient",
        "sounds/warm-up.opus",
    )

    print("Loading sound file...")
    warm_up = pyogg.OpusFile(sound_path)
    samples = warm_up.as_array()

    print("Playing...")
    sd.play(samples, warm_up.frequency)
def test_output_via_wav():
    """Decode the demo Opus file and write its PCM data to a WAV file."""
    import wave

    # Load the demonstration file that is exactly 5 seconds long
    filename = "../examples/left-right-demo-5s.opus"
    opus_file = pyogg.OpusFile(filename)

    # The context manager closes the WAV file even if a setter or the
    # write raises, instead of leaving it open.
    with wave.open("test_opus_file__test_output_via_wav.wav", "wb") as wave_out:
        wave_out.setnchannels(opus_file.channels)
        wave_out.setsampwidth(opus_file.bytes_per_sample)
        wave_out.setframerate(opus_file.frequency)
        wave_out.writeframes(opus_file.buffer)
def prepare(self, backing_track_filename):
    """Load the backing track as float PCM and create an encoder for it.

    Args:
        backing_track_filename: Path of the Opus file holding the backing
            track.
    """
    # Hold the lock for the whole update
    with self._lock:
        # Decode the backing track and scale its samples by 1 / 2**15
        backing_track = pyogg.OpusFile(backing_track_filename)
        pcm = backing_track.as_array()
        self._backing_track_pcm = pcm.astype(numpy.float32) / (2**15)

        # Create an encoder
        self._encoder = opus_helpers.create_encoder(
            self._backing_track_pcm,
            self._samples_per_second,
        )
def test_output_via_wav(pyogg_config: Config) -> None:
    """Decode the demo Opus file and write its PCM data to a WAV file.

    Args:
        pyogg_config: Provides ``rootdir`` (location of the examples) and
            ``outdir`` (where the WAV file is written).
    """
    import wave

    # Load the demonstration file that is exactly 5 seconds long
    filename = str(pyogg_config.rootdir / "examples/left-right-demo-5s.opus")
    opus_file = pyogg.OpusFile(filename)

    out_filename = str(
        pyogg_config.outdir / "test_opus_file__test_output_via_wav.wav"
    )

    # The context manager closes the WAV file even if a setter or the
    # write raises, instead of leaving it open.
    with wave.open(out_filename, "wb") as wave_out:
        wave_out.setnchannels(opus_file.channels)
        wave_out.setsampwidth(opus_file.bytes_per_sample)
        wave_out.setframerate(opus_file.frequency)
        wave_out.writeframes(opus_file.buffer)
def test_as_array():
    """Check that ``as_array()`` reports 5 seconds of samples for the demo file."""
    # Demonstration file that is exactly 5 seconds long
    demo_path = "../examples/left-right-demo-5s.opus"
    decoded = pyogg.OpusFile(demo_path)

    # Expected length: five seconds at the file's sampling frequency
    expected_seconds = 5
    expected_samples = expected_seconds * decoded.frequency

    # The first dimension of the array is compared with the sample count
    actual_samples = decoded.as_array().shape[0]
    assert actual_samples == expected_samples
def test_duplicate_audio():
    """Decode the demo file and re-encode its PCM data into a new Opus file."""
    # Load the demonstration file that is exactly 5 seconds long
    filename = "../examples/left-right-demo-5s.opus"
    opus_file = pyogg.OpusFile(filename)

    # Save the audio using OggOpusWriter
    out_filename = "test_ogg_opus_writer__test_duplicate_audio.opus"
    writer = pyogg.OggOpusWriter(out_filename)
    writer.set_application("audio")
    writer.set_sampling_frequency(48000)
    writer.set_channels(2)
    writer.set_frame_size(20)  # milliseconds
    writer.encode(opus_file.buffer)

    # Close the writer explicitly rather than leaving the output file to
    # be closed whenever the object happens to be garbage-collected.
    writer.close()
def _load_audio(self, path):
    """Load an Opus file as a mono float32 column vector.

    Args:
        path: Location of the Opus file; it is converted with ``str()``
            before being handed to pyogg.

    Returns:
        A float32 numpy array of shape ``(n, 1)`` holding the mean of the
        channels.
    """
    decoded = pyogg.OpusFile(str(path))

    # Normalise the samples.
    # NOTE(review): the divisor is 2**16; if the samples are 16-bit this
    # yields the range [-0.5, 0.5) -- confirm that this is intended.
    scaled = decoded.as_array().astype(numpy.float32)
    scaled /= 2**16

    # Average across axis 1 to get mono, then reshape to a single column
    mono = numpy.mean(scaled, axis=1)
    return numpy.reshape(mono, (-1, 1))
def test_as_bytes(pyogg_config: Config) -> None:
    """Check that the raw buffer of the demo file holds 5 seconds of audio."""
    # Demonstration file that is exactly 5 seconds long
    demo_path = str(pyogg_config.rootdir / "examples/left-right-demo-5s.opus")
    decoded = pyogg.OpusFile(demo_path)

    # Expected size: seconds x rate x bytes per sample x channels
    expected_seconds = 5
    expected_bytes = (
        expected_seconds
        * decoded.frequency
        * decoded.bytes_per_sample
        * decoded.channels
    )

    # Measure the buffer member variable directly
    actual_bytes = len(bytes(decoded.buffer))
    assert actual_bytes == expected_bytes
def __init__(self):
    """Set the default levels and timings and preload the starting sound."""
    # Re-entrant lock used to guard this object's state
    self._lock = threading.RLock()

    # Take the lock straight away while the state is initialised
    with self._lock:
        self._samples_per_second = 48000
        self._silence_duration_after = 60  # ms
        self._starting_sound_filename = "sounds/starting.opus"
        self._playback_level = 0.5
        self._monitoring_level = 0.5

        # Decode the starting sound and keep it as float32 PCM scaled
        # by 1 / 2**15
        starting_sound = pyogg.OpusFile(self._starting_sound_filename)
        self._starting_sound_pcm = starting_sound.as_array()
        self._starting_sound_pcm = (
            self._starting_sound_pcm.astype(numpy.float32) / (2**15)
        )
def test_already_loaded_file() -> None:
    """Re-encode the demo file into a file object the test opened itself."""
    # Load the demonstration file that is exactly 5 seconds long
    filename = "../examples/left-right-demo-5s.opus"
    opus_file = pyogg.OpusFile(filename)

    encoder = pyogg.OpusBufferedEncoder()
    encoder.set_application("audio")
    encoder.set_sampling_frequency(48000)
    encoder.set_channels(2)
    encoder.set_frame_size(20)  # milliseconds

    # NOTE(review): this output name is the one used by the duplicate-audio
    # test -- confirm whether sharing it is intentional.
    out_filename = "test_ogg_opus_writer__test_duplicate_audio.opus"

    # Save the audio using OggOpusWriter.  The context manager guarantees
    # the file handle is released even if encoding raises.
    with open(out_filename, "wb") as f:
        writer = pyogg.OggOpusWriter(f, encoder)
        writer.write(opus_file.buffer)

        # Close the writer while the underlying file is still open
        writer.close()
def test_zero_length_audio():
    """Check that encoding an empty buffer produces a file with no audio."""
    out_path = "test_ogg_opus_writer__test_zero_length_audio.opus"
    channels = 1

    # Configure the writer
    writer = pyogg.OggOpusWriter(out_path)
    writer.set_application("audio")
    writer.set_sampling_frequency(48000)
    writer.set_channels(channels)
    writer.set_frame_size(20)  # milliseconds

    # Encode nothing at all, then close the file
    writer.encode(b"")
    writer.close()

    # The decoded output must be empty
    decoded = pyogg.OpusFile(out_path)
    assert len(decoded.buffer) == 0
def test_zero_length_audio() -> None:
    """Check that writing an empty buffer produces a file with no audio."""
    out_path = "test_ogg_opus_writer__test_zero_length_audio.opus"
    channels = 1

    # Configure the encoder
    encoder = pyogg.OpusBufferedEncoder()
    encoder.set_application("audio")
    encoder.set_sampling_frequency(48000)
    encoder.set_channels(channels)
    encoder.set_frame_size(20)  # milliseconds

    # Write an empty buffer through OggOpusWriter, then close the file
    writer = pyogg.OggOpusWriter(out_path, encoder)
    empty = memoryview(bytearray())
    writer.write(empty)
    writer.close()

    # The decoded output must be empty
    decoded = pyogg.OpusFile(out_path)
    assert len(decoded.buffer) == 0
import pyogg

# Decode the two input files
filename_1 = "left-demo-1s.opus"
filename_2 = "right-demo-1s.opus"
file_1 = pyogg.OpusFile(filename_1)
file_2 = pyogg.OpusFile(filename_2)

# Create a buffered encoder
encoder = pyogg.OpusBufferedEncoder()
encoder.set_application("audio")
encoder.set_sampling_frequency(48000)
encoder.set_channels(2)
encoder.set_frame_size(20)  # milliseconds

# Open a third file for writing; it receives the audio of both inputs,
# one after the other.
filename_out = "output-concat.opus"
file_out = pyogg.OggOpusWriter(filename_out, encoder)

# Pass the data of each input to the writer, in order
for source in (file_1, file_2):
    file_out.write(source.buffer)

# Close the file (or delete the reference to file_out, which will
# automatically close the file for you).
file_out.close()
def test_error_in_filename():
    """Check that opening a non-existent file raises ``PyOggError``."""
    missing_path = "does-not-exist.opus"

    # Constructing the OpusFile is expected to fail
    with pytest.raises(pyogg.PyOggError):
        pyogg.OpusFile(missing_path)
""" import time import numpy # type: ignore import pyogg # Specify a file to process opus_file_filename = "left-right-demo-5s.opus" opus_file_stream_filename = "left-right-demo-5s.opus" # Open the file using OpusFile, which reads the entire file # immediately and places it into an internal buffer. start_time = time.time() opus_file = pyogg.OpusFile(opus_file_filename) end_time = time.time() duration = (end_time-start_time)*1000 array = opus_file.as_array() array_index = 0 print("Read {:d} samples from OpusFile (in {:.1f} milliseconds).".format( len(array), duration )) # Open the file using OpusFileStream, which does not read the entire # file immediately. stream = pyogg.OpusFileStream(opus_file_stream_filename) # Loop through the OpusFileStream until we've read all the data samples_read = 0
def play_file(filename):
    """Decode ``filename`` with pyogg, pass it to ``sd.play``, then call ``sd.wait``.

    Args:
        filename: Path of the Opus file to play.
    """
    decoded = pyogg.OpusFile(filename)
    samples = decoded.as_array()
    sd.play(samples, decoded.frequency)
    sd.wait()
def create_feature_from_audio(filename):
    """Extract mid-term audio features from an Opus file and classify them.

    The file is decoded with pyogg, converted to mono, summarised as the
    mean of its mid-term feature vectors (0.1 s windows over 0.01 s
    short-term windows) and passed to the classifier stored in
    ``Cough_NoCough_classifier.joblib``.

    Args:
        filename: Path of the Opus file to analyse.

    Returns:
        A tuple ``(prediction, mid_term_features)``: the classifier output
        (coughs = 0, no_cough = 1) and the ``(1, n_features)`` array of
        averaged features it was computed from.

    Raises:
        NotImplementedError: If the file has more than two channels.
    """
    import ctypes

    import numpy as np
    import pyogg
    from joblib import load
    from pyAudioAnalysis import MidTermFeatures as mF
    from pyAudioAnalysis import audioBasicIO
    from sklearn import preprocessing

    # Buffer-to-array conversion taken from
    # https://github.com/Zuzu-Typ/PyOgg/issues/19
    opus_file = pyogg.OpusFile(filename)

    # buffer_length is halved to get the number of 16-bit samples
    n_samples = opus_file.buffer_length // 2
    target_datatype = ctypes.c_short * n_samples
    buffer_as_array = ctypes.cast(
        opus_file.buffer, ctypes.POINTER(target_datatype)
    ).contents

    if opus_file.channels == 1:
        wav = np.array(buffer_as_array)
    elif opus_file.channels == 2:
        # De-interleave the two channels.  This must slice the raw buffer:
        # ``wav`` does not exist yet at this point.
        wav = np.array((buffer_as_array[0::2], buffer_as_array[1::2]))
    else:
        raise NotImplementedError()

    # This is the final numpy array
    signal = np.transpose(wav)
    sampling_rate = 48000
    print(np.shape(wav))

    # Window and step sizes, in samples
    mid_window = round(0.1 * sampling_rate)
    mid_step = round(0.1 * sampling_rate)
    short_window = round(sampling_rate * 0.01)
    short_step = round(sampling_rate * 0.01)

    signal = audioBasicIO.stereo_to_mono(signal)
    print(type(signal))
    # Feature extraction needs floating-point input
    signal = signal.astype('float64')

    mid_features, short_features, mid_feature_names = mF.mid_feature_extraction(
        signal, sampling_rate, mid_window, mid_step, short_window, short_step
    )

    # Average the feature vectors over time into a single (1, n) row
    mid_features = np.transpose(mid_features)
    mid_term_features = mid_features.mean(axis=0)
    mid_term_features = np.reshape(mid_term_features, (-1, 1))
    mid_term_features = np.transpose(mid_term_features)

    # Getting the classification result with Cough=0, No_Cough=1
    cough_classifier = load('Cough_NoCough_classifier.joblib')
    # NOTE(review): the scaler is fitted on this single row only -- confirm
    # that this matches how the classifier's training data was scaled.
    features = preprocessing.StandardScaler().fit_transform(mid_term_features)
    prediction = cough_classifier.predict(features)  # coughs=0 , no_cough = 1
    return prediction, mid_term_features
desired_frame_duration = 20 / 1000 # milliseconds desired_frame_size = int(desired_frame_duration * samples_per_second) # Loop through the wav file's PCM data and encode it as Opus chunk_size = 1000 # bytes while True: # Get data from the wav file pcm = wave_read.readframes(chunk_size) # Check if we've finished reading the wav file if len(pcm) == 0: break # Encode the PCM data ogg_opus_writer.encode(pcm) # We've finished writing the file ogg_opus_writer.close() # Check that the output file is that same length as the original print("Reading output file:", output_filename) opus_file = pyogg.OpusFile(output_filename) print("File read") output_length = opus_file.as_array().shape[0] print("Output length:", output_length) if original_length != output_length: print("ERROR: The original length is different to the output length") print("Finished.")
import pyogg
import audioop
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
import wave
import ctypes

# Decode the Ogg/Opus file into memory
opus = pyogg.OpusFile('./speech.ogg')
print(opus.buffer_length)

# buffer_length is halved to get the number of 16-bit samples.
# Reinterpret the memory behind opus.buffer as an array of that many
# int16 values.  (from_buffer() on ctypes.pointer(opus.buffer) would wrap
# the pointer object itself, not the PCM data it points to.)
bfarr_t = ctypes.c_int16 * (opus.buffer_length // 2)
bf = ctypes.cast(opus.buffer, ctypes.POINTER(bfarr_t)).contents

# Write the PCM data out as a 16-bit WAV file.  The frames written are the
# decoded samples in ``bf``; the previous code referenced an undefined
# name here and failed with NameError.
with wave.open('./speech.wav', 'wb') as writer:
    writer.setnchannels(opus.channels)
    writer.setframerate(opus.frequency)
    writer.setsampwidth(2)
    writer.writeframesraw(bytes(bf))

# Read the file back and show the header that was written
with wave.open('./speech.wav', 'rb') as reader:
    print(reader.getparams())
# Display the version of the Opus library version = opus.opus_get_version_string() print("Opus library version: " + str(version.decode('utf-8'))) # Specify the file containing Opus audio #filename = "ff-16b-2c-44100hz.opus" #filename = "gs-16b-1c-44100hz.opus" #filename = "gs-16b-2c-44100hz.opus" filename = "left-right-demo-5s.opus" #filename = "humm-120samples.opus" #filename = "psallite.opus" #filename = "test.opus" # Read the Opus file and place the PCM in a memory buffer print("Reading Opus file...") opusFile = pyogg.OpusFile(filename) # Display information about the file print("\nRead Opus file") print("Channels:" + str(opusFile.channels)) print("Frequency:" + str(opusFile.frequency)) print("Buffer Length (bytes): " + str(opusFile.buffer_length)) # The buffer holds the entire song in memory, however the shape of the # array isn't obvious. Note that the above buffer length is in bytes, # but the PCM values are stored in two-byte ints (shorts). bytesPerSample = ctypes.sizeof(opusFile.buffer.contents) samplesPerChannel = \ opusFile.buffer_length// \ bytesPerSample// \ opusFile.channels