def test_resample_file(resampled_wav):
    identifiers = path.splitext(path.basename(resampled_wav))[0].split('__')
    sr_in = int(identifiers[1].split('_')[-1])
    sr_out = int(identifiers[2].split('_')[-1])
    n_ch = int(identifiers[3].split('_')[-1])
    wav_in = f'tests/test-assets/original__sr_{sr_in}__channels_{n_ch}.wav'
    x, sr = af.read(wav_in, always_2d=True)
    assert sr == sr_in
    target, sr = af.read(resampled_wav, always_2d=True)
    assert sr == sr_out
    y = audresample.resample(
        x,
        sr_in,
        sr_out,
        quality=audresample.define.ResampleQuality.HIGH,
    )
    assert y.shape[0] == n_ch
    np.testing.assert_allclose(y, target, rtol=0.0, atol=0.037)
    errors = np.abs(np.ravel(y) - np.ravel(target))
    mean_error = np.mean(errors)
    assert mean_error < 5.0e-5
    outliers = errors[errors > 1.0e-3]
    assert outliers.size / errors.size < 1.0e-3
def load_audio(filenames):
    filenames_np = filenames.numpy()
    batch_size = filenames_np.shape[0]
    X = np.empty((batch_size, 441000))
    y = np.empty((batch_size, 441000))
    for i, (mix_filename, vocal_filename) in enumerate(filenames_np):
        # Read one (mix, vocal) file pair for this batch entry
        audio_mix, sr = af.read(bytes.decode(mix_filename))
        audio_vocal, sr = af.read(bytes.decode(vocal_filename))
        X[i] = audio_mix
        y[i] = audio_vocal
    return X, y
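# A sketch of how load_audio might be driven from a tf.data pipeline via
# tf.py_function, which is what the `.numpy()` call above implies; the
# file pairs are hypothetical placeholders and each file is assumed to
# hold exactly 441000 samples.
import tensorflow as tf

pairs = tf.constant([
    [b'mix_0.wav', b'vocals_0.wav'],
    [b'mix_1.wav', b'vocals_1.wav'],
])
dataset = tf.data.Dataset.from_tensor_slices(pairs).batch(2)
dataset = dataset.map(
    lambda f: tf.py_function(load_audio, [f], [tf.float64, tf.float64])
)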
def test_read(tmpdir, duration, offset):
    file = str(tmpdir.join('signal.wav'))
    sampling_rate = 8000
    signal = sine(
        duration=0.1,
        sampling_rate=sampling_rate,
        channels=1,
    )
    af.write(file, signal, sampling_rate)
    sig, fs = af.read(file, duration=duration, offset=offset)
    assert sig.shape == (0,)
    assert fs == sampling_rate
    sig, fs = af.read(file, always_2d=True, duration=duration, offset=offset)
    assert sig.shape == (1, 0)
    assert fs == sampling_rate
def separate_from_model(model_str, input_filename):
    if model_str == 'u_net_5_5':
        kernel_size = (5, 5)
    if model_str == 'u_net_3_7':
        kernel_size = (7, 3)
    if model_str == 'u_net_4_6':
        kernel_size = (6, 4)
    audio_mix, fs = af.read(input_filename)
    if model_str in ('u_net_5_5', 'u_net_3_7', 'u_net_4_6'):
        model = utls.load_unet_spleeter(
            kernel_size, 'weights/' + model_str + '.hdf5')
        if fs != 44100:
            print("Audio must be 44.1 kHz. Exiting")
            return
        audio_vocal_pred = utls.separate_from_audio(
            audio_mix, 44100, model, wiener_filter=True)
        audio_acc_pred = audio_mix - audio_vocal_pred[0:len(audio_mix)]
    else:
        audio_prepared = torch.Tensor(audio_mix)
        result = utls.separate_umx(
            audio_prepared,
            rate=44100,
            model_str_or_path="weights/model5",
            targets=['vocals'],
            residual=True)
        audio_vocal_pred = np.array(result['vocals'][0][0])
        audio_acc_pred = (
            audio_mix[:len(audio_vocal_pred)]
            - audio_vocal_pred[:len(audio_mix)]
        )
    base_folder = './audio/'
    basename = os.path.splitext(os.path.basename(input_filename))[0]
    af.write(base_folder + basename + '_vocals_pred.wav', audio_vocal_pred, 44100)
    af.write(base_folder + basename + '_acc_pred.wav', audio_acc_pred, 44100)
def main(p: Plot):
    p.plot_size = 4
    p.setup()
    p.draw_bounding_box()
    num_bins = round(100 / p.inches_to_units(0.02))
    audio = af.read('soundwave/caves.m4a')[0]
    # take abs to get magnitude
    audio = np.abs(audio)
    # sum to mono
    audio = np.sum(audio, axis=0)
    # pad end to nearest multiple of bin width
    pad_len = math.ceil(audio.shape[0] / num_bins) * num_bins - audio.shape[0]
    audio = np.pad(audio, (0, pad_len))
    # reshape into 2d array of bins
    audio = np.reshape(audio, (num_bins, -1))
    # sum bins to 1d array of values
    audio = np.sum(audio, axis=1)
    # normalize
    audio = audio / np.max(audio)
    max_width = 30
    for i, sample in enumerate(audio):
        x = 100 * i / len(audio)
        p.goto(x, 50 + max_width * sample)
        p.lineto(x, 50 - max_width * sample)
def read_dataset(dimension, files_format):
    data = []
    y_evaluator = []
    limit_songs = 20
    for folder in range(9):
        consecutive_errors = 0
        print("Reading dataset in folder " + str(folder + 1))
        directory = (
            "local_ds/" + files_format + "/"
            + str(dimension[0]) + "-" + str(dimension[1]) + "/"
            + str(folder + 1) + "/"
        )
        songs_dir = 1
        for song_dirname in os.listdir(directory):
            try:
                signal, sampling_rate = af.read(directory + song_dirname)
                song_reshaped = np.reshape(
                    signal, newshape=(dimension[0], dimension[1], 2))
                data.append(song_reshaped)
                y_evaluator.append(folder)
                consecutive_errors = 0
                songs_dir += 1
                # if songs_dir >= limit_songs and dimension[0] == 4:
                #     break
            except Exception:
                consecutive_errors += 1
                if consecutive_errors == 5:
                    break
    categorical_y = tf.compat.v1.keras.utils.to_categorical(
        np.array(y_evaluator), num_classes=9)
    categorical_y = tf.constant(categorical_y)
    return np.array(data), categorical_y.numpy()
def test_mp3(tmpdir, magnitude, sampling_rate, channels):
    # Currently we are not able to set up the Windows runner with MP3 support
    # https://github.com/audeering/audiofile/issues/51
    if sys.platform == 'win32':
        return
    signal = sine(
        magnitude=magnitude,
        sampling_rate=sampling_rate,
        channels=channels,
    )
    # Create wav file and use sox to convert to mp3
    wav_file = str(tmpdir.join('signal.wav'))
    mp3_file = str(tmpdir.join('signal.mp3'))
    af.write(wav_file, signal, sampling_rate)
    subprocess.call(['sox', wav_file, mp3_file])
    assert audeer.file_extension(mp3_file) == 'mp3'
    sig, fs = af.read(mp3_file)
    assert_allclose(_magnitude(sig), magnitude, rtol=0, atol=tolerance(16))
    assert fs == sampling_rate
    assert _channels(sig) == channels
    if channels == 1:
        assert sig.ndim == 1
    else:
        assert sig.ndim == 2
    assert af.channels(mp3_file) == _channels(sig)
    assert af.sampling_rate(mp3_file) == sampling_rate
    assert af.samples(mp3_file) == _samples(sig)
    assert af.duration(mp3_file) == _duration(sig, sampling_rate)
    assert af.duration(mp3_file, sloppy=True) == sox.file_info.duration(mp3_file)
    assert af.bit_depth(mp3_file) is None
    # Test additional arguments to read with sox
    offset = 0.1
    duration = 0.5
    sig, fs = af.read(mp3_file, offset=offset, duration=duration)
    assert _duration(sig, sampling_rate) == duration
    sig, fs = af.read(mp3_file, offset=offset)
    # Don't test for 48000 Hz and 2 channels
    # https://github.com/audeering/audiofile/issues/23
    if not (sampling_rate == 48000 and channels == 2):
        assert_allclose(
            _duration(sig, sampling_rate),
            af.duration(mp3_file) - offset,
            rtol=0,
            atol=tolerance('duration', sampling_rate),
        )
def play_file(path: str, *, start: float = 0, end: float = None):
    global _play_channel
    duration = end - start if end else None
    x, sr = af.read(path, offset=start, duration=duration, always_2d=True)
    if _play_channel is not None:
        if _play_channel >= x.shape[0]:
            warnings.warn(f'Cannot play channel {_play_channel}')
            return
        x = x[_play_channel, :]
    play(x.transpose(), sr)
def featExtractWriter(wavPath, cmn=True):
    # `kwargs` with the frame/window settings is expected to be defined
    # at module level
    y, sr = af.read(wavPath)
    featMfcc = mfcc(y, sr, winfunc=np.hamming, **kwargs)
    featLogfbank = logfbank(y, sr, **kwargs)
    featFbank = fbank(y, sr, winfunc=np.hamming, **kwargs)[0]
    if cmn:
        # Cepstral mean normalization: subtract the per-feature mean
        featMfcc -= np.mean(featMfcc, axis=0, keepdims=True)
        featLogfbank -= np.mean(featLogfbank, axis=0, keepdims=True)
        featFbank -= np.mean(featFbank, axis=0, keepdims=True)
    return featMfcc, featLogfbank, featFbank
def on_uploadAudio_clicked(self):
    self.fullPath, filterReturn = FileDialog.getOpenFileName(
        self, 'Select .wav file', self.defaultOpenPath, '*.wav')
    print(self.fullPath)
    self.filenameData = util.splitext(os.path.basename(self.fullPath))
    self.filename = self.filenameData[0] + self.filenameData[1]
    print('Audio File Grabbed: ' + self.filename)
    self.audioLabel.setText('Audio File: ' + self.filename)
    self.sig, self.fs = af.read(self.filename)
    pygame.mixer.music.load(self.filename)
    print('Original Sampling Rate: ' + str(self.fs) + ' Hz')
    self.fsLabel.setText('Original Sampling Rate: ' + str(self.fs) + ' Hz')
def write_and_read(
        file,
        signal,
        sampling_rate,
        bit_depth=16,
        always_2d=False,
        normalize=False,
):
    """Write and read audio files."""
    af.write(file, signal, sampling_rate, bit_depth, normalize)
    return af.read(file, always_2d=always_2d)
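# A minimal round-trip sketch for the write_and_read helper above,
# assuming it is called from the same module; the file name is a
# hypothetical placeholder.
import numpy as np

noise = np.random.uniform(-1, 1, 8000)
sig, fs = write_and_read('roundtrip.wav', noise, 8000, bit_depth=24)
assert fs == 8000
assert sig.shape == noise.shape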
def test_formats():
    files = [
        'gs-16b-1c-44100hz.opus',
        'gs-16b-1c-8000hz.amr',
        'gs-16b-1c-44100hz.m4a',
        'gs-16b-1c-44100hz.aac',
    ]
    header_durations = [  # as given by mediainfo
        15.839,
        15.840000,
        15.833,
        None,
    ]
    files = [os.path.join(ASSETS_DIR, f) for f in files]
    for file, header_duration in zip(files, header_durations):
        signal, sampling_rate = af.read(file)
        assert af.channels(file) == _channels(signal)
        assert af.sampling_rate(file) == sampling_rate
        assert af.samples(file) == _samples(signal)
        duration = _duration(signal, sampling_rate)
        assert af.duration(file) == duration
        if header_duration is None:
            # Here we expect samplewise precision
            assert af.duration(file, sloppy=True) == duration
        else:
            # Here we expect limited precision
            # as the results differ between soxi and mediainfo
            precision = 1
            sloppy_duration = round(af.duration(file, sloppy=True), precision)
            header_duration = round(header_duration, precision)
            assert sloppy_duration == header_duration
        assert af.bit_depth(file) is None
        if file.endswith('m4a'):
            # Test additional arguments to read with ffmpeg
            offset = 0.1
            duration = 0.5
            sig, fs = af.read(file, offset=offset, duration=duration)
            assert _duration(sig, sampling_rate) == duration
            sig, fs = af.read(file, offset=offset)
            assert _duration(sig, sampling_rate) == af.duration(file) - offset
def mix_songs(song_list, cmn=True):
    n = len(song_list)
    # Random convex mixing weights that sum to one
    wts = np.random.dirichlet(np.ones(n), size=1)[0]
    for i in range(n):
        song, sr = af.read(song_list[i])
        if i == 0:
            mixed_signal = song * wts[i]
            continue
        mixed_signal += song * wts[i]
    if cmn:
        mixed_signal -= np.mean(mixed_signal, axis=0, keepdims=True)
    return mixed_signal
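# Hypothetical driver for mix_songs; the stem paths are placeholders.
# Note the function implicitly assumes all files share the same length
# and sampling rate, otherwise the in-place addition fails.
stems = ['stems/drums.wav', 'stems/bass.wav', 'stems/vocals.wav']
mixture = mix_songs(stems)
af.write('mixture.wav', mixture, 44100)  # assumes 44.1 kHz stems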
def test_broken_file(non_audio_file):
    # Only match the beginning of error message
    # as the default soundfile message differs at the end on macOS
    error_msg = 'Error opening'
    # Reading file
    with pytest.raises(RuntimeError, match=error_msg):
        af.read(non_audio_file)
    # Metadata
    if audeer.file_extension(non_audio_file) == 'wav':
        with pytest.raises(RuntimeError, match=error_msg):
            af.bit_depth(non_audio_file)
    else:
        assert af.bit_depth(non_audio_file) is None
    with pytest.raises(RuntimeError, match=error_msg):
        af.channels(non_audio_file)
    with pytest.raises(RuntimeError, match=error_msg):
        af.duration(non_audio_file)
    with pytest.raises(RuntimeError, match=error_msg):
        af.samples(non_audio_file)
    with pytest.raises(RuntimeError, match=error_msg):
        af.sampling_rate(non_audio_file)
def opensmileTrial():
    signal, sampling_rate = audiofile.read("audio/1s.wav", always_2d=True)
    # wf = wave.open("audio/1s.wav", 'rb')
    # signal = wf.readframes(4096)
    smile = opensmile.Smile(
        feature_set='conf/alqudah_live.conf',
        feature_level='features',
        num_channels=2,
    )
    print(signal.shape)
    result = smile.process_signal(signal[:, :4096], sampling_rate)
    print(result)
def read_audio(filename_in, mode="audiofile", sr=None, mean_norm=False):
    """
    Input : file name
    Return : waveform as float, range [-1, 1]

    mode=="scipy": reads int16 samples, which we convert to float
    mean_norm=True if you want the mean-normalised waveform returned.
    matlab reads in the same way as librosa does.
    If the read audio has multiple channels: use def make_single_channel()
    """
    if mode == "wave":
        with wave.open(filename_in) as w:
            channels = w.getnchannels()
            sound_fs = w.getframerate()
            data = w.readframes(w.getnframes())
        sig = np.frombuffer(data, dtype='<i2').reshape(-1, channels)
        sound = utility.pcm2float(sig, np.float32)
    elif mode == "scipy":
        sound_fs, sound = scipy.io.wavfile.read(filename_in)
        assert sound.dtype == 'int16'
        # convert to float before resampling; resampy returns floats,
        # which would break the dtype check above
        sound = 1. * sound
        if sr and sr != sound_fs:
            sound = resampy.resample(sound, sound_fs, sr, axis=0)
            sound_fs = sr
    elif mode == "librosa":
        # must define sr=None to get native sampling rate
        sound, sound_fs = librosa.load(filename_in, sr=sr)
        # sound *= 2**15
    elif mode == "soundfile":
        sound, sound_fs = sf.read(filename_in)
        if sr and sr != sound_fs:
            sound = resampy.resample(sound, sound_fs, sr, axis=0)
            sound_fs = sr
    elif mode == "audiofile":
        sound, sound_fs = af.read(filename_in)
        if sr and sr != sound_fs:
            sound = resampy.resample(sound, sound_fs, sr, axis=0)
            sound_fs = sr
    if mean_norm:
        sound -= sound.mean()
    return sound, sound_fs
def test_empty_file(empty_file):
    # Reading file
    signal, sampling_rate = af.read(empty_file)
    assert len(signal) == 0
    # Metadata
    for sloppy in [True, False]:
        assert af.duration(empty_file, sloppy=sloppy) == 0.0
    assert af.channels(empty_file) == 1
    assert af.sampling_rate(empty_file) == sampling_rate
    assert af.samples(empty_file) == 0
    if audeer.file_extension(empty_file) == 'wav':
        assert af.bit_depth(empty_file) == 16
    else:
        assert af.bit_depth(empty_file) is None
def load(
        filename,
        *,
        duration=None,
        offset=0,
):
    r"""Load audio file.

    If an error occurs during loading because the file could not be found,
    is empty, or has the wrong format, an empty signal is returned
    and a warning is shown.

    Args:
        filename (str or int or file-like object): file name of input
            audio file
        duration (float, optional): return only a specified duration in
            seconds. Default: `None`
        offset (float, optional): start reading at offset in seconds.
            Default: `0`

    Returns:
        tuple:

            * **numpy.ndarray**: two-dimensional array with shape
              `(channels, samples)`
            * **int**: sample rate of the audio file

    Example:
        >>> signal, sampling_rate = load('speech.wav')

    """
    signal = np.array([[]])  # empty signal of shape (1, 0)
    sampling_rate = None
    try:
        signal, sampling_rate = af.read(
            filename,
            duration=duration,
            offset=offset,
            always_2d=True,
        )
    except ValueError:
        warn(f'File opening error for: {filename}', UserWarning)
    except (IOError, FileNotFoundError):
        warn(f'File does not exist: {filename}', UserWarning)
    except RuntimeError:
        warn(f'Runtime error for file: {filename}', UserWarning)
    except subprocess.CalledProcessError:
        warn(f'ffmpeg conversion failed for: {filename}', UserWarning)
    return signal, sampling_rate
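# Usage sketch for load(): a hypothetical missing file exercises the
# warning branch, so the empty (1, 0) fallback signal is returned.
signal, sampling_rate = load('does_not_exist.wav')
assert signal.shape == (1, 0)
assert sampling_rate is None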
def read_audio(
        path: str,
        start: pd.Timedelta = None,
        end: pd.Timedelta = None,
        channel: int = None,
) -> typing.Tuple[np.ndarray, int]:  # pragma: no cover
    """Reads (segment of an) audio file.

    Args:
        path: path to audio file
        start: read from this position
        end: read until this position
        channel: channel number

    Returns:
        signal: array with signal values in shape ``(channels, samples)``
        sampling_rate: sampling rate in Hz

    """
    if start is None or pd.isna(start):
        offset = 0
    else:
        offset = start.total_seconds()
    if end is None or pd.isna(end):
        duration = None
    else:
        duration = end.total_seconds() - offset
    # load raw audio
    signal, sampling_rate = af.read(
        audeer.safe_path(path),
        always_2d=True,
        offset=offset,
        duration=duration,
    )
    # select the requested channel
    if channel is not None:
        if channel < 0 or channel >= signal.shape[0]:
            raise ValueError(
                f'We need 0<=channel<{signal.shape[0]}, '
                f'but we have channel={channel}.'
            )
        signal = signal[channel, :]
    return signal, sampling_rate
def test_signal(file, feature_set, feature_level):
    # create feature extractor
    fex = opensmile.Smile(feature_set, feature_level)
    # extract from numpy array
    x, sr = audiofile.read(file, always_2d=True)
    y = fex.process_signal(x, sr)
    y_file = fex.process_file(file)
    with pytest.warns(UserWarning):
        y_empty = fex.process_signal(x[0, :10], sr)
    # assertions
    assert fex.feature_names == y.columns.to_list()
    np.testing.assert_equal(y.values, y_file.values)
    assert all(y_empty.isna())
def _gen_summary(source, dest):
    # Read the audio data.
    signal, samplerate = audiofile.read(source)
    # Normalize to mono 22k for consistent analysis.
    signal, samplerate = _normalize(signal, samplerate)
    # Find the most representative 30 seconds to use as a summary clip.
    print(" analyze")
    clip = summary.generate(signal, samplerate, duration=30.0)
    # Write the summary as a 16-bit WAV.
    print(" write summary")
    wf = wave.open(dest, 'wb')
    if wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(int(samplerate))
        for s in (clip * np.iinfo(np.int16).max).astype(np.int16):
            wf.writeframesraw(struct.pack('<h', s))
        wf.writeframes(b'')
        wf.close()
def read_audio(
        file: str,
        *,
        start: pd.Timedelta = None,
        end: pd.Timedelta = None,
        root: str = None,
) -> typing.Tuple[np.ndarray, int]:
    """Reads (segment of an) audio file.

    Args:
        file: path to audio file
        start: read from this position
        end: read until this position
        root: root folder

    Returns:
        signal: array with signal values in shape ``(channels, samples)``
        sampling_rate: sampling rate in Hz

    """
    if root is not None and not os.path.isabs(file):
        file = os.path.join(root, file)
    if start is None or pd.isna(start):
        offset = 0
    else:
        offset = start.total_seconds()
    if end is None or pd.isna(end):
        duration = None
    else:
        duration = end.total_seconds() - offset
    signal, sampling_rate = af.read(
        audeer.safe_path(file),
        always_2d=True,
        offset=offset,
        duration=duration,
    )
    return signal, sampling_rate
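# Hypothetical call reading the 0.5 s to 1.5 s segment of a file
# resolved relative to a root folder; paths are placeholders.
import pandas as pd

signal, sampling_rate = read_audio(
    'speech.wav',
    start=pd.Timedelta('0.5s'),
    end=pd.Timedelta('1.5s'),
    root='/data/corpus',
)
assert signal.shape[1] == sampling_rate  # one second of samples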
def test_custom(config, level):
    # create feature extractor
    fex = opensmile.Smile(config, level)
    # extract from file
    y_file = fex.process_file(pytest.WAV_FILE)
    # extract from array
    x, sr = audiofile.read(pytest.WAV_FILE)
    y_array = fex.process_signal(x, sr, file=pytest.WAV_FILE)
    # assertions
    assert fex.config_name == audeer.basename_wo_ext(config)
    assert fex.config_path == audeer.safe_path(config)
    assert fex.num_features == len(fex.feature_names)
    assert fex.feature_names == y_file.columns.to_list()
    pd.testing.assert_frame_equal(y_file, y_array)
def read_audio(filename_in, mode="audiofile", sr=None, mean_norm=False):
    # Reading the audio
    if mode == "librosa":
        # must define sr=None to get native sampling rate
        sound, sound_fs = librosa.load(filename_in, sr=sr)
        # sound *= 2**15
    elif mode == "soundfile":
        sound, sound_fs = sf.read(filename_in)
    elif mode == "audiofile":
        sound, sound_fs = af.read(filename_in)
    else:
        # raise instead of printing, otherwise `sound` would be undefined
        raise ValueError(
            'mode: {} is incorrect, should be librosa/soundfile/audiofile'
            .format(mode))
    # Resampling
    if sr and sr != sound_fs:
        sound = resampy.resample(sound, sound_fs, sr, axis=0)
        sound_fs = sr
    # Zero-mean
    if mean_norm:
        sound -= sound.mean()
    return sound, sound_fs
accepted = [
    '1fc348560267afb3005ff1d9266f2959',
    'ceb797950f0995cd4aaf8e1b931a9598',
    '1f83dae55598b76da0055445d60f43ce',
    'cb7a02a14d6609d5976effd4645bc41d',
    '3511c76a82820e49474890f401925b3a',
    'cf33de7b6d85d7345f82acd39787717f',
    '6d8998ea8704af6685a50219c9dd3747',
    '2f8ddcfcb5a4419b16c533808ed9c38e',
    '57e7a29c01c11136336257b324b7a3af',
    'f0132034f1bc1a891841d6e8bbb6eb1a',
    'd50f6c0da77b2223568ad042ea035a5e',
    '28081878fb0d5dbaac595fb031bb4e43',
    '5b67a12483bb2bc7eed1b214398bac9c',
    'ae54788ccd8cc8b67e121f49e747f548',
    '9a3614ec10dcaeb5ee53088d939bbd6b',
    '56a336e76f530ef57ae1e7f0a156e8c0',
    '3ec0eea351847a9def135aa2b322637f',
    '9ea4000febe9fab7c8560437103192c6',
    '43bdf492e2b6bda5ec869dc003de81c1',
    '1e996fca7223c00c4c76b1f3b7dc1eaa',
    '2eef8025f3f03c9ad7532249808ab4f8',
]
file = accepted[0]
filepath = "./response1/" + file + ".wav"
signal, sampling_rate = audiofile.read(filepath, always_2d=True)
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    # feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)
# print(smile.feature_names)
data = smile.process_signal(signal, sampling_rate)
data.insert(loc=0, column='key', value=file)

# %%
'''
FOLLOWING FILES
'''
def load_single_audio(filenames):
    filenames_np = filenames.numpy()
    mix_filename, vocal_filename = filenames_np
    audio_mix, sr = af.read(bytes.decode(mix_filename))
    audio_vocal, sr = af.read(bytes.decode(vocal_filename))
    return audio_mix, audio_vocal
import scipy.io.wavfile as wavf
import matplotlib.pyplot as plt
import numpy as np
import audiofile as af
import ffmpeg
import pygame
import time
import sounddevice as sd

pygame.init()
pygame.mixer.init()

newaudio = []
# scaled [-1, 1]
sig, fs = af.read('C:/Users/Owner/Documents/EQ Project/01_Dirty_Laundry.wav')
# mix the two channels down to mono
d = sig[0] / 2 + sig[1] / 2
newaudio.append(d)
pygame.mixer.Sound(d)

# filter design parameters (cutoffs normalized to the Nyquist frequency)
nyq = 22050
order = 10
lowcut2 = 300 / nyq
lowcut3 = 1000 / nyq
lowcut4 = 2500 / nyq
lowcut5 = 5000 / nyq
highcut1 = 299 / nyq
highcut2 = 999 / nyq
highcut3 = 2499 / nyq
highcut4 = 4999 / nyq
import numpy as np
import audiofile as af

sampling_rate = 8000
noise = np.random.normal(0, 1, sampling_rate)
noise /= np.amax(np.abs(noise))
af.write('MY.wav', noise, sampling_rate)
af.channels('MY.wav')
af.duration('MY.wav')
sig, fs = af.read('MY.wav')
print(fs)
print(sig)
import audeer
import os
import glob
import sys
import pytest
import pandas as pd
import audiofile as af

pytest.ROOT = os.path.dirname(os.path.realpath(__file__))
pytest.WAV_FILE = os.path.join(pytest.ROOT, 'test.wav')
pytest.WAV_ARRAY, pytest.WAV_SR = af.read(pytest.WAV_FILE, always_2d=True)
pytest.FRAME_LIST_STARTS = pd.to_timedelta(['1.0s', '3.0s', '4.0s'])
pytest.FRAME_LIST_ENDS = pd.to_timedelta(['1.5s', '3.5s', '5.0s'])
pytest.CONFIG_FILE = os.path.join(pytest.ROOT, 'test.conf')

if sys.platform == "win32":  # pragma: no cover
    platform = 'win'
elif sys.platform == "darwin":  # pragma: no cover
    platform = 'osx'
else:  # pragma: no cover
    platform = 'linux'

pytest.SMILEXTRACT = audeer.safe_path(
    os.path.join(
        pytest.ROOT, '..', 'opensmile', 'core', 'bin', platform,
        'SMILExtract'))


@pytest.fixture(scope='session', autouse=True)
def fixture_clean_session():
    def clean():
def load_audiofile(fp):
    # Return only the signal; the sampling rate is discarded
    sig, rate = af.read(fp)
    return sig