def main(args):
    max_length = 0
    min_length = 0
    total_length = 0
    count = 0
    with open(os.path.join(args.data_dir, args.split + '.tsv'), 'r') as f:
        next(f)
        for line in f:
            line_split = line.split('\t')
            audio_fn = line_split[1]
            filepath = os.path.join(args.data_dir, 'clips', audio_fn[:-4] + '.wav')
            sr, data = read_wav(filepath)
            length = len(data) / sr
            if length > max_length:
                max_length = length
            if length < min_length or min_length == 0:
                min_length = length
            total_length += length
            count += 1
    avg_length = total_length / count
    print('Total: {:.4f} s'.format(total_length))
    print('Min length: {:.4f} s'.format(min_length))
    print('Max length: {:.4f} s'.format(max_length))
    print('Average length: {:.4f} s'.format(avg_length))
def separate(ckpt, mtgph, input_path, output_path):
    model = inference.SeparationModel(ckpt, mtgph)
    if not os.path.exists(input_path):
        raise Exception("Wrong input file {}".format(input_path))
    file_list = [input_path]
    from scipy.io.wavfile import read as read_wav
    from scipy.io.wavfile import write as write_wav
    sr, f = read_wav(file_list[0])
    with model.graph.as_default():
        dataset = data_io.wavs_to_dataset(file_list,
                                          batch_size=1,
                                          num_samples=len(f),
                                          repeat=False)
        # Strip batch and mic dimensions.
        dataset['receiver_audio'] = dataset['receiver_audio'][0, 0]
        dataset['source_images'] = dataset['source_images'][0, :, 0]
    waveforms = model.sess.run(dataset)
    separated_waveforms = model.separate(waveforms['receiver_audio'])
    # print(separated_waveforms)
    # print(separated_waveforms.shape)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    for i in range(separated_waveforms.shape[0]):
        write_wav(output_path + '/sub_target{}.wav'.format(i), sr,
                  separated_waveforms[i, :])
def Extract_Pitch(self, player_pos_x, player_pos_y, object_pos_x, object_pos_y):
    import librosa
    from scipy.io.wavfile import read as read_wav
    from scipy.spatial import distance as spatial_distance
    import numpy as np

    sampling_rate, data = read_wav("Test.wav")
    y, sr = librosa.load('Test.wav', sr=sampling_rate)
    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr, fmin=75, fmax=1600)

    # Scale the pitch offset by the distance between player and object
    player = [player_pos_x, player_pos_y]
    target = [object_pos_x, object_pos_y]
    distance = spatial_distance.euclidean(player, target)
    if distance == 0:
        factor = 1000
    else:
        factor = 1000 / distance

    pitch_values = []
    for i in range(len(pitches[0])):
        index = magnitudes[:, i].argmax()
        pitch = pitches[index, i] + factor
        pitch_values.append(pitch)
    return np.array(pitch_values)
def load_validation_set():
    """
    Output a tuple of features: (fft features, mfcc features, mean-std features)

    Extracts three types of features from the validation set.
    """
    ffts = dict()
    mfccs = dict()
    mean_stds = dict()
    for i in validation_ids:
        path = './validation/validation.{i}.wav'.format(i=i)
        _, X = read_wav(path)
        # FFT
        fft = np.array(abs(sp.fft(X)[:1000]))
        ffts.update({i: fft})
        # MFCC
        ceps, mspec, spec = mfcc(X)
        num_ceps = len(ceps)
        x = np.mean(ceps[int(num_ceps * 1 / 10):int(num_ceps * 9 / 10)], axis=0)
        mfccs.update({i: x})
        # Mean-Std
        [Fs, x] = audioBasicIO.readAudioFile(path)
        F = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.025 * Fs)
        mean_std = []
        for f in F:
            mean_std.extend([f.mean(), f.std()])
        mean_stds.update({i: np.array(mean_std)})
    return (ffts, mfccs, mean_stds)
def convert(input_path, operation, output_type="", mp3=False, wav=True):
    timestamp = str(time.time()).split('.')[0]
    sr, y = read_wav(input_path)
    if operation == 'derivative':
        res = derivative(y, 1 / sr)
    elif operation == 'integral':
        res = integral(y, 1 / sr)
    else:
        print('ERROR: The operation ' + operation + ' is unknown!')
        return
    file_name = str(os.path.basename(input_path).split('.')[0])
    file_name_complete = timestamp + '_' + file_name + '_converted_to_' + output_type
    output_path = 'data/outputs/' + file_name_complete
    os.makedirs(output_path)
    output_path = output_path + '/'
    if wav:
        output.write_wav(output_path + file_name_complete + '.wav', res, sr=sr, norm=True)
    if mp3:
        save_mp3(output_path + file_name_complete, res, sr)
    print('File converted to: ' + output_path)
def get_item(self, key):
    try:
        return self.data_storage[key]
    except KeyError:
        # Lazy loading: read the wav on first access and cache it
        sr, signal = read_wav(key)
        self.data_storage[key] = (sr, signal)
        return sr, signal
def _openWave(self, fileName):
    self.fps, self.wav_buf = read_wav(fileName)
    channels = self.wav_buf.ndim
    if channels > 1:
        # do stereo->mono sound transform
        self.wav_buf = self.wav_buf.flatten()[::channels]
    self.blockLen = int(self.fps / VIDEO_FPS)
    self.blocksReaded = 0
    self.blocksToRead = int(len(self.wav_buf) / self.blockLen)
def create_noise_samples(self):
    self.noises = []
    # Extract 1 s samples (length 16000) from the background noise index
    for f in self.silence_index:
        # Load file
        sample_rate, signal = read_wav(f)
        # Extract as many 1 s samples as possible; stop sample_rate short of
        # len(signal) to avoid a final clip shorter than 1 s
        for i in range(0, len(signal) - sample_rate, sample_rate):
            self.noises.append(signal[i:i + sample_rate])
def wav2guitar_distortion(inFile, outFile=None, deepValue=0.5):
    """Transform the sound in the file as if it had been passed through a
    guitar distortion pedal."""
    if outFile is None:
        outFile = changeFileExt(inFile, '_distortion.wav')
    fps, data_in = read_wav(inFile)
    Amax = int(deepValue * np.max(data_in))
    Amin = int(deepValue * np.min(data_in))
    data_out = np.clip(data_in, Amin, Amax)  # clip sound amplitude
    write_wav(outFile, fps, data_out)
def read(self, filepath):
    """Reads an mp3 file into a numpy array.

    Arguments:
        filepath {str} -- The path to the mp3 file
    """
    # Convert from mp3 to wav
    sound = AudioSegment.from_mp3(filepath)
    sound.export(self.EXPORT_PATH, format="wav")
    _, data = read_wav(self.EXPORT_PATH)
    return data
def get_data_from_flac(data):
    flac_temp = tempfile.NamedTemporaryFile(suffix=".flac")
    # writes data from server to local tempfile (binary mode: the flac payload is bytes)
    flac = open(flac_temp.name, 'wb')
    flac.write(data)
    flac.close()
    wav_temp = tempfile.NamedTemporaryFile(suffix=".wav")
    sound = AudioSegment.from_file(flac_temp.name, "flac")
    sound.export(wav_temp.name, format="wav")
    bee_rate, bee_data = read_wav(wav_temp.name)
    flac_temp.close()
    wav_temp.close()
    return bee_rate, bee_data
def get_white_noise(noise_filename):
    sample_rate, white_noise_ndarray = read_wav(noise_filename)
    white_noise = list(white_noise_ndarray)
    # Map 16-bit PCM samples from [-32768, 32767] into [0, 1)
    white_noise_lower_limit = -2**15
    white_noise_upper_limit = 2**16
    white_noise_normalized = [
        (noise_level - white_noise_lower_limit) / white_noise_upper_limit
        for noise_level in white_noise
    ]
    return white_noise_normalized
def huki():
    just_path = 'C:/Users/chyre/Desktop/Studia/V/TM/Projekt2/potrzebne pliki/huk_wav'
    os.chdir(just_path)
    waves = [f for f in listdir() if f.endswith('.wav')]
    lista_hukow = []
    for l in waves:
        lista_hukow.append(l)
        sampling_rate, data = read_wav(str(l))  # enter your filename
        x = signal.resample(data, 16000)
        print(x)
    return lista_hukow
def huki():
    just_path = 'C:/Users/chyre/Desktop/Studia/V/TM/Projekt2/potrzebne pliki/znormalizowane_huki'
    os.chdir(just_path)
    waves = [f for f in listdir() if f.endswith('.wav')]
    lista_hukow = []
    for l in waves:
        lista_hukow.append(l)
        sampling_rate, data = read_wav(str(l))  # enter your filename
        huk = signal.resample(data, 16000)
        wav.write(just_path + '/' + str(l), 16000, huk)
    return lista_hukow
def _getFile(widget, name):
    filename = widget.text()
    if filename == '':
        raise Exception(('Cannot run convolution: must specify the {}'
                         ).format(name))
    if not os.path.exists(filename):
        raise Exception(('Cannot run convolution: path specified {} does not exist'
                         ).format(name))
    rate, data = read_wav(filename)
    data = pcm2float(data, 'float32')
    if len(data.shape) == 1:
        data = data.reshape((len(data), 1))
    return data, rate
def get_test_data(self, batch_size, offset):
    total_size = len(self.test_set)
    real_batch_size = min(batch_size, total_size - offset)
    batch_data = np.zeros((real_batch_size, self.preprocessor.fingerprint_size))
    filenames = []
    for i in range(offset, offset + real_batch_size):
        sample = self.test_set[i]
        # Load wav
        sr, signal = read_wav(sample['path'])
        filenames.append(sample['filename'])
        # Check signal length
        signal = self.preprocessor.check_audio_length(signal)
        # Featurize
        batch_data[i - offset, :] = self.preprocessor.get_log_mel_spectrograms(signal)
    return batch_data, filenames
def create_mfcc_array(output_path='data/mfcc_features.npy', wav_list=WAV_LIST,
                      n_mfcc=N_MFCC, sample_rate=SAMPLE_RATE, duration=DURATION,
                      window_length=WINDOW_LENGTH, step=STEP, n_fft=N_FFT):
    n_frames = int(duration / step) - 1
    mfcc_features = np.zeros((len(wav_list), n_frames, n_mfcc))
    for i, wav_path in enumerate(wav_list):
        rate, signal = read_wav(wav_path)
        mfcc_features[i] = psf.mfcc(signal[:(duration * sample_rate)],
                                    samplerate=sample_rate,
                                    winlen=window_length,
                                    winstep=step,
                                    numcep=n_mfcc,
                                    nfft=n_fft)
    np.save(output_path, mfcc_features)
    return mfcc_features
from scipy.io.wavfile import read as read_wav
import os

# os.chdir('path')  # change to the file directory
sampling_rate, data = read_wav("4.wav")  # enter your filename
print(sampling_rate)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed May 29 10:02:44 2019

@author: tim
"""
# Going to put clips and information into the same format as the tutorial.
# Could change this at a later date by changing the format the tutorial accepts,
# but for the moment I'll keep using the instruments.csv file to record the
# files and their class.
#
# So, loop through the sub-directories in the tagged_recordings folder,
# copy them into the audio_files_cacophony folder (cf. the wavfiles folder in
# the tutorial), and insert a row in the tags.csv file (cf. the instruments.csv
# file of the tutorial); see the sketch after this snippet.
#
# Test we can load the file format
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.io.wavfile import read as read_wav
from python_speech_features import mfcc, logfbank
import librosa

sampling_rate, data = read_wav('wavfiles/' + wav_file)
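# A minimal sketch of the copy-and-tag loop described in the comments above,
# assuming each sub-directory of tagged_recordings is named after its class;
# the 'fname' and 'label' column names are placeholders and may need to match
# whatever the tutorial's CSV actually expects.
import os
import shutil
import pandas as pd

os.makedirs('audio_files_cacophony', exist_ok=True)
rows = []
for class_dir in sorted(os.listdir('tagged_recordings')):
    src_dir = os.path.join('tagged_recordings', class_dir)
    if not os.path.isdir(src_dir):
        continue
    for fname in os.listdir(src_dir):
        if not fname.endswith('.wav'):
            continue
        # Copy the clip into the flat audio folder (cf. wavfiles in the tutorial)
        shutil.copy(os.path.join(src_dir, fname),
                    os.path.join('audio_files_cacophony', fname))
        # Record the file and its class (cf. instruments.csv in the tutorial)
        rows.append({'fname': fname, 'label': class_dir})

pd.DataFrame(rows).to_csv('tags.csv', index=False)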
~~~~~~~~~~~~~~

Doing quick pythonic stuff.
"""
from scipy.io.wavfile import read as read_wav
from matplotlib import pyplot as plt, patches as mpl_patches, rcParams as mpl_params
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset

MAX_INT16 = 32768

mpl_params['axes.labelsize'] = 24
mpl_params['xtick.labelsize'] = 20
mpl_params['ytick.labelsize'] = 20

if __name__ == '__main__':
    sampling_rate, signal = read_wav(
        '../../lubos/data/wavs/SADB-2010-NTB/_data8kHz-cut/spk-1/spk-1-1.wav')
    signal = [float(s) / MAX_INT16 for s in signal]  # normalisation
    duration = float(len(signal)) / sampling_rate

    win_len = 0.025
    win_shift = 0.01

    fig, ax = plt.subplots()
    ax.plot([float(s) / sampling_rate for s in range(len(signal))], signal, alpha=0.3)
    ax.add_patch(
        mpl_patches.Rectangle((1, -0.01), 0.1, 0.02, fill=False, color='red'))
    col = ('magenta', 'red', 'orange', 'maroon')
    for i in range(int(duration / win_shift)):
        ax.add_patch(
            mpl_patches.Rectangle((i * win_shift, -0.48),
def read_path(self):
    rate, data = read_wav(self.path)
    self.rate = rate
    self.data = data
def open_wave(filename):
    return read_wav(filename)
t_1 = 0
for n, t in zip(subject_trials['Subject{}'.format(subject_i)],
                range(1, np.shape(events_times)[0] + 1)):
    trial = np.transpose(data[t_1:t, :, :], (0, 2, 1))
    trial_reshape = np.concatenate(np.split(trial, n_splits, axis=1), axis=0)
    eegs.append(trial_reshape)
    t_1 = t

    audio_path_20000 = os.path.join(FULLAUDIODIR, r'20000/20000_{}.wav'.format(n))
    audio_path_Journey = os.path.join(FULLAUDIODIR, r'Journey/Journey_{}.wav'.format(n))
    _, twenty = read_wav(audio_path_20000)
    _, journey = read_wav(audio_path_Journey)
    twenty = rms_normalize(twenty[0:2646000])
    journey = rms_normalize(journey[0:2646000])

    clean_sound = twenty if subject_i <= 17 else journey
    unattended_sound = twenty if subject_i > 17 else journey

    clean_sound_reshape = np.concatenate(np.split(clean_sound[None], n_splits, axis=1), axis=0)
    unattended_sound = np.concatenate(np.split(unattended_sound[None], n_splits, axis=1), axis=0)
    plt.tight_layout()
    plt.show()
    return filters


if __name__ == '__main__':
    ''' Settings '''
    args = parse_arguments()
    input_file = args.input_file        # .wav file to process
    ms_dur = args.microsegment_dur      # microsegment duration [s]
    ms_shift_dur = args.shift_dur       # microsegment shifting duration [s]
    verbose = args.verbose              # print analysis and plot the mfcc?
    show_filters = args.show_filters    # show the mfcc filters?

    ''' Read, normalize and analyze wav signal '''
    f_s, s_raw = read_wav(input_file)   # sampling frequency [Hz], raw signal
    s = normalize(s=s_raw)              # signal mapped to <-1, 1>
    s_len = len(s)                      # signal length
    s_dur = float(s_len) / f_s          # signal duration [s]
    ms_len = int((ms_dur / s_dur) * s_len)          # microsegment length
    ms_shift = int((ms_shift_dur / s_dur) * s_len)  # microsegment shift
    n_ms = int((s_len - ms_len) / ms_shift)  # number of microsegments (windows)
    bw = f_s / 2                        # transmitted band [Hz]
    bw_m = int(hz2mel(bw))              # transmitted band [mel]
    n_filters = 40                      # number of filters
    db_m = float(bw_m) / (n_filters + 1)  # delta m (for filter shift in mels)
    filter_len = (ms_len / 2)           # filter and also the final microsegment length
    f_def_step = float(bw) / filter_len  # frequency step, where the filter is defined [Hz]
    f_def_step_m = float(
keys = matstruct_contents[0, 0].dtype.fields.keys()
for t in range(20):
    val = matstruct_contents[0, t]
    if val['repetition'][0][0][0][0] == 0:
        part = val['part'][0][0][0][0]
        attended_track = val['attended_track'][0][0][0][0]
        tracks = [1, 2]
        tracks.remove(attended_track)
        unattended_track = tracks[0]

        attended_sound_name = 'part{}_track{}_dry.wav'.format(part, attended_track)
        unattended_sound_name = 'part{}_track{}_dry.wav'.format(part, unattended_track)

        afs, attended_sound = read_wav(os.path.join(SOUNDDIR, attended_sound_name))
        attended_sound = rms_normalize(np.copy(attended_sound).astype(float))
        ufs, unattended_sound = read_wav(os.path.join(SOUNDDIR, unattended_sound_name))
        unattended_sound = rms_normalize(np.copy(unattended_sound).astype(float))

        eeg = val['RawData'][0][0][0][0][1]

        attended_sound = attended_sound[:int(min_eeg_duration * afs)]
        unattended_sound = unattended_sound[:int(min_eeg_duration * ufs)]
        eeg = eeg[:int(min_eeg_duration * 128)]

        eeg_reshape = np.concatenate(np.split(eeg[None],
import wave

import numpy
import numpy as np
import pylab
from matplotlib.cm import get_cmap
from matplotlib.pyplot import show, plot, title, figure, xlabel, ylabel, specgram, colorbar, stem
from matplotlib.pyplot import subplot, tight_layout
from scipy.fftpack import fft
from scipy.io.wavfile import read as read_wav
from scipy.io.wavfile import write

# install(numpy); install(matplotlib); install(librosa)
# (the install() helper is not defined in this snippet; the dependencies are
# numpy, matplotlib and librosa)

if __name__ == "__main__":
    sampling_rate, data = read_wav("man_voice.wav")
    fft_out = fft(data)

    fnames = ["man_voice.wav", "szum.wav"]
    wavs = [wave.open(fn) for fn in fnames]
    frames = [w.readframes(w.getnframes()) for w in wavs]
    samples = [np.frombuffer(f, dtype='<i2') for f in frames]
    samples = [samp.astype(np.float64) for samp in samples]

    # Mix the two signals sample-by-sample over their common length
    n = min(map(len, samples))
    mix = samples[0][:n] + samples[1][:n]

    mix_wav = wave.open("./mix.wav", 'w')
    mix_wav.setparams(wavs[0].getparams())
    mix_wav.writeframes(mix.astype('<i2').tobytes())
    mix_wav.close()
def __init__(self, wav_file_path):
    self.fs, samples = read_wav(wav_file_path)
    # Use the signal as-is if mono, otherwise keep only the first channel
    super(WavPlayer, self).__init__(samples if np.isscalar(samples[0]) else samples[:, 0])
def r4():
    from scipy.io.wavfile import read as read_wav
    import os
    sampling_rate, data = read_wav("dataset/bubbling/00.wav")  # enter your filename
    print(sampling_rate)
def __get_fs_array_from_audio(self, audio_file):
    sampling_rate, data = read_wav(audio_file)
    return (sampling_rate, data)
import json
import argparse
import numpy as np
from glob import glob
from chirp import Chirp
from beeper import Beeper
from speaker import Speaker
from datetime import datetime
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
import sounddevice as sd
from scipy.io.wavfile import read as read_wav

error_sampling, error_wav = read_wav("error.wav")


def play_error():
    sd.play(error_wav, error_sampling)


ready_sampling, ready_wav = read_wav("ready.wav")


def play_ready():
    sd.play(ready_wav, ready_sampling)


def get_output(mode, guide_tolerance, guide_delay):
    guide = None
    # guide_tolerance = 0.15
    # guide_delay = 0
    if mode == "speaker":
        guide = Speaker(tolerance=guide_tolerance, delay=guide_delay)
    elif mode == "beeper":
        guide = Beeper(tolerance=guide_tolerance, delay=guide_delay)
path_myrecording = f"/Users/yuxinzhu/.spyder-py3/StreamlitSD/{filename}.wav" save_record(path_myrecording, myrecording, fs) record_state.text(f"Done! Saved sample as {filename}.wav") st.audio(read_audio(path_myrecording)) fig = create_spectrogram(path_myrecording) st.pyplot(fig) ################ TO OBTAIN THE WEB-RECORDING .WAV SAMPLING RATE from scipy.io.wavfile import read as read_wav import os os.chdir('/Users/yuxinzhu/.spyder-py3/StreamlitSD/' ) # change to the file directory sampling_rate, data1 = read_wav( f"{filename}.wav") # enter your filename print(sampling_rate) st.markdown('*Vocal Recording Sampling Rate For:* **output.wav**') st.markdown(sampling_rate) ################################ st.write( "**NOTE:** Please continue to record even if there is an *starting error shown below. * Thank You !" ) ################ TO UPLOAD DATA st.title("Vocal Sample File Upload") file_to_be_uploaded = st.file_uploader("Choose an audio file to upload", type="wav") file_to_be_uploaded = st.file_uploader("Choose an audio file to upload", type="mp3") ################################