def analyse_rec(sound_files, nsources=1, wind_sec=0.092, min_len=.3,
                recognise=None, output_csv='', output_text_grid=''):
    # segment recordings
    w = []
    for ff in sound_files:
        sr, wi = wavread(ff)
        w.append(wi.T)
    w = np.vstack(w).T
    sys.stderr.write("Read {} files, {} channels, {} samples\n"
                     .format(len(sound_files), w.shape[1], w.shape[0]))
    sys.stderr.write("Segmenting audio\n")
    if nsources > 1:
        seg = MultiChannelSegmenter(w, sr=sr, min_len=min_len)
    else:
        #w=w.squeeze()
        if len(w.shape) > 1:
            w = np.mean(w, axis=1)
        seg = SilenceDetector(w.squeeze(), sr=sr, method='pct05',
                              min_len=min_len, wind_sec=wind_sec)
        seg.label = [1 for tst in seg.tst]
        seg.centers = np.array([[0, 0], [1, 0]])
    if recognise:
        seg.recognise(mode=recognise)
    sys.stderr.write("Found {} chunks\n".format(len(seg.label)))
    output_results(seg, output_csv=output_csv,
                   output_text_grid=output_text_grid)
def compare(control_path, exp_path):
    """Compare two wav files and return a score.

    Uses mel frequency cepstrum coefficients (MFCCs) together with
    dynamic time warping (DTW).

    :param control_path: the 'correct' wav - what you are comparing to
    :param exp_path: the unknown wav
    """
    (rate, sig) = wavread(control_path)
    (rate2, sig2) = wavread(exp_path)
    x = mfcc(sig, rate)
    y = mfcc(sig2, rate2)
    dist, cost, acc = dtw.dtw(x, y, dist=lambda x, y: dtw.norm(x - y, ord=1))
    return dist
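# Hedged usage sketch, not part of the original source: both file names
# below are placeholders. A smaller DTW distance means the unknown
# recording is closer to the reference.
score = compare('reference.wav', 'attempt.wav')
print('DTW distance:', score)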
def load_wav(filename):
    if filename.endswith('.wav'):
        fs, x = wavread(filename)
        # resample to 16 kHz if needed
        if fs != 16000:
            x = resample(x, int(16000 / fs * len(x)))
        return x
    return np.array([])
def load_vgmwav(wav_fp):
    fs, wav = wavread(wav_fp)
    assert fs == 44100
    if wav.ndim == 2:
        wav = wav[:, 0]
    wav = wav.astype(np.float32)
    wav /= 32767.
    return wav
def example():
    sig = wavread("ISSpkt.wav")[1]
    NRZIa = nc_afskDemod(sig)
    fig = plt.figure(figsize=(16, 4))
    plt.plot(NRZIa)
    NRZI = np.sign(NRZIa)
    packets, lastflag = detectFrames(NRZI)
    ax = decodeAX25(packets[0])
    print("Dest: %s | Source: %s | Digis: %s | %s |"
          % (ax.destination, ax.source, ax.digipeaters, ax.info))
    print(lastflag)
def get_click_sounds():
    """
    http://127.0.0.1:5000/get_tabla_sounds
    simple! and you get the json data :)
    """
    # read each wav sound
    output = {}
    for stroke in clickStrokes.keys():
        fs, data = wavread(clickStrokes[stroke])
        output[stroke] = data.tolist()
    return jsonify(**output)
def __init__(self, _file, channel=0):
    self.ltsa = None
    if isinstance(_file, str) and _file[-4:] == '.wav':
        self.fs, self.signal = wavread(_file)
        if self.signal.ndim > 1:
            self.signal = self.signal[:, channel]  # take only one channel
    else:
        raise TypeError('Input is not a path to a .wav file: %s' % str(_file))
    self._init_params()
def make_features(wav_dir, mfcc_dir, energy=False, n=13):
    if not os.path.exists(mfcc_dir):
        os.mkdir(mfcc_dir)
    for f in os.listdir(wav_dir):
        if f.endswith('.wav'):
            fs, w = wavread(wav_dir + '/' + f)
            m = mfcc(w, samplerate=fs, appendEnergy=energy, numcep=n)
            mean = m.mean(axis=0)
            std = m.std(axis=0)
            m = (m - mean) / std
            np.save(mfcc_dir + '/' + f[:-3] + 'npy', m)
def mixsounds():
    """Return 9 linear mixtures of sound signals.

    The sound signals have to be in '.../sources/'.
    """
    files = [('../sources/source%i.wav' % i) for i in range(1, 10)]
    source = np.zeros((50000, 9))
    for i in range(9):
        source[:, i] = wavread(files[i])[1]
    source -= np.mean(source, 0)
    mix = np.random.rand(9, 9)
    data = np.dot(source, mix)
    return data
def test_decoding():
    # Load ISS Packet
    Qin = Queue.Queue()
    sig = wavread("ISSpkt_full.wav")[1]
    print len(sig)
    for n in r_[0:len(sig):1024]:
        Qin.put(sig[n:n + 1024])
    Qin.put("END")

    length = 43
    end = False
    count = 1
    # Qin.not_empty is a Condition object and is always truthy;
    # the loop actually terminates via the `return` at the bottom.
    while Qin.not_empty:
        buf = np.array([])
        for i in range(length):
            chunk = Qin.get()
            if chunk == "END":
                print chunk
                end = True
                break
            else:
                buf = np.append(buf, chunk)
        NRZIa = nc_afskDemod(buf)
        NRZI = np.sign(NRZIa)
        packets, lastflag = detectFrames(NRZI)
        # make recursive?
        while lastflag > 0:
            for i in range(20):
                chunk = Qin.get()
                if chunk == "END":
                    print chunk
                    end = True
                    break
                else:
                    buf = np.append(buf, chunk)
            NRZIa = nc_afskDemod(buf)
            NRZI = np.sign(NRZIa)
            packets, lastflag = detectFrames(NRZI)
            if lastflag > 0:
                print lastflag
        for p in packets:
            #print "%d. %s"%(count, str(decodeAX25(p)))
            ax = decodeAX25(p)
            print("%d. Dest: %s | Source: %s | Digis: %s | %s"
                  % (count, ax.destination, ax.source, ax.digipeaters,
                     ax.info))
            count += 1
        if end:
            return
def __init__(self, file_path, verbose=False):
    if verbose:
        print 'Read the audio file:', file_path
    try:
        sr, sig = wavread(file_path)
    except IOError:
        print "Error: can't read the audio file:", file_path
    else:
        if verbose:
            print '\tSuccessful read of the audio file:', file_path
        self.sr = sr
        self.sig_int = sig
        self.sig_float = pcm2float(sig, dtype='float64')
        self.niquist = sr / 2
        self.file_path = file_path
        self.file_name = basename(file_path)
        self.filtered = False
        self.duration = len(sig) / float(sr)
        self.indices = dict()  # empty dictionary of Index
def main(**kwargs):
    outfile = kwargs['outfile'][0]
    infile = kwargs['infile']
    print "Filtering %s to %s" % (infile, outfile)
    rate, sound_samples = wavread(infile)
    mono = True
    if 'ndarray' in str(type(sound_samples[0])):
        mono = False
    # data, r = ffmpeg_load_audio('32but.wav', 44100, True, dtype=np.float32)
    rate, sound_samples = ffmpeg_load_audio(infile, rate, mono,
                                            dtype=np.float32)
    fs = 44100.0
    lowcut = 100.0
    highcut = 3000.0
    # b, a = butter_bandpass(lowcut, highcut, fs, 5)
    # filtered = lfilter(b, a, sound_samples)
    # filtered = butter_bandpass_filter(sound_samples, lowcut, highcut, fs, 5)
    # filtered = butter_bandpass_filter_two(sound_samples, lowcut, highcut, fs, 5)
    wavwrite(outfile, rate, sound_samples)
def create_ceps(path):
    sample_rate, X = wavread(path)
    ceps, mspec, spec = mfcc(X)
    write_ceps(ceps, path)
all_electrodes = ((0, ), (0, ), (1, ))
waveform_means = [np.random.randn(30, 1) for _ in range(3)]

for spike_times, electrodes, waveform_mean in zip(
        all_spike_times, all_electrodes, waveform_means):
    nwbfile.add_unit(spike_times=spike_times, electrodes=electrodes,
                     waveform_mean=waveform_mean)

# analog data

# microphone data
# Be careful! This might contain identifying information
mic_path = '/Users/bendichter/Desktop/Chang/video_abstract/word_emphasis.wav'
mic_fs, mic_data = wavread(mic_path)
nwbfile.add_acquisition(
    TimeSeries('microphone', mic_data, 'audio unit', rate=float(mic_fs),
               description="audio recording from microphone in room"))

# all analog data can be added like the microphone example
# (speaker, button press, etc.)
spk_path = '/Users/bendichter/Desktop/Chang/video_abstract/word_emphasis.wav'
spk_fs, spk_data = wavread(spk_path)
nwbfile.add_stimulus(
    TimeSeries('speaker1', spk_data, 'audio unit', rate=float(spk_fs),
               description="speaker recording"))
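# Hedged sketch expanding on the comment above; it is not part of the
# original script and the path below is a placeholder. Any other analog
# channel (e.g. a button-press trace) can be added the same way as the
# microphone.
btn_path = '/path/to/button_press.wav'  # placeholder, not a real file
btn_fs, btn_data = wavread(btn_path)
nwbfile.add_acquisition(
    TimeSeries('button_press', btn_data, 'analog unit', rate=float(btn_fs),
               description="button press recorded as an analog channel"))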
#!/usr/bin/env python
import numpy as np
from scipy.io.wavfile import read as wavread
from scipy.io.wavfile import write as wavwrite
from sklearn.metrics import mean_squared_error

# Print entire, readable ndarrays
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)
np.set_printoptions(threshold=np.nan)

f_true, data_true = wavread('umbrella.wav')
f_user, data_user = wavread('cucumberfiltered.wav')

# zero-pad the reference so both signals have the same length
zero_array = np.zeros(3746, dtype=np.float)
data_true = np.concatenate([data_true, zero_array])

print mean_squared_error(data_true, data_user)

# compare the power spectra as well
fft_true = np.abs(np.fft.fft(data_true))**2
fft_user = np.abs(np.fft.fft(data_user))**2
print mean_squared_error(fft_true, fft_user)

f_true, data_true = wavread('umbrella.wav')
f_user, data_user = wavread('umbrellaonefiltered.wav')
zero_array = np.zeros(4770, dtype=np.float)
data_true = np.concatenate([data_true, zero_array])
def __init__(self, contours, neutral, SHOW_LINGUAGRAM, SHOW_NEUTRAL,
             SHOW_WAVEFORM, SHOW_SPECTROGRAM):
    '''center points determined by transforming the point (426, 393)
    several times with peterotron, and taking the average.
    '''
    self.static_dir = os.getcwd() + '/'
    #self.centerX = 710
    #self.centerY = 638
    # these come from hand tuning to find the smallest range of y values
    # of polar mags
    self.centerX = 665
    self.centerY = 525
    self.gladefile = self.static_dir + "LinguaViewer.glade"
    self.wTree = gtk.glade.XML(self.gladefile, "window1")
    self.win = self.wTree.get_widget("window1")
    self.win.set_title(contours)
    self.title = contours
    self.mainVBox = self.wTree.get_widget("vbox1")
    dic = {"on_window1_destroy": self.onDestroy,
           "on_tbPlay_clicked": self.playSound,
           "on_tbSave_clicked": self.onSave,
           "on_tbLabel_clicked": self.onLabel}
    self.wTree.signal_autoconnect(dic)
    self.X, self.Y = self.loadContours(contours)
    self.wavname = contours[:-4] + ".wav"

    # Linguagram
    if SHOW_LINGUAGRAM == True:
        x1 = array(self.X)
        y1 = array(self.Y)
        Z = []
        for i in range(len(self.X)):
            zs = []
            for j in range(32):
                zs.append(i + 1)
            Z.append(zs)
        z1 = array(Z)
        self.fig = Figure()
        canvas = FigureCanvas(self.fig)
        #ax = Axes3D(self.fig, rect=[-.23,-.2,1.447,1.4])
        ax = self.fig.add_subplot(1, 1, 1, projection='3d')
        self.fig.subplots_adjust(left=-0.23, bottom=0, right=1.215, top=1)
        ax.mouse_init()
        surf = ax.plot_surface(z1, -x1, -y1, rstride=1, cstride=1,
                               cmap=cm.jet)
        ax.view_init(90, -90)
        canvas.show()
        canvas.set_size_request(600, 200)
        self.mainVBox.pack_start(canvas, True, True)

    # Neutral
    if SHOW_NEUTRAL == True:
        cx, cy = self.getNeutral(neutral)
        cmags = self.makePolar(cx, cy)
        M = self.batchConvert2Polar(self.X, self.Y)
        #D = self.batchGetMinD(M, cmags)
        fakeX = []
        for i in range(len(M)):
            xs = []
            for j in range(1, 33):
                xs.append(j)
            fakeX.append(xs)
        x1 = array(fakeX)
        y1 = array(M)
        Z = []
        for i in range(len(M)):
            zs = []
            for j in range(32):
                zs.append(i)
            Z.append(zs)
        z1 = array(Z)
        self.fig3 = Figure()
        canvas3 = FigureCanvas(self.fig3)
        ax = self.fig3.add_subplot(1, 1, 1, projection='3d')
        self.fig3.subplots_adjust(left=-0.23, bottom=0, right=1.215, top=1)
        ax.mouse_init()
        ax.plot_surface(z1, -x1, y1, rstride=1, cstride=1, cmap=cm.jet)
        ax.view_init(90, -90)
        canvas3.show()
        canvas3.set_size_request(600, 200)
        self.mainVBox.pack_start(canvas3, True, True)

    # Waveform
    windowsize = 0
    self.fig2 = Figure()
    canvas2 = FigureCanvas(self.fig2)
    if SHOW_WAVEFORM == True:
        fs, snd = wavread(self.wavname)
        chan = snd[:, 0]
        t = array(range(len(chan))) / float(fs)
        if SHOW_SPECTROGRAM == True:
            wavax = self.fig2.add_subplot(2, 1, 1)
        else:
            wavax = self.fig2.add_subplot(1, 1, 1)
        wavax.plot(t, chan, 'black')
        wavax.set_xlim(0, max(t))
        windowsize += 200

    # Spectrogram
    if SHOW_SPECTROGRAM == True:
        '''This calls Praat to get the spectrogram and adds it to the viewer'''
        specname = contours[:-4] + '.Spectrogram'
        cleanname = contours[:-4] + '.clean'
        cmd = ['/Applications/Praat.app/Contents/MacOS/Praat',
               self.static_dir + 'makeSpec.praat', self.wavname, specname]
        proc = subprocess.Popen(cmd)
        status = proc.wait()
        cmd2 = ['bash', self.static_dir + 'cleanspec.sh', specname, cleanname]
        proc2 = subprocess.Popen(cmd2)
        status2 = proc2.wait()
        f = open(cleanname, 'r').readlines()
        last = len(f) - 1
        x = f[last].split('\t')
        rows = int(x[0])
        cols = int(x[1])
        img = zeros((rows, cols))
        for i in range(len(f)):
            x = f[i][:-1].split('\t')
            img[int(x[0]) - 1, int(x[1]) - 1] = float(x[2])
        img = log(img)
        if SHOW_WAVEFORM == True:
            specax = self.fig2.add_subplot(2, 1, 2)
        else:
            specax = self.fig2.add_subplot(1, 1, 1)
        specax.imshow(img, cmap=cm.gray_r, origin='lower', aspect='auto')
        windowsize += 200

    # show it
    if (SHOW_WAVEFORM == True) or (SHOW_SPECTROGRAM == True):
        canvas2.show()
        canvas2.set_size_request(600, windowsize)
        self.mainVBox.pack_start(canvas2, True, True)

    self.SHOW_LINGUAGRAM = SHOW_LINGUAGRAM
    self.SHOW_NEUTRAL = SHOW_NEUTRAL
    self.SHOW_WAVEFORM = SHOW_WAVEFORM
    self.SHOW_SPECTROGRAM = SHOW_SPECTROGRAM
    self.windowsize = windowsize
        # tail of vocode(): filter, window, and overlap-add each block
        vocoded = lfilter((error_power,), a, vocoded)
        vocoded *= hann(len(block))
        out[idx:idx + len(block)] += deemphasis(vocoded)
    return out


def preemphasis(signal):
    return lfilter([1, -0.70], 1, signal)


def deemphasis(signal):
    return lfilter([1, 0.70], 1, signal)


def rms(signal):
    return sqrt(mean(power(signal, 2)))


if __name__ == "__main__":
    fs, data = wavread('Mann.wav')
    data = array(data, dtype=double)
    data /= amax(absolute(data))
    data = decimate(data, 4)
    fs = round(fs / 4)

    block_len = 0.032
    overlap = 0.5
    order = 16
    out = vocode(data, fs, block_len, overlap, order)
    wavwrite('vocoded.wav', fs,
             array(out / amax(absolute(out)) * (2**15 - 1), dtype=int16))
    figure()
    plot(data)
# built-in imports
import timeit

# 3rd-party imports
import numpy as np
from scipy.io.wavfile import read as wavread
from scipy.io.wavfile import write

# local imports
from world import main

fs, x_int16 = wavread('test-mwm.wav')
x = x_int16 / (2**15 - 1)

vocoder = main.World()

# profile
print(timeit.timeit("vocoder.encode(fs, x, f0_method='harvest')",
                    globals=globals(), number=1))
'''
# imports
# numpy
import numpy as np
# imports of scipy
from scipy.io.wavfile import read as wavread
from scipy.fftpack import fft
from scipy.signal import lfilter, butter
# graphs
import matplotlib.pyplot as plt
# from pylab import arange

[Fs, samples] = wavread("xmitas02.wav")
#Fs = 150.0;  # sampling rate
Ts = 1.0 / Fs  # sampling interval
t = np.arange(0, Fs)  # time vector
nyq = 0.5 * Fs  # for the filter

# generate a band-pass filter at 4 kHz +/- 100 Hz
b, a = butter(1, [3900 / nyq, 4100 / nyq], 'bandpass', analog=False)
filtered = lfilter(b, a, samples)
# plt.plot(t, filtered, 'green', linewidth=.1)

# plotting the spectrum
#
                     down_sample_factor=dsf)

# draw frequency response
bpf.H0_show(freq_high=20000)
# draw frequency response, using scipy
bpf.f_show()

# load a sample wav
#path0='wav/400Hz-10dB_44100Hz_400msec.wav'
#path0='wav/1KHz-10dB_44100Hz_400msec.wav'
#path0='wav/3KHz-10dB_44100Hz_400msec.wav'
#path0='wav/5KHz-10dB_44100Hz_400msec.wav'
path0 = 'wav/1KHz-10dB_44100Hz_400ms-TwoTube_stereo.wav'
try:
    sr, y = wavread(path0)
except:
    print('error: wavread ')
    sys.exit()
else:
    yg = y / (2**15)
    if yg.ndim == 2:  # if stereo
        yg = np.average(yg, axis=1)
    print('sampling rate ', sr)
    print('y.shape', yg.shape)

y2 = bpf.filtering(yg)  # iir2( yg)

# Exponential Moving Average with Half-wave rectification
ema1 = Class_EMA1()
y3 = ema1(y2)
# display_sample_rate = f.samplerate
# sound_time = f.nframes*1.0/f.samplerate
# sound_data = f.read_frames(f.nframes)
# samples_to_take = int(math.floor(sound_time * display_sample_rate))
# time_step_for_samples = f.samplerate*1.0/display_sample_rate
# wave = []
# for i in xrange(samples_to_take):
#     frame_offset = i * time_step_for_samples
#     if num_channels == 1:
#         wave.append(sound_data[frame_offset])
#     else:
#         wave.append(sound_data[frame_offset][0])

rate, wave = wavread(infile)
wavwrite('test.wav', rate, wave)
(freq, amp) = get_component_frequencies(wave)

# print type(s)
# with open('data.txt', 'a') as textOutputFile:
#     for line in amp:
#         textOutputFile.write(str(line))
#         textOutputFile.write(',')

# Only plot first 4000 Hz
hz = 4000
freq = freq[0:hz]
amp = amp[0:hz]
def read_wav(wav_file):
    fr, wav = wavread(wav_file)
    wav = wav / np.max(np.abs(wav))
    return wav, fr
                    default='na_1_48k.wav', help='input wav file')
parser.add_argument('--methodF0', '-m', default='harvest',
                    help='F0 estimation method, harvest or dio')
parser.add_argument(
    '--not_requiem', action='store_false',
    help='use new waveform generator method from WORLD version 0.2.2')
args = parser.parse_args()

# load wav file
wav_path = Path(args.inFILE)
print('input wave path ', wav_path)
fs, x_int16 = wavread(wav_path)
x = x_int16 / (2**15 - 1)
print('fs', fs)

if 0:  # resample
    fs_new = 16000
    x = signal.resample_poly(x, fs_new, fs)
    fs = fs_new

if 0:  # low-cut
    B = signal.firwin(127, [0.01], pass_zero=False)
    A = np.array([1.0])
    if 0:
        import matplotlib.pyplot as plt
        w, H = signal.freqz(B, A)
import librosa
import pyrenn
import IPython
import matplotlib.pyplot as plt

# Set the folders
speakers = ['awb', 'bdl', 'clb', 'jmk', 'ksp', 'rms', 'slt']
root = os.getcwd()
folderpath = os.path.join(root, 'datasets', speakers[0], 'wav')
files = sorted(os.listdir(folderpath))

# Read the files
for file in files:
    file = os.path.join(folderpath, file)
    fs, audio = wavread(file)
    break

# IPython.display.Audio(file)

# YAAPT pitches
signal = basic.SignalObj(file)
pitchY = pYAAPT.yaapt(signal, frame_length=25, frame_space=5, f0_min=40,
                      f0_max=300)
def slice_signal(path, win_len, hop_len, win_frames, hop_frames,
                 sampling_rate, stream):
    slices = []
    sr, wavform = wavread(path)
    assert sampling_rate == sr
    wavform = torch.from_numpy(normalize_wave_minmax(wavform))
    stft_complex = torch.stft(wavform, win_len, hop_len)
    stft_real_orig, stft_imag_orig = (stft_complex[:, :, 0].numpy(),
                                      stft_complex[:, :, 1].numpy())
    assert stream in ['in', 'out']
    if stream == 'in':
        stft_real = in_real_scale(stft_real_orig)
        stft_imag = in_imag_scale(stft_imag_orig)
    else:
        stft_real = out_real_scale(stft_real_orig)
        stft_imag = out_imag_scale(stft_imag_orig)
    # print(np.max(np.abs(stft_real_recover - stft_real_orig)))
    # assert stft_real_recover.all() == stft_real_orig.all()
    # assert stft_imag_recover.all() == stft_imag_orig.all()
    # stft_real_recover = inverse_in_real_scale(stft_real)
    # stft_imag_recover = inverse_in_imag_scale(stft_imag)
    #
    # stft_recover = np.stack([stft_real_recover, stft_imag_recover], axis=-1)
    # signal_recover = torch.istft(torch.from_numpy(stft_recover), n_fft=400, hop_length=160)
    # wavwrite('./recover.wav', 16000, signal_recover.numpy())
    # stft_orig = np.stack([stft_real_orig, stft_imag_orig], axis=-1)
    # signal_orig = torch.istft(torch.from_numpy(stft_orig), n_fft=400, hop_length=160)
    # wavwrite('./orig.wav', 16000, signal_orig.numpy())
    # stft_real = inverse_out_real_scale(stft_real)
    # stft_imag = inverse_out_imag_scale(stft_imag)
    # stft = np.stack([np.expand_dims(stft_real, axis=0), np.expand_dims(stft_imag, axis=0)], axis=-1)
    len_frames = stft_complex.size()[-2]
    num_slices = math.floor((len_frames - win_frames) / hop_frames) + 1
    if num_slices > 0:
        for idx_slice in range(num_slices):
            slices.append([
                stft_real[:, idx_slice * hop_frames:
                          idx_slice * hop_frames + win_frames],
                stft_imag[:, idx_slice * hop_frames:
                          idx_slice * hop_frames + win_frames]
            ])
            # slices_imag.append(stft_imag[:, idx_slice * hop_frames : idx_slice * hop_frames + win_frames].numpy())
    # num_slices = len(slices)
    # slices_real, slices_imag = [], []
    # for idx in range(num_slices):
    #     slice_real = slices[idx][0][:, 2]
    #     slice_imag = slices[idx][1][:, 2]
    #     slices_real.append(slice_real)
    #     slices_imag.append(slice_imag)
    #
    # stft_real = np.stack(slices_real)
    # stft_imag = np.stack(slices_imag)
    #
    # stft_real = inverse_out_real_scale(stft_real).T
    # stft_imag = inverse_out_imag_scale(stft_imag).T
    # stft = np.stack([np.expand_dims(stft_real, axis=0), np.expand_dims(stft_imag, axis=0)], axis=-1)
    # wav = torch.istft(torch.from_numpy(stft), 400, 160)
    # wavwrite('../save_wav/test2.wav', 16000, wav.numpy().T)
    return slices
import tensorflow as tf
import librosa
import scipy
from scipy.io.wavfile import read as wavread
import numpy as np

# load in audio as an array
data, sample_rate = librosa.load('load.wav', sr=None, mono=False)
_, data2 = wavread('load.wav', True)
print(np.max(np.abs(data2 - data.T)))

# sample partially
data = data[:100]

# FFT window size to be power of 2, exactly nonoverlapping
chunk_size = 4

# try manual padding of synthesized data
data = np.array([2, 6, 0, 8, 1, 9, 9, 5]).astype(np.float32)

# SciPy
_, _, scipy_stft = scipy.signal.stft(data, window='hann', nperseg=chunk_size,
                                     noverlap=chunk_size * 3 // 4,
                                     nfft=chunk_size, return_onesided=True,
                                     padded=True, axis=-1)
_, scipy_istft = scipy.signal.istft(scipy_stft, fs=sample_rate, window='hann',
                                    nperseg=chunk_size,
                                    noverlap=chunk_size * 3 // 4,
                                    nfft=chunk_size, input_onesided=True)

# librosa
rosa_stft = librosa.stft(data, n_fft=chunk_size, hop_length=chunk_size // 4,
                         win_length=chunk_size, window='hann', center=True,
def _get_image(self, name, nperseg=126, noverlap=None,
               mag_scale=np.log10(2**15)):
    """
    From audio in the file name construct the magnitude/phase tensor.

    Parameters
    ----------
    name : string
        Name of the audio file.
    nperseg : int
        Size of each FFT window for the STFT.
    noverlap : int or None
        Size of the overlap to the STFT. If None, then a half-step is used.
    mag_scale : float
        Value with which the magnitude will be scaled.

    Returns
    -------
    mag_phase : ndarray, shape (stft_width, stft_height, 2)
        The magnitude/phase tensor.
    """
    # Read the audio and downscale the rate by 2
    rate, audio_sig = wavread(self.im_dir + name)
    rate, audio_sig = self._downsample(rate, audio_sig)

    # Set global rate
    if self.rate is None:
        self.rate = rate

    # Right pad audio to desired size
    if noverlap is None:
        noverlap = (nperseg + 1) // 2
    length_orig = len(audio_sig)
    length_pad = int(np.ceil(length_orig / noverlap) * noverlap)
    audio_sig = np.pad(audio_sig, (0, length_pad - length_orig), 'constant')

    # Make a Short time Fourier transform
    frequencies, times, stft = signal.stft(audio_sig, fs=rate,
                                           nperseg=nperseg, noverlap=noverlap)

    # Convert to log10 magnitude and phase
    spectrogram = np.log10(np.absolute(stft) + 1e-10)
    phasegram = np.angle(stft) / np.pi  # Scale angles to [-1, 1]

    # Scale the magnitude
    spectrogram /= mag_scale

    if stft.shape[1] != 128:
        # Pad the matrices
        spectrogram = np.pad(spectrogram, [(0, 0), (0, 128 - stft.shape[1])],
                             'minimum')
        phasegram = np.pad(phasegram, [(0, 0), (0, 128 - stft.shape[1])],
                           'constant')

    # Join into a two-channel tensor
    return np.stack((phasegram, spectrogram), axis=-1)
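# Hedged sketch of the inverse mapping, not part of the original class: it
# assumes the conventions of _get_image above (channel 0 is phase scaled to
# [-1, 1], channel 1 is log10 magnitude divided by mag_scale) and the same
# nperseg/noverlap values. The function name is hypothetical.
def _image_to_audio(mag_phase, rate, nperseg=126, noverlap=None,
                    mag_scale=np.log10(2**15)):
    if noverlap is None:
        noverlap = (nperseg + 1) // 2
    phasegram = mag_phase[..., 0] * np.pi        # back to radians
    spectrogram = mag_phase[..., 1] * mag_scale  # back to log10 magnitude
    stft = (10.0 ** spectrogram) * np.exp(1j * phasegram)
    _, audio_sig = signal.istft(stft, fs=rate, nperseg=nperseg,
                                noverlap=noverlap)
    return audio_sig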
import os
from os.path import join as pjoin

from scipy.io.wavfile import read as wavread, write as wavwrite

clean_dir = '/nas/staff/data_work/Sure/Edinburg_Speech/clean_testset_wav_16k'
noisy_dir = '/nas/staff/data_work/Sure/Edinburg_Speech/noisy_testset_wav_16k'
noise_dir = '/nas/staff/data_work/Sure/Edinburg_Speech/noise_testset_wav_16k'

filenames = os.listdir(clean_dir)
num_filenames = len(filenames)
file_counter = 0
for filename in filenames:
    file_counter += 1
    print('Processing audio file [{}/{}]: {}'.format(
        file_counter, num_filenames, filename))
    clean_path = pjoin(clean_dir, filename)
    noisy_path = pjoin(noisy_dir, filename)
    noise_path = pjoin(noise_dir, filename)
    fs, clean_waveform = wavread(clean_path)
    _, noisy_waveform = wavread(noisy_path)
    noise_waveform = noisy_waveform - clean_waveform
    wavwrite(noise_path, fs, noise_waveform)
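# Hedged aside, not part of the original script: if the wavs are int16, the
# plain subtraction above can wrap around on overflow. A safer variant
# widens to int32 first and clips back; treating the data as int16 PCM is
# an assumption here, not something the original asserts.
import numpy as np

def extract_noise_safely(noisy_waveform, clean_waveform):
    # widen, subtract, then clip back into the int16 range
    diff = noisy_waveform.astype(np.int32) - clean_waveform.astype(np.int32)
    return np.clip(diff, -32768, 32767).astype(np.int16)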
#!/usr/bin/env python
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import read as wavread
from scipy.io.wavfile import write as wavwrite

# Print entire, readable ndarrays
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)
np.set_printoptions(threshold=np.nan)

f, data = wavread('test.wav')
ps = np.abs(np.fft.fft(data))**2

time_step = 1 / 44100
freqs = np.fft.fftfreq(data.size, time_step)
idx = np.argsort(freqs)

print ps
plt.plot(freqs, ps)
plt.show()
def show_spectrogram(path):
    sample_rate, X = wavread(path)
    output = specgram(X, Fs=sample_rate)
# # # when needed, the file's audio can be plotted:
# # t = np.linspace(0, N/fs, N)
# # plt.plot(t, data_file)
# MFCCsample = librosa.feature.mfcc(y=frameSample, sr=fs, fmin=fmin, fmax=fmax,
#                                   n_mfcc=n_mfcc, n_mels=n_mels, n_fft=n_fft)
# frameMFCC[j] = MFCCsample[ofs_mfcc:]
# kwFeat[i] = frameMFCC

frameMFCC = {}
kwFeat = {}
for i in range(len(df['file'])):
    # extract the string containing the audio file name
    wavstr = df['file'][i]
    # read the whole audio file
    [_, data_file] = wavread('../../' + wavstr)
    # normalise the audio samples to the range [-1, 1]
    data_file = data_file / 32767
    N = data_file.shape[0]  # length of the file
    # # when needed, the file's audio can be plotted:
    # t = np.linspace(0, N/fs, N)
    # plt.plot(t, data_file)
    # for each audio file, extract the keyword frames and their features
    for j in range(len(iAbre[i])):
        if iAbre[i][j] < 0:
            break
        frameSample = data_file[iAbre[i][j] - frame_lenD2:
                                iAbre[i][j] + frame_lenD2]
        MFCCsample = librosa.feature.mfcc(y=frameSample,
        if len(data.shape) == 1 and self.output_channels != 1:
            # replicate first channel and broadcast to (chan, 1)
            data = np.tile(data, (self.output_channels, 1)).T
        if data.shape != (num_frames, self.output_channels):
            error = 'Can not broadcast array of shape {} to {}'.format(
                data.shape, (num_frames, self.output_channels))
            raise ValueError(error)
        data = data.flatten().tostring()
        err = _pa.Pa_WriteStream(self._stream[0], data, num_frames)
        self._handle_error(err)


if __name__ == '__main__':
    from scipy.io.wavfile import read as wavread
    import time

    fs, wave = wavread('thistle.wav')
    wave = np.array(wave, dtype=np.float32)
    wave /= 2**15
    block_length = 4

    def callback(in_data, frame_count, time_info, status):
        if status != 0:
            print(status)
        return (in_data, continue_flag)

    s = Stream(sample_rate=fs, block_length=block_length, callback=callback)
    s.start()
    # for n in range(int(fs*5/block_length)):
    #     s.write(s.read(block_length))
    # for idx in range(0, wave.size, block_length):
    #     s.write(wave[idx:idx+block_length])
    time.sleep(5)
    s.stop()
        current_hop = resolution * round(float(current_hop) / resolution)
    return indicies, ideal_vals


if __name__ == '__main__':
    from scipy.io.wavfile import read as wavread
    import matplotlib.pyplot as plt

    #'''
    fs, y = wavread('New Seal and New Spring_conv.wav')
    #fs, y = wavread('equation9sec.wav')
    y = y[..., 0]
    t = y.shape[0] / np.float32(fs)
    #'''
    '''
    f0 = 440
    fs = 48000
    t = 5
    n = np.arange(fs*t)
    y = 0.5*np.cos(2*np.pi*f0*n/float(fs))
    '''
"""Play an audio file."""

import sys
import time

import numpy as np
from scipy.io.wavfile import read as wavread
from pysoundcard import Stream, continue_flag, complete_flag

fs, wave = wavread(sys.argv[1])
wave = np.array(wave, dtype=np.float32)
wave /= 2 ** 15  # normalize -max_int16..max_int16 to -1..1

play_position = 0


def callback(in_data, out_data, time_info, status):
    global play_position
    out_data[:] = wave[play_position:play_position + block_length]
    # TODO: handle last (often incomplete) block
    play_position += block_length
    if play_position + block_length < len(wave):
        return continue_flag
    else:
        return complete_flag


block_length = 16
s = Stream(sample_rate=fs, block_length=block_length, callback=callback)
s.start()
while s.is_active():
    time.sleep(0.1)
def inception_score(audio_fps, k, metagraph_fp, ckpt_fp, batch_size=100,
                    tf_ffmpeg_ext=None, fix_length=False):
    use_tf_ffmpeg = tf_ffmpeg_ext is not None
    if not use_tf_ffmpeg:
        from scipy.io.wavfile import read as wavread

    if len(audio_fps) % k != 0:
        raise Exception(
            'Number of audio files ({}) is not divisible by k ({})'.format(
                len(audio_fps), k))
    group_size = len(audio_fps) // k

    # Restore graph
    graph = tf.Graph()
    with graph.as_default():
        saver = tf.train.import_meta_graph(metagraph_fp)
        if use_tf_ffmpeg:
            x_fp = tf.placeholder(tf.string, [])
            x_bin = tf.read_file(x_fp)
            x_samps = tf.contrib.ffmpeg.decode_audio(x_bin, tf_ffmpeg_ext,
                                                     16000, 1)[:, 0]
        x = graph.get_tensor_by_name('x:0')
        scores = graph.get_tensor_by_name('scores:0')

    # Restore weights
    sess = tf.Session(graph=graph)
    saver.restore(sess, ckpt_fp)

    # Evaluate audio
    _all_scores = []
    for i in xrange(0, len(audio_fps), batch_size):
        batch = audio_fps[i:i + batch_size]

        # Load audio files
        _xs = []
        for audio_fp in batch:
            if use_tf_ffmpeg:
                _x = sess.run(x_samps, {x_fp: audio_fp})
            else:
                fs, _x = wavread(audio_fp)
                if fs != 16000:
                    raise Exception('Invalid sample rate ({})'.format(fs))
                if _x.dtype == np.int16:
                    _x = _x.astype(np.float32)
                    _x /= 32767.
            if _x.ndim != 1:
                raise Exception('Invalid shape ({})'.format(_x.shape))

            if fix_length:
                _x = _x[:16384]
                #_x = _x[-16384:]
                _x = np.pad(_x, (0, 16384 - _x.shape[0]), 'constant')

            if _x.shape[0] != 16384:
                raise Exception('Invalid number of samples ({})'.format(
                    _x.shape[0]))

            _xs.append(_x)

        # Compute model scores
        _all_scores.append(sess.run(scores, {x: _xs}))

    sess.close()

    # Find labels
    _all_scores = np.concatenate(_all_scores, axis=0)
    _all_labels = np.argmax(_all_scores, axis=1)

    # Compute inception scores
    _inception_scores = []
    for i in xrange(k):
        _group = _all_scores[i * group_size:(i + 1) * group_size]
        _kl = _group * (np.log(_group)
                        - np.log(np.expand_dims(np.mean(_group, 0), 0)))
        _kl = np.mean(np.sum(_kl, 1))
        _inception_scores.append(np.exp(_kl))

    return (np.mean(_inception_scores), np.std(_inception_scores),
            _all_labels)
def __init__(self, path0):  # , sampling_rate=48000):
    # initialize
    sr, y = wavread(path0)
    self.yg = y / (2 ** 15)
    self.sr = sr
    print('sampling rate ', sr)
'''
References
----------
.. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
   representations for monosyllabic word recognition in continuously
   spoken sentences", IEEE Trans. Acoustics, Speech, Signal Proc.
   ASSP-28 (4): 357-366, August 1980.
   https://www.researchgate.net/publication/261914482_Feature_Extraction_Methods_LPC_PLP_and_MFCC_In_Speech_Recognition

Source Code
-----------
https://github.com/cournape/talkbox
'''
from scipy.io.wavfile import read as wavread
from scikits.talkbox.features import mfcc
from scikits.talkbox.linpred.levinson_lpc import *

# data: raw audio data
# fs: sample rate
sr, signal = wavread('../recordings/obama.wav')

# ceps: cepstral coefficients
coeffs = 13
ceps, mspec, spec = mfcc(signal, nwin=2048, nfft=2048, fs=sr, nceps=coeffs)
print("************************ MFCC ************************")
print(ceps)

# https://github.com/cournape/talkbox/blob/ee0ec30a6a6d483eb9284f72bdaf26bd99765f80/scikits/talkbox/linpred/levinson_lpc.py
lpcResult = lpc(signal, 1)
print("************************ LPC ************************")
print(lpcResult)
"""Play an audio file."""

import sys

import numpy as np
from scipy.io.wavfile import read as wavread
from pysoundcard import Stream

fs, wave = wavread(sys.argv[1])
wave = np.array(wave, dtype=np.int16)

blocksize = 256
s = Stream(samplerate=fs, blocksize=blocksize, dtype='int16')
s.start()
# note: this loop never exits, so the stop() below is unreachable
while True:
    s.write(wave[0:(1024 * 100)])
s.stop()
def read_as_mfcc(path):
    sample_rate, X = wavread(path)
    ceps, mspec, spec = mfcc(X)
    return ceps
import numpy as np
from scipy.io.wavfile import read as wavread

x = np.loadtxt("out.txt")
fs, audio = wavread("test.wav")
audio = audio / (2**15)
print("Error:", np.mean(np.abs(x - audio)))
def PMBSegmentation(argv, nameFileOutputXML):
    inputPath = None
    outputPath = 'out.lab'
    boundariesPath = None
    verbose = False
    # Common parameters
    wLen = 0.016
    wStep = 0.008
    withEntropy = False
    with4Hz = False
    withNBS = False
    withLS = False
    moduLen = 1
    speech_labels = {0: 'Non Speech', 1: 'Speech'}
    music_labels = {0: 'Non Music', 1: 'Music'}
    sort = False
    # entropy
    entropyTh = 0.4
    # 4 Hz
    fcenter = 4.0
    fwidth = 0.5
    normalized = True
    N = 2048
    ordre = 100
    nbFilters = 30
    energyTh = 1.5
    # Music
    musicLen = 1.0
    musicStep = 0.1
    maxSegForLength = 1000
    thLen = 0.04
    thNb = 20
    segments = []
    boundaries = None

    # Read the arguments
    opts = argv
    #print opts
    i = 0
    while i < len(argv):
        #print str(i)
        if opts[i] == '-h':
            printhelp()
        elif opts[i] == '-i':
            i = i + 1
            inputPath = opts[i]
        elif opts[i] == '-o':
            i = i + 1
            outputPath = opts[i]
        elif opts[i] == '-b':
            i = i + 1
            boundariesPath = opts[i]
        elif opts[i] == '-v':
            verbose = True
        elif opts[i] == '--sorted':
            sort = True
        elif opts[i] == '--Entropy':
            withEntropy = True
        elif opts[i] == '--4Hz':
            with4Hz = True
        elif opts[i] == '--NBS':
            withNBS = True
        elif opts[i] == '--LS':
            withLS = True
        elif opts[i] == '-w':
            i = i + 1
            wLen = float(opts[i])
        elif opts[i] == '-s':
            i = i + 1
            wStep = float(opts[i])
        i = i + 1

    if inputPath == None:
        printhelp()
        exit(1)
    else:
        #print "Audio file path : " + inputPath
        fe, data = wavread(inputPath)
        print "Audio file opened : " + inputPath
        fe = float(fe)
        m = iinfo(data[0]).max
        data = [float(d) / m for d in data]
        demi = int(wLen / 2 * fe)
        timeScale = range(demi, len(data) - demi, int(wStep * fe))
        frames = [data[t - demi:t + demi] for t in timeScale]

        if withEntropy:
            if verbose:
                print 'Analysing the entropy modulation'
            entropy_values = [entropy(f) for f in frames]
            entropy_modulation = computeModulation(entropy_values,
                                                   moduLen / wStep,
                                                   withLog=False)
            with open('entropy.lab', 'w') as f:
                for t, v in zip(timeScale, entropy_modulation):
                    f.write('%f\t%f\n' % (float(t) / fe, v))
            entropy_modulation = [(e / entropyTh) - 1 if e < 2 * entropyTh
                                  else 1 for e in entropy_modulation]
            segments_entropy = decoupe(entropy_modulation)
            segments_entropy = [(s[0] * wStep, s[1] * wStep,
                                 speech_labels[s[2]] + ' (Entropy)')
                                for s in segments_entropy]
            segments.extend(segments_entropy)

        if with4Hz:
            if verbose:
                print 'Analysing the 4 Hz energy modulation'
            Wo = fcenter / fe
            Wn = [Wo - (fwidth / 2) / fe, Wo + (fwidth / 2) / fe]
            num = firwin(ordre, Wn, pass_zero=False)
            melFilter = melFilterBank(nbFilters, N, fe)
            hw = hamming(wLen * fe)
            energy = [dot(abs(rfft(hw * f, n=2 * N)[0:N])**2, melFilter)
                      for f in frames]
            # transpose the list of lists
            energy = lfilter(num, 1, map(list, zip(*energy)), 0)
            energy = sum(energy)
            if normalized:
                energy = energy / mean(energy)
            energy_modulation = computeModulation(energy, moduLen / wStep,
                                                  withLog=True)
            with open('energy.lab', 'w') as f:
                for t, v in zip(timeScale, energy_modulation):
                    f.write('%f\t%f\n' % (float(t) / fe, v))
            energy_modulation = [(e / energyTh) - 1 if e < 2 * energyTh
                                 else 1 for e in energy_modulation]
            segments_energy = decoupe(energy_modulation)
            segments_energy = [(s[0] * wStep, s[1] * wStep,
                                speech_labels[s[2]] + ' (4Hz)')
                               for s in segments_energy]
            segments.extend(segments_energy)

        if withLS:
            if verbose:
                print 'Analysing segment lengths'
            if boundariesPath == None:
                a, b = segment(data, fe)
                boundaries = [(float(st[0]) / fe, ) for st in a]
            else:
                boundaries = readBoundaries(boundariesPath)
            times = array([b[0] for b in boundaries])
            demi = musicLen / 2
            timeScale = arange(demi, times[-1] - demi, musicStep)
            # take the smallest ones!!
            segframes = [sorted(diff(times[logical_and(times >= t - demi,
                                                       times <= t + demi)]),
                                reverse=True)
                         for t in timeScale]
            lengths = [mean(s[:min([maxSegForLength, len(s)])])
                       for s in segframes]
            with open('LS.lab', 'w') as f:
                for t, v in zip(timeScale, lengths):
                    f.write('%f\t%f\n' % (float(t) / fe, v))
            lengths = [(l / thLen) - 1 if l < 2 * thLen else 1
                       for l in lengths]
            segments_length = decoupe(lengths)
            segments_length = [(s[0] * musicStep, s[1] * musicStep,
                                music_labels[s[2]] + ' (LS)')
                               for s in segments_length]
            segments.extend(segments_length)

        if withNBS:
            if verbose:
                print 'Analysing the number of segments'
            if boundariesPath == None:
                if boundaries == None:
                    a, b = segment(data, fe)
                    boundaries = [(float(st[0]) / fe, ) for st in a]
            else:
                if boundaries == None:
                    boundaries = readBoundaries(boundariesPath)
            times = array([b[0] for b in boundaries])
            demi = musicLen / 2
            timeScale = arange(demi, times[-1] - demi, musicStep)
            segnb = [float(npSum(logical_and(times >= t - demi,
                                             times <= t + demi)))
                     for t in timeScale]
            with open('NBS.lab', 'w') as f:
                for t, v in zip(timeScale, segnb):
                    f.write('%f\t%f\n' % (float(t) / fe, v))
            segnb = [-(l / thNb) + 1 if l < 2 * thNb else 1 for l in segnb]
            segments_nb = decoupe(segnb)
            segments_nb = [(s[0] * musicStep, s[1] * musicStep,
                            music_labels[s[2]] + ' (NBS)')
                           for s in segments_nb]
            segments.extend(segments_nb)

        if sort:
            segments = sorted(segments, key=lambda x: x[0])

        v = writeToXML(segments, nameFileOutputXML, withNBS, with4Hz,
                       withLS, withEntropy)
        return v
        #print "Audio file processed successfully"
'''
wav_fps = wav_fps[:args.n]

# Graph to calculate feats
x = tf.placeholder(tf.float32, [None])
x_trim = x[:16384]
x_trim = tf.pad(x_trim, [[0, 16384 - tf.shape(x_trim)[0]]])
X = tf.contrib.signal.stft(x_trim, 2048, 128, pad_end=True)
X_mag = tf.abs(X)
W_mel = tf.contrib.signal.linear_to_mel_weight_matrix(
    num_mel_bins=128,
    num_spectrogram_bins=1025,
    sample_rate=16000,
    lower_edge_hertz=40.,
    upper_edge_hertz=7800.,
)
X_mel = tf.matmul(X_mag, W_mel)
X_lmel = tf.log(X_mel + 1e-6)
X_feat = X_lmel

# Calculate feats for each wav file
with tf.Session() as sess:
    _X_feats = []
    for wav_fp in tqdm(wav_fps):
        _, _x = wavread(wav_fp)
        _X_feats.append(sess.run(X_feat, {x: _x}))
    _X_feats = np.array(_X_feats)
with open(args.out_fp, 'wb') as f:
    pickle.dump(_X_feats, f)
from scipy.io.wavfile import read as wavread

file_name = "quantized/piano/single/mono_sound/piano_mono_2.wav"
point = 44248
length = 100

rate, data = wavread(file_name)
print("rate", rate)
print("data", data.shape)
print("expected", rate * 4)

left = data[:, 1].tolist()
episode = left[point:point + length]


def find_subpattern(lst, pattern):
    indices = []
    for i in range(len(lst) - len(pattern)):
        if lst[i:i + len(pattern)] == pattern:
            indices.append(i)
    return indices


indexs = find_subpattern(left, episode)
start = indexs[0]
for i in range(len(indexs) - 1):
    indexs[i + 1] = indexs[i + 1] - start - 44100 * i
print("indexs", indexs)
#!/usr/bin/python
import sys
import argparse
import math

import numpy as np
from scipy.io.wavfile import read as wavread
from scipy.io.wavfile import write as wavwrite

# Print entire, readable ndarrays
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)
np.set_printoptions(threshold=np.nan)

rate_one, data_one = wavread('umbrellaonefiltered.wav')
rate_two, data_two = wavread('umbrellatwofiltered.wav')

zero_count = int(math.floor(len(data_one) / 2))
zero_count_two = int(math.floor(len(data_two) / 2))
if zero_count_two > zero_count:
    zero_count = zero_count_two

# Zero pad file one
zero_array = np.zeros(zero_count, dtype=np.float)
data_one = np.concatenate([data_one, zero_array])
length_one = int(math.floor(len(data_one)))

# FFT file one
fft_one = np.fft.rfft(data_one)

# Zero pad file two
length_two = int(math.floor(len(data_two)))
    plt.grid()
    fig.tight_layout()
    plt.show()


if __name__ == '__main__':
    from scipy.io.wavfile import read as wavread

    # instance
    w = Class_Wavelet1()

    # load wav sample
    path = 'sample1.wav'
    sr, x = wavread(path)

    # (1) show recovery of the original waveform via transform and
    # inverse transform
    # select switch (filter): if a value is 0.0, that element is not used
    # s1,  s2,  s3,  s4,  s5,  s6,  s7,  s8
    flt = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]  # use all elements
    lng0 = 2048  # set length of wavelet transform
    y = w.trans_itrans_level8(x[0:lng0], filter=flt, show=True)

    # (2) show comparison with composition from selected elements only
    # select switch (filter): if a value is 0.0, that element is not used
    # s1,  s2,  s3,  s4,  s5,  s6,  s7,  s8
    flt = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0]  # only s6, s7, s8 are used
    lng0 = 2048
    y = w.trans_itrans_level8(x[0:lng0], filter=flt, show=True)