def test(filename=None):
    import random, os
    import matplotlib.pyplot as plt
    #signal, params = read_signal(sound,WINSIZE)
    if filename is not None:
        scene = os.path.basename(filename)[0]
    else:
        # No file given: pick a random FLAC recording from tmp/.
        filename = random.choice([x for x in os.listdir("tmp/")
                                  if os.path.splitext(x)[1] == ".flac"])
        scene = filename[0]
        filename = "tmp/" + filename
    print(filename)
    truths = vad.load_truths()
    signal, rate = speech.read_soundfile(filename)
    seconds = float(len(signal)) / rate
    winsize = librosa.time_to_samples(float(WINMS) / 1000, sr=rate)[0]
    window = sp.hanning(winsize)
    ltsd = LTSD(winsize, window, 5)
    res, threshold, nstart, nend = ltsd.compute(signal)
    segments = ltsd.segments(res, threshold)
    #print(float(len(signal))/rate, librosa.core.frames_to_time(len(res), 8000, winsize/2))
    segments = librosa.core.frames_to_time(segments, sr=rate, hop_length=winsize // 2)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    #ax.plot((signal/np.max(signal))*np.mean(res)+np.mean(res))
    ax.plot(np.linspace(0, seconds, len(res)), res)
    ax.plot([0, seconds], [threshold, threshold])
    vad.plot_segments(truths[scene]['combined'], segments, ax)
    n1 = float(nstart) / rate
    n2 = float(nend) / rate
    ax.vlines([n1, n2], -20, 20)
    plt.show()
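# Hedged usage sketch (added, not part of the original module): a thin
# command-line entry point for the LTSD demo above. How test() was meant to be
# invoked is an assumption; pass a FLAC path to analyse a specific file, or
# nothing to let test() pick a random recording from tmp/.
def main_ltsd_demo(argv=None):
    import sys
    args = sys.argv[1:] if argv is None else argv
    test(args[0] if args else None)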
def pipeline(path, frame_ms=64, hop_ms=64):
    # Frame the signal and compute per-frame RMS energy and spectral entropy.
    sig, rate = speech.read_soundfile(path)
    fsize = librosa.time_to_samples(float(frame_ms) / 1000, sr=rate)[0]
    hop = librosa.time_to_samples(float(hop_ms) / 1000, sr=rate)[0]
    frames = librosa.util.frame(sig, frame_length=fsize, hop_length=hop)
    rms = np.apply_along_axis(speech.rms, 0, frames)
    H, p = spectral_entropy(frames, rate, fsize)
    return sig, rate, frames, fsize, rms, H, p
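# Hedged usage sketch (added): one way the entropy pipeline above might be
# inspected. The file path is a placeholder, and the exact shapes/ranges of H
# and p depend on the project-local spectral_entropy() helper, which is not
# shown in this file.
def inspect_entropy(path="tmp/example.flac"):
    sig, rate, frames, fsize, rms, H, p = pipeline(path)
    # librosa.util.frame() returns (frame_length, n_frames), so rms and H are
    # expected to hold one value per frame (column).
    print("frames:", frames.shape)
    print("rms: mean %.4f, max %.4f" % (float(np.mean(rms)), float(np.max(rms))))
    print("spectral entropy: mean %.4f, max %.4f" % (float(np.mean(H)), float(np.max(H))))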
def vad(soundfile, noisefile=None):
    # LTSD-based voice activity detection: returns speech-segment boundary
    # times in seconds, with the total duration appended at the end.
    signal, rate = speech.read_soundfile(soundfile)
    if noisefile is not None:
        noise, nrate = speech.read_soundfile(noisefile)
        print("found noisefile: " + noisefile)
    else:
        noise = None
    seconds = float(len(signal)) / rate
    winsize = librosa.time_to_samples(float(WINMS) / 1000, sr=rate)[0]
    window = sp.hanning(winsize)
    ltsd = LTSD(winsize, window, 5, init_noise=noise)
    res, threshold, nstart, nend = ltsd.compute(signal)
    segments = ltsd.segments(res, threshold)
    #print(float(len(signal))/rate, librosa.core.frames_to_time(len(res), 8000, winsize/2))
    segments = librosa.core.samples_to_time(segments, sr=rate).tolist()
    indexes = []
    for s in segments:
        indexes += s
    indexes.append(seconds)
    return indexes
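# Hedged usage sketch (added): vad() above returns a flat list of boundary
# times in seconds with the total duration appended. Assuming the boundaries
# alternate segment starts and ends, they can be paired back up like this; the
# default path is a placeholder.
def print_speech_segments(soundfile="tmp/example.flac", noisefile=None):
    bounds = vad(soundfile, noisefile)
    for start, end in zip(bounds[0::2], bounds[1::2]):
        print("speech from %.2f s to %.2f s" % (start, end))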
def pipeline(path, frame_ms=30, hop_ms=15, filt=True, noisy=True, shift=True, snr=60):
    # Autocorrelation-based pipeline: frame the signal, compute normalized
    # autocorrelations per frame, align them on their dominant peak, and derive
    # long-term autocorrelation statistics (acvars, ltacs).
    #sig, rate = librosa.load(path)
    #sig2, rate2 = ad.read_file(path)
    sig, rate = speech.read_soundfile(path)
    #sig = signal.wiener(sig)
    fsize = librosa.time_to_samples(float(frame_ms) / 1000, sr=rate)[0]
    hop = librosa.time_to_samples(float(hop_ms) / 1000, sr=rate)[0]
    if filt:
        sig = bp_filter(sig, lowcut=120, highcut=1000)
    if noisy:
        sig = speech.add_noise(sig, "noise8k/white.flac", snr)
    frames = librosa.util.frame(sig, frame_length=fsize, hop_length=hop)
    w = signal.hann(fsize)
    #frames_W = np.zeros_like(frames)
    #print(frames.shape)
    #frames = frames.T
    #print(w.shape)
    # Multiply each frame by the Hann window, then divide the window back out
    # (with a small epsilon to avoid division by zero at the frame edges).
    frames_w = np.apply_along_axis(lambda x, w: x * w, 0, frames, w)
    frames = frames_w
    frames = np.apply_along_axis(lambda x, w: x / (w + 1e-15), 0, frames, w)
    # frames_W[i] = signal.convolve(frames[i],w, mode='same')
    #frames = frames_W.T
    #w = signal.correlate(w,w,mode='full')
    #w = w[w.size/2:]
    #print(frames.shape)
    #frames = sigutil.enframe(sig, fsize, hop, signal.hann)
    #print("normalized autocorrelation")
    naccs = np.apply_along_axis(nacc, 0, frames)
    #print("trimming")
    naccs = np.apply_along_axis(trim_frame, 0, naccs)
    lags = np.zeros(len(naccs.T))
    acf_n = np.zeros(len(naccs.T))
    for i in range(len(naccs.T)):
        frame = naccs.T[i]
        relmax = signal.argrelmax(frame)[0]
        if len(relmax) > 0:
            # Strongest peak at or after the first local maximum
            # (skips the lag-0 lobe of the autocorrelation).
            argmax2 = relmax[0] + np.argmax(frame[relmax[0]:])
        else:
            argmax2 = np.argmax(frame)
        #print(relmax)
        """
        if len(relmax)>=2:
            #print(relmax[0], relmax[1], relmax[1]-relmax[0])
            lags[i] = relmax[1]-relmax[0]
        elif len(relmax) == 1:
            lags[i] = relmax[0]
        """
        lags[i] = argmax2
        acf_n[i] = len(relmax)
        #print(lags[i], len(relmax))
        # Rotate the frame so the chosen peak sits at lag 0.
        naccs.T[i] = np.roll(frame, -1 * argmax2)
    #minacs = np.zeros_like(naccs)
    #for i in range(len(naccs.T)):
    #    minacs[:,i] = min_ac(naccs.T, i)
    meanacs = np.zeros_like(naccs)
    for i in range(len(naccs.T)):
        meanacs[:, i] = mean_ac(naccs.T, i)
    #print(naccs.shape)
    #print(meanacs.shape)
    #print("lags")
    #print("variances")
    #acvars = np.apply_along_axis(acvar, 0, naccs2)
    acvars = np.apply_along_axis(acvar, 0, meanacs)
    #print("ltacs")
    ltacs = np.zeros_like(acvars)
    for i in range(len(acvars)):
        ltacs[i] = ltac(acvars, i)
    print("done: " + path)
    return sig, rate, frames, fsize, meanacs, acvars, ltacs, (lags, acf_n)
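# Hedged sketch (added): the pipelines above and below rely on a project-local
# nacc() helper that is not shown in this file. A typical normalized
# autocorrelation of a single frame looks like the following; this is an
# assumption about its behaviour, not the original implementation.
def nacc_sketch(frame):
    frame = frame - np.mean(frame)
    ac = np.correlate(frame, frame, mode="full")[len(frame) - 1:]
    # Normalize so the zero-lag value is 1; the epsilon guards silent frames.
    return ac / (ac[0] + 1e-15)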
def pipeline(path, frame_ms=30, hop_ms=15, filt=True, noisy=True, shift=True, snr=30):
    # Variant of the pipeline above: Wiener-filters the raw signal, calls
    # bp_filter() with its default band edges, and defaults to snr=30.
    #sig, rate = librosa.load(path)
    #sig2, rate2 = ad.read_file(path)
    sig, rate = speech.read_soundfile(path)
    sig = signal.wiener(sig)
    fsize = librosa.time_to_samples(float(frame_ms) / 1000, sr=rate)[0]
    hop = librosa.time_to_samples(float(hop_ms) / 1000, sr=rate)[0]
    if filt:
        sig = bp_filter(sig)
    if noisy:
        sig = speech.add_noise(sig, "noise8k/white.flac", snr)
    frames = librosa.util.frame(sig, frame_length=fsize, hop_length=hop)
    w = signal.hann(fsize)
    #frames_W = np.zeros_like(frames)
    #print(frames.shape)
    #frames = frames.T
    #print(w.shape)
    # Multiply each frame by the Hann window, then divide the window back out
    # (with a small epsilon to avoid division by zero at the frame edges).
    frames_w = np.apply_along_axis(lambda x, w: x * w, 0, frames, w)
    frames = frames_w
    frames = np.apply_along_axis(lambda x, w: x / (w + 1e-15), 0, frames, w)
    # frames_W[i] = signal.convolve(frames[i],w, mode='same')
    #frames = frames_W.T
    #w = signal.correlate(w,w,mode='full')
    #w = w[w.size/2:]
    #print(frames.shape)
    #frames = sigutil.enframe(sig, fsize, hop, signal.hann)
    #print("normalized autocorrelation")
    naccs = np.apply_along_axis(nacc, 0, frames)
    #print("trimming")
    naccs = np.apply_along_axis(trim_frame, 0, naccs)
    lags = np.zeros(len(naccs.T))
    acf_n = np.zeros(len(naccs.T))
    for i in range(len(naccs.T)):
        frame = naccs.T[i]
        relmax = signal.argrelmax(frame)[0]
        if len(relmax) > 0:
            # Strongest peak at or after the first local maximum
            # (skips the lag-0 lobe of the autocorrelation).
            argmax2 = relmax[0] + np.argmax(frame[relmax[0]:])
        else:
            argmax2 = np.argmax(frame)
        #print(relmax)
        """
        if len(relmax)>=2:
            #print(relmax[0], relmax[1], relmax[1]-relmax[0])
            lags[i] = relmax[1]-relmax[0]
        elif len(relmax) == 1:
            lags[i] = relmax[0]
        """
        lags[i] = argmax2
        acf_n[i] = len(relmax)
        #print(lags[i], len(relmax))
        # Rotate the frame so the chosen peak sits at lag 0.
        naccs.T[i] = np.roll(frame, -1 * argmax2)
    #minacs = np.zeros_like(naccs)
    #for i in range(len(naccs.T)):
    #    minacs[:,i] = min_ac(naccs.T, i)
    meanacs = np.zeros_like(naccs)
    for i in range(len(naccs.T)):
        meanacs[:, i] = mean_ac(naccs.T, i)
    #print(naccs.shape)
    #print(meanacs.shape)
    #print("lags")
    #print("variances")
    #acvars = np.apply_along_axis(acvar, 0, naccs2)
    acvars = np.apply_along_axis(acvar, 0, meanacs)
    #print("ltacs")
    ltacs = np.zeros_like(acvars)
    for i in range(len(acvars)):
        ltacs[i] = ltac(acvars, i)
    print("done: " + path)
    return sig, rate, frames, fsize, meanacs, acvars, ltacs, (lags, acf_n)
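# Hedged usage sketch (added): one way the long-term autocorrelation outputs of
# pipeline() could be turned into a crude per-frame mask. The threshold and its
# direction are placeholders; whether speech corresponds to high or low values
# depends on the project-local ltac() and acvar() helpers.
def ltac_mask(path="tmp/example.flac", threshold=0.1):
    sig, rate, frames, fsize, meanacs, acvars, ltacs, (lags, acf_n) = pipeline(path)
    mask = ltacs > threshold   # hypothetical cutoff
    print("frames above threshold: %d / %d" % (int(np.count_nonzero(mask)), len(mask)))
    return mask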