def getDisplacements2D(self, Z=None, window=False):
    """
    Use phase correlation to find the relative displacement between each
    time step.

    Z: slice index handed to get2DSlice; when None it defaults to half the
       ratio getNbPixelsPerFrame() / getNbPixelsPerSlice().
    window: when true, every slice is multiplied by a separable 2D Hamming
       window before being transformed.

    Returns an array of shape (getNbFrames(), 2). Row 0 stays at zero; row t
    holds the peak position of the phase correlation between frames t-1 and
    t, with positions in the upper half of an axis wrapped to negative values.
    """
    if Z is None:
        # Floor division keeps Z an integer index on Python 3 as well.
        Z = self.getNbPixelsPerFrame() // self.getNbPixelsPerSlice() // 2
    shape = np.asarray(self.get2DShape())
    if window:
        ham = np.hamming(shape[1]) * np.atleast_2d(np.hamming(shape[0])).T
    else:
        ham = 1.0
    displs = np.zeros((self.getNbFrames(), 2))
    a = rfft2(self.get2DSlice(T=0, Z=Z) * ham)
    for t in range(1, self.getNbFrames()):
        b = rfft2(self.get2DSlice(T=t, Z=Z) * ham)
        # Normalized cross-power spectrum; bins with zero magnitude are left
        # untouched to avoid dividing by zero.
        R = a * b.conj()
        Ra = np.abs(R)
        nonzero = Ra > 0
        R[nonzero] /= Ra[nonzero]
        r = irfft2(R)
        # Periodic position of the correlation peak
        displs[t] = np.unravel_index(r.argmax(), r.shape)
        # The current spectrum becomes the reference of the next step
        a = b
    return np.where(displs < shape/2, displs, displs - shape)
def applyWindow(self, window="hanning", ww=0, cf=0):
    '''
    Apply window function to frequency domain data

    window: one of "hanning", "hamming" or "blackman"
    cf: the frequency the window is centered over [Hz]
    ww: the window width [Hz], if ww equals 0 the window covers the full range

    Multiplies self.data by the window and then calls self.done().
    Raises ValueError for an unsupported window name.
    '''
    self.info("Applying %s window ..." % window)
    window_funcs = {
        "hanning": np.hanning,
        "hamming": np.hamming,
        "blackman": np.blackman,
    }
    try:
        make_window = window_funcs[window]
    except KeyError:
        raise ValueError("Unsupported window type: %r" % (window,))
    if ww == 0:
        w = make_window(self.numfreq)
    else:
        # Convert the centre frequency and width from Hz to bin counts
        pos = int((cf - self.lowF) / self.deltaF)
        halfwidth = int(ww / (2.0 * self.deltaF))
        # Everything outside the window is zeroed
        w = np.zeros(self.numfreq)
        w[pos - halfwidth:pos + halfwidth] = make_window(2 * halfwidth)
    self.data = self.data * w
    self.done()
def lcn_mauch(X, kernel=None, rho=0):
    """Apply a version of local contrast normalization (LCN), inspired by
    Mauch, Dixon (2009), "Approximate Note Transcription...".

    Parameters
    ----------
    X : np.ndarray, ndim=2
        Input representation.
    kernel : np.ndarray
        Convolution kernel (should be roughly low-pass). It is normalised to
        unit sum on a copy; the caller's array is not modified. When omitted,
        a 15 x 37 kernel is built from the rising half of a Hamming window
        along the first axis and a full Hamming window along the second.
    rho : scalar
        Exponent applied to the local scale before dividing, for heuristic
        range control. With the default of 0 the scale has no effect.

    Returns
    -------
    Z : np.ndarray
        The processed output.
    """
    if kernel is None:
        dim0, dim1 = 15, 37
        dim0_weights = np.hamming(dim0 * 2 + 1)[:dim0]
        dim1_weights = np.hamming(dim1)
        kernel = dim0_weights[:, np.newaxis] * dim1_weights[np.newaxis, :]

    # Out-of-place normalisation: leaves a caller-supplied kernel untouched
    # and also works for integer-valued kernels.
    kernel = kernel / float(kernel.sum())

    # Subtract the local mean; hwr is defined elsewhere (presumably half-wave
    # rectification -- confirm against its definition).
    Xh = convolve2d(X, kernel, mode='same', boundary='symm')
    V = hwr(X - Xh)

    # Local scale estimate, floored at its own global mean
    S = np.sqrt(
        convolve2d(np.power(V, 2.0), kernel, mode='same', boundary='symm'))
    S_mean = S.mean()
    S2 = np.zeros(S.shape) + S_mean
    above = S > S_mean
    S2[above] = S[above]
    if S2.sum() == 0.0:
        # Avoid dividing by an all-zero scale
        S2 += 1.0
    return V / S2**rho
def show_raw(item):
    """Load the audio file named by *item* and plot its waveform.

    item: object whose text() gives the path handed to ef.load_audio.

    Removes any previously drawn RMS / FDLP / TAE curves, stores the
    normalised signal and its time axis in the module globals audio_x /
    audio_y, optionally weights the spectrum when the global filt is set,
    and plots the signal on p1 and its absolute value on p2.
    """
    global audio_x, audio_y, freq, curItem, filt
    curItem = item
    # Drop overlay curves left over from the previously shown item
    if l_rms is not None:
        p1.removeItem(l_rms)
        p2.removeItem(l_rms2)
    if l_fdlp is not None:
        p1.removeItem(l_fdlp)
        p2.removeItem(l_fdlp2)
    if l_tae is not None:
        p1.removeItem(l_tae)
        p2.removeItem(l_tae2)
    fn = item.text()
    w = ef.load_audio(fn)
    # w[0] is used as the sample array and w[2] as the sampling rate
    freq = w[2]
    p1.clear()
    p2.clear()
    x_r = np.arange(len(w[0]))/float(w[2])
    audio_x = x_r
    audio_y = w[0]/max(abs(w[0]))
    if filt is not None:
        # Spectrum bin corresponding to the frequency filt
        ind = int(filt * len(audio_y)/freq)
        if ind > 1024:
            # np.append only joins two arrays (its third argument is the
            # axis), so the three-part mask is built with np.concatenate.
            mask = np.concatenate((np.zeros(ind-1024),
                                   np.hamming(2048),
                                   np.zeros(len(audio_y)-ind-1024)))
        else:
            mask = np.append(np.hamming(2*ind), np.zeros(len(audio_y)-2*ind))
        audio_y = np.real(np.fft.ifft(np.fft.fft(audio_y)*mask))
        audio_y = audio_y/max(abs(audio_y))
    p1.plot(audio_x, audio_y, pen=(1, 4))
    lr.setBounds([x_r[0], x_r[-1]])
    lr.setRegion([x_r[0], x_r[-1]])
    p1.addItem(lr)
    p2.plot(audio_x, np.abs(audio_y), pen=(1, 4))
def parse_ICA_results(ICA, buffer_window):
    """Pick one of the first three ICA components and package it.

    ICA: 2D array-like indexed as ICA[:, k]; columns 0, 1 and 2 are used.
    buffer_window: stored unchanged under the "bufferWindow" key.

    Each column is multiplied by a Hamming window and passed through
    np.fft.irfft. The component with the largest sum / maximum ratio is
    returned under "array" as a list of floats.

    Returns a dict with keys "id", "bufferWindow" and "array"; the ratios and
    the dict are also printed.
    """
    signals = {"id": "ICA", "bufferWindow": buffer_window}

    candidates = []
    power_ratio = []
    for channel in range(3):
        samples = np.squeeze(np.asarray(ICA[:, channel])).tolist()
        tapered = np.hamming(len(samples)) * samples
        recovered = np.fft.irfft(tapered).astype(float).tolist()
        candidates.append(recovered)
        power_ratio.append(np.sum(recovered) / np.amax(recovered))

    signals["array"] = candidates[int(np.argmax(power_ratio))]

    # Function-call form prints the same text on Python 2 and Python 3
    print(power_ratio)
    print(signals)
    return signals
def process_patch(X):
    """Return the square root of the magnitude spectrum of a windowed patch.

    X is multiplied by a separable 2D Hamming window. A full FFT is used when
    X has any element with a non-zero imaginary part, a real FFT otherwise.
    """
    taper = np.outer(np.hamming(X.shape[0]), np.hamming(X.shape[1]))
    has_complex = np.any(np.iscomplex(X))
    transform = np.fft.fftn if has_complex else np.fft.rfftn
    spectrum = transform(X * taper)
    return np.abs(spectrum) ** 0.5
def average_energy(audio, fs=44100, n=1024):
    """Compute a windowed average magnitude for consecutive frames.

    audio: 1D sequence of samples; it is cut into non-overlapping frames of
        n samples and any trailing partial frame is ignored.
    fs: sampling rate, used only to build the time axis.
    n: frame length in samples.

    Returns (result, t): result[i] is the sum of |hamming * frame_i| divided
    by n times the window energy; t[i] is the start time of frame i.
    """
    win = np.hamming(n)
    Ew = np.sum(win ** 2)
    # Floor division: the frame count must be an integer on Python 3 too
    n_frames = len(audio) // n
    scale = float(n) * Ew
    result = np.empty(n_frames)
    for i in range(n_frames):
        frame = audio[i * n:(i + 1) * n]
        result[i] = np.sum(np.absolute(win * frame)) / scale
    t = np.arange(n_frames) * (float(n) / fs)
    return result, t
def getDispl2DImage(self, t0=0, t1=1, Z=0):
    """Return the phase-correlation image between two time steps.

    t0, t1: the time steps to compare.
    Z: slice index handed to get2DSlice.

    Both slices are multiplied by a separable 2D Hamming window, transformed
    with rfft2, and the normalized cross-power spectrum is transformed back
    with irfft2.
    """
    shape = self.get2DShape()
    ham = np.hamming(shape[1]) * np.atleast_2d(np.hamming(shape[0])).T
    a = rfft2(self.get2DSlice(T=t0, Z=Z) * ham)
    b = rfft2(self.get2DSlice(T=t1, Z=Z) * ham)
    # Normalized cross-power spectrum. The previous numexpr expression had
    # unbalanced parentheses; this form also skips bins of zero magnitude
    # instead of dividing by zero.
    R = a * b.conj()
    Ra = np.abs(R)
    nonzero = Ra > 0
    R[nonzero] /= Ra[nonzero]
    return irfft2(R)
def align(frames, template):
    """
    Warp each slice of the 3D array frames to align it to *template*.

    frames: array whose first two axes match template.shape; slices are taken
        along the third axis.
    template: 2D reference image.

    Returns the warped slices stacked along the third axis.
    Raises ValueError when the template and slice shapes differ.
    """
    if frames.shape[:2] != template.shape:
        raise ValueError('Template must be same shape as one slice of frame array')

    # Calculate xs and ys to sample from one frame
    xs, ys = np.meshgrid(np.arange(frames.shape[1]), np.arange(frames.shape[0]))

    # Calculate window to use in FFT convolve
    w = np.outer(np.hamming(template.shape[0]), np.hamming(template.shape[1]))

    # Calculate a normalisation for the cross-correlation
    ccnorm = 1.0 / fftconvolve(w, w)

    # Set border of normalisation to zero to avoid overfitting. Border is set
    # so that there must be a minimum of half-frame overlap
    half_rows = template.shape[0] >> 1
    half_cols = template.shape[1] >> 1
    ccnorm[:half_rows, :] = 0
    ccnorm[-half_rows:, :] = 0
    ccnorm[:, :half_cols] = 0
    ccnorm[:, -half_cols:] = 0

    # Normalise template. The division is done out of place so integer
    # images are promoted to float instead of failing.
    tmpl_min = template.min()
    norm_template = template - tmpl_min
    tmpl_max = norm_template.max()
    norm_template = norm_template / float(tmpl_max)

    warped_ims = []
    n_frames = frames.shape[2]
    for frame_idx in range(n_frames):
        logging.info('Aligning frame {0}/{1}'.format(frame_idx+1, n_frames))
        frame = frames[:, :, frame_idx]

        # Normalise frame with the template's offset and scale
        norm_frame = (frame - tmpl_min) / float(tmpl_max)

        # Convolving with the flipped frame is a cross-correlation
        conv_im = fftconvolve(norm_template*w, np.fliplr(np.flipud(norm_frame*w)))
        conv_im *= ccnorm

        # Find maximum location
        max_loc = np.unravel_index(conv_im.argmax(), conv_im.shape)

        # Convert location to shift
        dy = max_loc[0] - template.shape[0] + 1
        dx = max_loc[1] - template.shape[1] + 1
        logging.info('Offset computed to be ({0},{1})'.format(dx, dy))

        # Warp image
        warped_ims.append(dtcwt.sampling.sample(frame, xs-dx, ys-dy, method='bilinear'))

    return np.dstack(warped_ims)
def apply_hamming(frames, inv=False):
    """
    Multiply a sequence of frames by a Hamming window or by its reciprocal.

    :param frames: Frames with dimension num_frames x num_elements_per_frame
    :param inv: When true, the element-wise inverse of the window is applied.
    :return: The frames multiplied by the chosen window.
    """
    win = np.hamming(frames.shape[1])
    if inv:
        win = win ** (-1)
    return frames * win
def get_image_data(filename):
    """Load an image, shrink it and return it as a Hamming-windowed 2D array.

    filename: path handed to pygame.image.load.

    The image is scaled down by SCALE_FACTOR, converted to 8 bits, read with
    pygame.surfarray.array2d and multiplied by a Hamming window along each
    of its two axes.
    """
    im = pygame.image.load(filename)
    width, height = im.get_size()
    # Floor division keeps the target size integral on Python 3 as well
    im = pygame.transform.scale(
        im, (width // SCALE_FACTOR, height // SCALE_FACTOR))
    a = pygame.surfarray.array2d(im.convert(8))
    hw1 = numpy.hamming(a.shape[0])
    hw2 = numpy.hamming(a.shape[1])
    # Separable window: hw1 along the first axis, hw2 along the second
    return a * hw1[:, numpy.newaxis] * hw2
def notSoRandomWalk(shape, std=1, trendFilterLength=32, lpfLength=16):
    """Band-pass filter a random walk: a Hamming-smoothed copy is subtracted
    to remove the low-frequency trend / drift, then the remainder is smoothed
    with a second Hamming window to attenuate high-frequency noise."""
    walk = randwalk(shape, std=std)
    axis = len(walk.shape) > 1  # 0 iff 1d, else 1

    # Unit-sum kernel used to estimate the baseline drift, roughly
    trendKernel = np.hamming(trendFilterLength)
    trendKernel /= np.sum(trendKernel)
    walk -= filters.convolve1d(walk, weights=trendKernel, axis=axis,
                               mode='reflect')

    # Smooth out noisy spikes; this second kernel is not normalised
    spikeKernel = np.hamming(lpfLength)
    return filters.convolve1d(walk, weights=spikeKernel, axis=axis,
                              mode='reflect')
def cfrequency(data, fs, smoothie, fk):
    """
    Central frequency of a signal.

    Computes the central frequency of the given data which can be windowed or
    not. The central frequency is a measure of the frequency where the
    power is concentrated. It corresponds to the second moment of the power
    spectral density function.

    The central frequency is returned.

    :type data: :class:`~numpy.ndarray`
    :param data: Data to estimate central frequency from. A 2D array is
        processed row by row; a 1D array is treated as a single window.
    :param fs: Sampling frequency in Hz.
    :param smoothie: Factor for smoothing the result.
    :param fk: Coefficients for calculating time derivatives
        (calculated via central difference).
    :return: **cfreq[, dcfreq]** - Central frequency, Time derivative of center
        frequency (windowed only).
    """
    # The last axis holds the samples for both 1D and 2D input; indexing
    # shape[1] raised IndexError for 1D data before its branch was reached.
    nfft = util.nextpow2(data.shape[-1])
    freq = np.linspace(0, fs, nfft + 1)
    # Floor division: slice bounds must be integers on Python 3
    freqaxis = freq[0:nfft // 2]
    if np.size(data.shape) > 1:
        cfreq = np.zeros(data.shape[0])
        for i, row in enumerate(data):
            Px_wm = welch(row, np.hamming(len(row)), util.nextpow2(len(row)))
            Px = Px_wm[0:len(Px_wm) // 2]
            cfreq[i] = np.sqrt(np.sum(freqaxis ** 2 * Px) / (sum(Px)))
        cfreq = util.smooth(cfreq, smoothie)
        # Pad both ends with the edge value so the derivative filter has
        # samples to work on at the boundaries
        pad = np.size(fk) // 2
        cfreq_add = np.hstack(
            ([cfreq[0]] * pad, cfreq,
             [cfreq[np.size(cfreq) - 1]] * pad))
        dcfreq = signal.lfilter(fk, 1, cfreq_add)
        # correct start and end values of time derivative
        dcfreq = dcfreq[np.size(fk) - 1:np.size(dcfreq)]
        return cfreq, dcfreq
    else:
        Px_wm = welch(data, np.hamming(len(data)), util.nextpow2(len(data)))
        Px = Px_wm[0:len(Px_wm) // 2]
        cfreq = np.sqrt(np.sum(freqaxis ** 2 * Px) / (sum(Px)))
        return cfreq
def __init__(self, winSecs, soundSecs, sampleRate):
    """
    Store the timing parameters and precompute the ramp windows.

    :param winSecs: ramp duration in seconds
    :param soundSecs: total sound duration in seconds
    :param sampleRate: samples per second, used to convert both durations
        to sample counts
    """
    self.sampleRate = sampleRate
    self.winSecs = winSecs
    self.soundSecs = soundSecs
    self.winSamples = int(round(sampleRate * winSecs))
    self.soundSamples = int(round(sampleRate * soundSecs))

    # First and second half of one Hamming window of twice the ramp length
    taper = numpy.hamming(self.winSamples * 2)
    self.startWindow = taper[0:self.winSamples]
    self.endWindow = taper[self.winSamples:]

    # Sample index at which the closing ramp begins
    self.finalWinStart = self.soundSamples - self.winSamples
def make_wave(self):
    # inverts the spectrogram and returns a wave
    #
    # Each spectrum in self.spec_map (keyed by time) is turned back into a
    # wave segment, divided by a Hamming window and copied into one output
    # buffer at the position given by its time key.
    #
    # return: Wave
    res = []
    # items() works on both Python 2 and Python 3
    for t, spectrum in sorted(self.spec_map.items()):
        wave = spectrum.make_wave()
        n = len(wave)

        # divide by a Hamming window (presumably undoing the window applied
        # when the spectrogram was built -- confirm against make_spectrogram)
        window = 1 / np.hamming(n)
        wave.ys *= window

        # place the segment so that it is centred on its nominal time
        i = wave.find_index(t)
        start = i - (n // 2)
        end = start + n
        res.append((start, end, wave))

    starts, ends, waves = zip(*res)
    low = min(starts)
    high = max(ends)

    # builtin float replaces the np.float alias removed from recent NumPy
    ys = np.zeros(high - low, float)
    for start, end, wave in res:
        # the buffer starts at sample `low`, so shift the indices by it
        ys[start - low:end - low] = wave.ys

    return Wave(ys, framerate=wave.framerate)
def smooth(params, win, mode = _SMOOTH):
    """
    Smooth params with a normalised Hamming window of length win.

    params: 1D array to smooth.
    win: window length; clipped to len(params) - 1 and rounded up to the
        next even number.
    mode: _DETREND returns params minus the smoothed curve; _TREND and any
        other value return the smoothed curve itself.
    """
    if win >= len(params) - 1:
        win = len(params) - 1
    if win % 2 != 0:
        win += 1

    # Reflect the signal at both ends so the 'valid' convolution still
    # covers the original range
    s = np.r_[params[win-1:0:-1], params, params[-1:-win:-1]]
    w = np.hamming(win)
    y = np.convolve(w / w.sum(), s, mode='valid')

    # win is even here; floor division keeps the slice bounds integral on
    # Python 3 as well
    half = win // 2
    trend = y[(half - 1):-half]
    if mode == _DETREND:
        return params - trend
    return trend
def makeimg(wav):
    """Render the spectrogram of a WAV file and save it as a PNG.

    wav: file name relative to the global callpath.

    Returns the path of the image written under the global imgpath, which is
    the input name with ".wav" replaced by ".png".
    """
    global callpath
    global imgpath

    fs, frames = wavfile.read(os.path.join(callpath, wav))

    pylab.ion()

    # generate specgram
    pylab.figure(1)
    pylab.specgram(
        frames,
        NFFT=256,
        Fs=22050,
        detrend=pylab.detrend_none,
        window=numpy.hamming(256),
        noverlap=192,
        cmap=pylab.get_cmap('Greys'))

    # Duration in seconds; float division so Python 2 does not truncate it
    # to whole seconds before rounding
    x_width = len(frames) / float(fs)

    pylab.ylim([0, 11025])
    pylab.xlim([0, round(x_width, 3) - 0.006])

    img_path = os.path.join(imgpath, wav.replace(".wav", ".png"))
    pylab.savefig(img_path)

    return img_path
def iff_filter(sig, scale, plot_show = 0):
    """Smooth sig by convolving it with a Hamming window.

    sig: 1D array.
    scale: the window length is max(sig.size * scale, 90).
    plot_show: when 1, debug plots of the signal and its spectrum are shown.

    The signal is extended periodically by half a window on each side, the
    convolution is cropped back to the original length, and the result is
    rescaled so that its maximum equals the maximum of sig.
    """
    order = max(sig.size*scale, 90)
    half = int(order/2)
    n = sig.size

    # Extend signal on both sides for removing boundary effect in convolution
    sig_extend = np.ones(n + half*2)
    sig_extend[half:(n + half)] = sig
    sig_extend[0:half] = sig[(n - half):n]
    sig_extend[(n + half):sig_extend.size] = sig[0:half]

    # convolve with hamming window and normalize
    smooth_sig = np.convolve(sig_extend, np.hamming(order), 'same')
    smooth_sig = smooth_sig[half:(n + half)]
    smooth_sig = np.amax(sig)/np.amax(smooth_sig)*smooth_sig

    # Plot signal for debug
    # NOTE(review): med_sig is not defined in this function; presumably a
    # module-level name -- confirm, otherwise this branch raises NameError.
    if plot_show == 1:
        fig, ax = plt.subplots(ncols=2)
        ax[0].plot(sig)
        ax[0].plot(smooth_sig, '-r')
        ax[0].plot(med_sig, 'black')
        ax[1].loglog(rfft(sig))
        ax[1].loglog(rfft(smooth_sig), '-r')
        ax[1].loglog(rfft(med_sig), 'black')
        plt.show()

    return smooth_sig
def window (v, func='hanning', params=None):
    """ applies a windowing function to the 3D volume v (inplace, as reference)

    v: cubic 3D array, modified in place.
    func: 'hanning', 'hamming', 'circle' or 'box' ('gaussian' is not
        implemented).
    params: for 'circle', params[0] scales the mask radius; when None the
        radius is N/2 - 1.

    Raises Exception when v is not a cube or func is not supported.
    """
    N = v.shape[0]
    D = v.ndim
    if any(d != N for d in v.shape) or D != 3:
        raise Exception("Error: Volume is not Cube.")

    def apply_seperable_window (v, w):
        # Multiply by the same 1D window along each of the three axes
        v *= n.reshape(w,(-1,1,1))
        v *= n.reshape(w,(1,-1,1))
        v *= n.reshape(w,(1,1,-1))

    separable = {'hanning': n.hanning, 'hamming': n.hamming}
    if func in separable:
        apply_seperable_window(v, separable[func](N))
    elif func=='gaussian':
        raise Exception('Unimplimented')
    elif func=='circle':
        c = gencoords(N,3)
        # identity test: comparing an array-valued params with == None would
        # be evaluated element-wise
        if params is None:
            r = N/2 -1
        else:
            r = params[0]*(N/2*1)
        v *= (n.sum(c**2,1) < ( r ** 2 ) ).reshape((N,N,N))
    elif func=='box':
        v[:,0,0] = 0.0
        v[0,:,0] = 0.0
        v[0,0,:] = 0.0
    else:
        raise Exception("Error: Window Type Not Supported")
def compute_pitch_hps(x, Fs, dF=None, Fmin=30., Fmax=900., H=5):
    """Estimate a pitch with the harmonic product spectrum (HPS).

    x: 1D signal array (not modified).
    Fs: sampling frequency.
    dF: frequency resolution of the search; defaults to Fs / x.size.
    Fmin, Fmax: bounds of the frequency range searched.
    H: number of harmonics multiplied together.

    Returns dF times the index of the largest HPS value inside the range.
    """
    # default value for dF frequency resolution
    if dF is None:
        dF = float(Fs) / x.size

    # Hamming window apodization, out of place so that the caller's array is
    # untouched and integer input is promoted to float
    x = x * np.hamming(x.size)

    # number of points in FFT to reach the resolution wanted by the user
    n_fft = int(np.ceil(Fs / dF))

    # DFT computation
    X = np.abs(np.fft.fft(x, n=n_fft))

    # limiting bin so that the H-th harmonic stays below half the spectrum
    R = int(np.floor(1 + n_fft / 2. / H))

    # computing the indices for min and max frequency (integers, since they
    # are used as array shapes)
    N_min = int(np.ceil(Fmin / float(Fs) * n_fft))
    N_max = int(np.floor(Fmax / float(Fs) * n_fft))
    N_max = min(N_max, R)

    # harmonic product spectrum computation: row k holds bins k, 2k, ..., Hk
    indices = (np.arange(N_max)[:, np.newaxis] * np.arange(1, H+1)).astype(int)
    P = np.prod(X[indices.ravel()].reshape(N_max, H), axis=1)

    # zero everything outside [N_min, N_max] before picking the peak
    bins = np.arange(P.size)
    ix = np.argmax(P * ((bins >= N_min) & (bins <= N_max)))
    return dF * ix
def make_wave(self):
    """Inverts the spectrogram and returns a Wave.

    Each spectrum in self.spec_map (keyed by time) is turned back into a wave
    segment, divided by a Hamming window and copied into one output buffer at
    the position given by its time key.

    returns: Wave
    """
    res = []
    # items() works on both Python 2 and Python 3
    for t, spectrum in sorted(self.spec_map.items()):
        wave = spectrum.make_wave()
        n = len(wave)

        # divide by a Hamming window (presumably undoing the window applied
        # when the spectrogram was built -- confirm against make_spectrogram)
        window = 1 / np.hamming(n)
        wave.window(window)

        # place the segment so that it is centred on its nominal time
        i = wave.find_index(t)
        start = i - n // 2
        end = start + n
        res.append((start, end, wave))

    starts, ends, waves = zip(*res)
    low = min(starts)
    high = max(ends)

    # builtin float replaces the np.float alias removed from recent NumPy
    ys = np.zeros(high - low, float)
    for start, end, wave in res:
        # the buffer starts at sample `low`, so shift the indices by it
        ys[start - low:end - low] = wave.ys

    return Wave(ys, framerate=wave.framerate)
def make_spectrogram(self, seg_length, win_flag=True):
    """Computes the spectrogram of the wave.

    Segments of seg_length samples are taken every seg_length // 2 samples
    (half-overlapping) while the segment end stays below len(self.ys).

    seg_length: number of samples in each segment
    win_flag: boolean, whether to apply hamming window to each segment

    returns: Spectrogram
    """
    if win_flag:
        window = np.hamming(seg_length)
    i, j = 0, seg_length
    # Floor division keeps the slice bounds integral on Python 3 as well
    step = seg_length // 2

    # map from time to Spectrum
    spec_map = {}

    while j < len(self.ys):
        segment = self.slice(i, j)
        if win_flag:
            segment.window(window)

        # the nominal time for this segment is the midpoint
        t = (segment.start + segment.end) / 2
        spec_map[t] = segment.make_spectrum()

        i += step
        j += step

    return Spectrogram(spec_map, seg_length)
def timeStretchAudio(inputAudio, outputAudio, outputDuration, writeOutput=1):
    """Time-stretch an audio file through a sinusoidal model.

    inputAudio: path of the sound file to read.
    outputAudio: path of the sound file to write (used when writeOutput == 1).
    outputDuration: last value of the time-scaling map handed to
        trans.sineTimeScaling.
    writeOutput: when 1 the result is written to outputAudio and nothing is
        returned; otherwise (y, fs, nChannel) is returned.
    """
    originalWav = Sndfile(inputAudio, 'r')
    try:
        x = originalWav.read_frames(originalWav.nframes)
        fs = originalWav.samplerate
        nChannel = originalWav.channels
        print(fs)
        if nChannel > 1:
            # Frames are rows and channels are columns: keep the first
            # channel (x[0] would select the first frame instead).
            x = x[:, 0]

        # Analysis / synthesis parameters for the sinusoidal model
        w = np.hamming(801)
        N = 2048
        t = -90
        minSineDur = .005
        maxnSines = 150
        freqDevOffset = 20
        freqDevSlope = 0.02
        Ns = 512
        # Floor division keeps the hop size an integer on Python 3 as well
        H = Ns // 4

        tfreq, tmag, tphase = SM.sineModelAnal(
            x, fs, w, N, H, t, maxnSines, minSineDur,
            freqDevOffset, freqDevSlope)

        timeScale = np.array([0, 0, .4, outputDuration])

        ytfreq, ytmag = trans.sineTimeScaling(tfreq, tmag, timeScale)
        y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)

        if writeOutput == 1:
            outputWav = Sndfile(outputAudio, 'w', originalWav.format,
                                originalWav.channels, originalWav.samplerate)
            try:
                outputWav.write_frames(y)
            finally:
                outputWav.close()
        else:
            return y, fs, nChannel
    finally:
        # The input file was previously left open
        originalWav.close()
def smooth(input_data, nth_octave = 6, window_type='hamming'):
    """
    Smooth input data over 1/n octave

    input_data: data handed to _distribute_over_log to be resampled on a
        logarithmic axis between 30 Hz and 20 kHz.
    nth_octave: the window spans 1/nth_octave of an octave.
    window_type: 'hamming', 'bartlett', 'blackman' or 'hanning'.

    Returns the log-distributed data convolved with the unit-sum window.
    Raises ValueError for an unknown window_type.
    """
    window_funcs = {
        'hamming': np.hamming,
        'bartlett': np.bartlett,
        'blackman': np.blackman,
        'hanning': np.hanning,
    }
    # Validate first: an unknown name used to surface later as an unbound
    # local variable.
    if window_type not in window_funcs:
        raise ValueError("Unknown window type: %r" % (window_type,))

    f_min = 30
    f_max = 20e3
    number_of_octaves = math.log(f_max / f_min, 2)

    # ideally, this should be computed from the display resolution
    number_of_points = 4048
    points_per_octave = number_of_points / number_of_octaves

    log_data = _distribute_over_log(input_data, f_min, f_max, number_of_points)

    # NOTE(review): this length is generally not an integer although the
    # NumPy window functions document an integer argument -- confirm whether
    # it should be rounded.
    window_length = points_per_octave / nth_octave

    window = window_funcs[window_type](window_length)
    output = np.convolve(window / window.sum(), log_data, mode='same')

    return output
def calibrate_adc_snapshot(self, raw_data):
    """
    Calibrates a raw ADC count timedomain snapshot.

    raw_data: sequence of raw ADC counts. It is processed in blocks of
        2 * n_chans samples; a trailing partial block is ignored.

    Returns a dict with the (optionally sign-flipped) samples 'adc_mixed',
    the samples scaled to 'adc_v' and 'input_v', the number of accumulated
    blocks 'n_accs', the spectra 'adc_spectrum_dbm' and 'input_spectrum_dbm',
    and 'input_spectrum_dbuv' when an antenna bandpass calfile is configured.
    """
    n_chans = self.config['n_chans']
    ret = {}
    ret['adc_mixed'] = numpy.array(raw_data)
    if self.config['flip_spectrum']:
        # Negating every other sample mirrors the spectrum
        ret['adc_mixed'][::2] *= -1
    ret['adc_v'] = ret['adc_mixed']*self.config['adc_v_scale_factor']
    ret['input_v'] = ret['adc_v']*self.get_input_scale_factor()

    # Floor division: range() below needs an integer on Python 3 as well
    n_accs = len(raw_data) // n_chans // 2
    window = numpy.hamming(n_chans*2)
    spectrum = numpy.zeros(n_chans)
    ret['n_accs'] = n_accs

    # Accumulate the magnitude spectrum of each windowed block
    for acc in range(n_accs):
        block = ret['adc_v'][n_chans*2*acc:n_chans*2*(acc+1)]
        spectrum += numpy.abs(numpy.fft.rfft(block*window)[0:n_chans])

    ret['adc_spectrum_dbm'] = \
        20*numpy.log10(spectrum/n_accs/n_chans*6.14)
    ret['input_spectrum_dbm'] = \
        ret['adc_spectrum_dbm']-(self.config['system_bandpass'])
    if self.config['antenna_bandpass_calfile'] != 'none':
        ret['input_spectrum_dbuv'] = dbm_to_dbuv(
            ret['input_spectrum_dbm']) + self.config['antenna_factor']
    return ret
def chromagram(x,fs,length=[],minFreq=27.5,octaves=8,bins=12,thresh=0,window=[],step=[],k=[],verbose=False):
    """Compute a chromagram: one chroma vector per analysis frame of x.

    x: signal array, framed along its first axis.
    fs: sampling rate; the default frame length is ceil(fs/50) samples.
    length: frame length in samples (empty/0 selects the default).
    minFreq, octaves, bins, thresh: handed to kernel() when k is not given.
    window: array whose size must equal length; a Hamming window of that
        length is created when no array is passed.
    step: hop between frames in samples (empty/0 selects length // 2).
    k: precomputed kernel; built with kernel() when left empty.
    verbose: print the frame counter while processing.

    Returns an array of shape (number of frames, bins).
    Raises Exception when the window size does not match length.
    """
    # Setup variables. Lengths and hops are cast to int because they are
    # used as window sizes and slice bounds.
    if not length:
        length = np.ceil(fs/50)
    length = int(length)
    if not isinstance(window,np.ndarray):
        window = np.hamming(length)
    # NOTE(review): the window is validated here but never applied to the
    # frames below -- confirm whether chroma() is expected to window them.
    if window.size != length:
        raise Exception('Window lengths do not match!')
    if not step:
        step = length // 2
    step = int(step)
    nsteps = (x.shape[0]-length) // step + 1
    c = np.zeros((nsteps,bins))

    # Create kernel. The emptiness test avoids `k == []`, which is an
    # element-wise comparison when k is an array.
    if k is None or (isinstance(k, list) and not k):
        k = kernel(minFreq,octaves,fs,bins=bins,thresh=thresh)

    for ind in range(nsteps):
        if verbose:
            print(ind,'/',nsteps)
        selection = x[ind*step:ind*step+length]
        c[ind,] += chroma(selection,k,bins=bins)

    return c
def make_spectrogram(self, seg_length, win_flag=True):
    # computes the spectrogram of the wave
    #
    # Segments of seg_length samples are taken every seg_length // 2 samples
    # (half-overlapping) while the segment end stays below len(self.ys).
    #
    # seg_length: number of samples in each segment
    # win_flag: boolean, whether to apply hamming window to each segment
    #
    # return: Spectrogram
    if win_flag:
        # sequence of multipliers that are the same length as the wave segment
        window = np.hamming(seg_length)

    i = 0
    j = seg_length
    # floor division keeps the slice bounds integral on Python 3 as well
    step = seg_length // 2

    # map from time to spectrum
    spec_map = {}

    while j < len(self.ys):
        segment = self.slice(i, j)
        if win_flag:
            # apply window function to the wave segment
            segment.ys *= window

        # the nominal time for this segment is the midpoint
        t = (segment.start + segment.end) / 2
        spectrum = segment.make_spectrum()
        spec_map[t] = spectrum

        i += step
        j += step

    return Spectrogram(spec_map, seg_length)
def getPerio(ts, freq=None, sampFreq=1., tapeWindow=None):
    ''' Get the periodogram of ts using a taping window of length tapeWindow.

    ts: time series array, split along its first axis into consecutive
        segments of tapeWindow samples (not modified).
    freq: frequencies of the periodogram; its length sets the FFT size.
        Computed with getFreqPow2 when None.
    sampFreq: sampling frequency handed to getFreqPow2.
    tapeWindow: segment length; defaults to the whole series (no taping).

    Returns (freq, perio, perioSTD): the frequencies, the average over the
    segments of the unit-sum power spectrum, and the square root of the
    average of its square.
    '''
    nt = ts.shape[0]

    # If no tapeWindow given then do not tape
    if tapeWindow is None:
        tapeWindow = nt
    nTapes = int(nt / tapeWindow)
    window = np.hamming(tapeWindow)

    # Get frequencies if not given
    if freq is None:
        freq = getFreqPow2(tapeWindow, sampFreq=sampFreq)
    nfft = freq.shape[0]

    # Get periodogram averages over nTapes windows
    perio = np.zeros((nfft,))
    perioSTD = np.zeros((nfft,))
    for tape in range(nTapes):
        tsTape = ts[tape*tapeWindow:(tape+1)*tapeWindow]
        # Remove the mean out of place: the slice is a view, so an in-place
        # subtraction would alter the caller's series (and fail on integers).
        tsTape = tsTape - tsTape.mean(0)
        tsWindowed = tsTape * window

        # Fourier transform and shift zero frequency to center
        fts = np.fft.fftshift(np.fft.fft(tsWindowed, nfft, 0))

        # Power spectrum of this segment, normalised to unit sum
        power = np.abs(fts)**2
        normalised = power / np.sum(power)
        perio += normalised
        perioSTD += normalised**2
    perio /= nTapes
    perioSTD = np.sqrt(perioSTD / nTapes)

    return (freq, perio, perioSTD)
def mfcc(s, fs):
    """Compute mel-frequency cepstral coefficients frame by frame.

    s: 1D signal array.
    fs: sampling rate, handed to get_filterbanks.

    Frames of 256 samples are taken every 100 samples, multiplied by a
    Hamming window and transformed; the power spectrum is projected onto 30
    filterbank channels and the DCT of its clipped logarithm is stored.

    Returns an array of shape (30, l) with one column per frame; the last
    column is left at zero because the loop stops one frame early.
    """
    # Constants: frame length, hop size, number of filterbank channels
    N = 256
    M = 100
    P = 30
    l = int(math.ceil((s.size-N+1)/M))

    # Allocate c array
    c = np.zeros((P, l))

    # Loop-invariant pieces. Assumes get_filterbanks depends only on its
    # arguments -- TODO confirm.
    w = np.hamming(N)
    m = get_filterbanks(P, N, fs)
    # Floor division: math.floor returns a float on Python 2, which cannot
    # be used as a slice bound
    n2 = N // 2

    for x in range(0, l-1):
        # Frame
        start = x * M
        frame = s[start:start+N]
        # Window
        windFrame = frame * w
        # FFT
        frameFFT = np.fft.fft(windFrame)
        # Mel-Frequency Wrapping
        ms = np.dot(m, abs(np.power(frameFFT[0:n2+1], 2)))
        # Last step, compute mel-frequency cepstrum coefficients
        c[:, x] = fft.dct(np.log(ms.clip(min=0.00001)))

    # NOTE(review): the original called np.delete(c, 0, 0) and discarded the
    # result, so the 0'th order coefficient has always been returned. The
    # shape is kept as is; decide whether row 0 should really be dropped.
    return c
def framesig(sig, frame_len, frame_step, winfunc=lambda x:numpy.ones((1, x))):
    """Cut a signal into overlapping, Hamming-windowed frames.

    The signal is zero-padded at the end so that the last frame is complete.

    :param sig: the audio signal to frame.
    :param frame_len: length of each frame measured in samples.
    :param frame_step: number of samples after the start of the previous frame that the next frame should begin.
    :param winfunc: accepted for interface compatibility; it is not called,
        and a Hamming window is always applied to every frame.
    :returns: an array of frames. Size is NUMFRAMES by frame_len.
    """
    n_samples = len(sig)
    frame_len = int(round(frame_len))
    frame_step = int(round(frame_step))

    # One frame always; more only when the signal is longer than a frame
    numframes = 1
    if n_samples > frame_len:
        numframes += int(math.ceil((1.0 * n_samples - frame_len) / frame_step))

    # Zero-pad so that the final frame has frame_len samples
    padded_len = int((numframes - 1) * frame_step + frame_len)
    padding = numpy.zeros((padded_len - n_samples,))
    padded = numpy.concatenate((sig, padding))

    # Sample index of every element: offset inside the frame + frame start
    within_frame = numpy.tile(numpy.arange(0, frame_len), (numframes, 1))
    frame_starts = numpy.tile(
        numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T
    indices = numpy.array(within_frame + frame_starts, dtype=numpy.int32)

    taper = numpy.tile(numpy.hamming(frame_len), (numframes, 1))
    return padded[indices] * taper
continue break return pixels_gandalf_random fft_plot_filter = dsp.ExpFilter(np.tile(1e-1, config['N_FFT_BINS']), alpha_decay=0.5, alpha_rise=0.99) mel_gain = dsp.ExpFilter(np.tile(1e-1, config['N_FFT_BINS']), alpha_decay=0.01, alpha_rise=0.99) mel_smoothing = dsp.ExpFilter(np.tile(1e-1, config['N_FFT_BINS']), alpha_decay=0.5, alpha_rise=0.99) # volume = dsp.ExpFilter(config['MIN_VOLUME_THRESHOLD'], alpha_decay=0.02, alpha_rise=0.02) fft_window = np.hamming( int(config['MIC_RATE'] / config['FPS']) * config['N_ROLLING_HISTORY']) prev_fps_update = time.time() def microphone_update(audio_samples): global y_roll, prev_rms, prev_exp, prev_fps_update, n_frame # Normalize samples between 0 and 1 y = audio_samples / 32767.0 # Construct a rolling window of audio samples y_roll[:-1] = y_roll[1:] y_roll[-1, :] = np.copy(y) y_data = np.concatenate(y_roll, axis=0).astype(np.float32) vol = np.max(np.abs(y_data)) if vol < config['MIN_VOLUME_THRESHOLD']: if config['TURN_OFF_ON_SILENCE']:
def makeMask(matrixSize, shape='circle', radius=1.0, center=(0.0, 0.0),
             range=[-1, 1], fringeWidth=0.2):
    """
    Returns a matrix to be used as an alpha mask (circle,gauss,ramp)

    :Parameters:
        matrixSize: integer
            the size of the resulting matrix on both dimensions (e.g 256)
        shape:  'circle','gauss','ramp' (linear gradient from center),
            'raisedCosine' (the edges are blurred by a raised cosine)
            shape of the mask
        radius:  float
            scale factor to be applied to the mask (circle with radius of
            [1,1] will extend just to the edge of the matrix). Radius can
            be asymmetric, e.g. [1.0,2.0] will be wider than it is tall.
        center:  2x1 tuple or list (default=[0.0,0.0])
            the centre of the mask in the matrix ([1,1] is top-right
            corner, [-1,-1] is bottom-left)
        fringeWidth: float (0-1)
            The proportion of the raisedCosine that is being blurred.
        range: 2x1 tuple or list (default=[-1,1])
            The minimum and maximum value in the mask matrix

    Raises ValueError for an unknown shape.
    """
    rad = makeRadialMatrix(matrixSize, center, radius)
    if shape == 'ramp':
        outArray = 1 - rad
    elif shape == 'circle':
        outArray = numpy.where(numpy.greater(rad, 1.0), 0.0, 1.0)
    elif shape == 'gauss':
        outArray = makeGauss(rad, mean=0.0, sd=0.33333)
    elif shape == 'raisedCosine':
        # This affects the 'granularity' of the raised cos
        hamming_len = 1000
        # This one affects the proportion of the stimulus diameter that is
        # devoted to the raised cosine.
        fringe_proportion = fringeWidth

        rad = makeRadialMatrix(matrixSize, center, radius)
        outArray = numpy.zeros_like(rad)
        outArray[numpy.where(rad < 1)] = 1
        raised_cos_idx = numpy.where(
            [numpy.logical_and(rad <= 1, rad >= 1 - fringe_proportion)])[1:]

        # Make a raised_cos (half a hamming window). Floor division keeps the
        # slice bound an integer on Python 3 as well.
        raised_cos = numpy.hamming(hamming_len)[:hamming_len // 2]
        raised_cos -= numpy.min(raised_cos)
        raised_cos /= numpy.max(raised_cos)

        # Measure the distance from the edge - this is your index into the
        # hamming window:
        d_from_edge = numpy.abs((1 - fringe_proportion) - rad[raised_cos_idx])
        d_from_edge /= numpy.max(d_from_edge)
        d_from_edge *= numpy.round(hamming_len / 2)

        # This is the indices into the hamming (larger for small distances
        # from the edge!):
        portion_idx = (-1 * d_from_edge).astype(int)

        # Apply the raised cos to this portion:
        outArray[raised_cos_idx] = raised_cos[portion_idx]

        # Sometimes there are some remaining artifacts from this process, get
        # rid of them:
        artifact_idx = numpy.where(numpy.logical_and(outArray == 0, rad < 0.99))
        outArray[artifact_idx] = 1
        artifact_idx = numpy.where(numpy.logical_and(outArray == 1, rad > 0.99))
        outArray[artifact_idx] = 0
    else:
        raise ValueError('Unknown value for shape argument %s' % shape)

    # Rescale the mask values into the requested range
    mag = range[1] - range[0]
    offset = range[0]
    return outArray * mag + offset
# Window functions (rectangular, Hamming, Hanning)
import matplotlib.pyplot as plt
import numpy as np

N = 32
nn = [i for i in range(N)]
plt.figure(figsize=(16, 12))

# One stem plot per window, stacked vertically. Closed forms for reference:
#   Hamming: 0.54 - 0.46 * np.cos(np.multiply(nn, 2 * np.pi) / (N - 1))
#   Hanning: 0.5 * (1 - np.cos(np.multiply(nn, 2 * np.pi) / (N - 1)))
panels = (
    ('Rectangle window', np.ones(N)),
    ('Hamming window', np.hamming(N)),
    ('Hanning window', np.hanning(N)),
)
for row, (title, w) in enumerate(panels, start=1):
    plt.subplot(3, 1, row)
    plt.stem(w)
    plt.title(title)

plt.savefig('images/window.png')
plt.show()
plt.close()
i = (t - t0) / fl_n2 # s = wav[center - cuttime/2*fs : center + cuttime/2*fs] s = wav[t:t + fl_n] # import pdb;pdb.set_trace(); #for debug # plot(s); mypltshow('tmp/y0i{}t{}-{}.eps'.format(i,t,t+fl_n)); mp = np.sum(s**2) / fl_n #mean power mP.append(mp) if mp < args.mp_th: continue # プリエンファシスフィルタをかける p = 0.97 # プリエンファシス係数 s = preEmphasis(s, p) # plot(s); mypltshow('tmp/y1i{}t{}-{}.eps'.format(i,t,t+fl_n)); # ハミング窓をかける if args.HW == 1: hammingWindow = np.hamming(len(s)) s = s * hammingWindow # s = s * hammingWindow # LPC係数を求める # lpcOrder = 32 lpcOrder = args.k # LPC係数の次数 r = autocorr(s, lpcOrder + 1) if r[0] != 0: # import pdb;pdb.set_trace(); #for debug a, e = LevinsonDurbin(r, lpcOrder) if np.linalg.norm( a[1:-1] > 0.05): #20210502 for removing log|p|=-578.... # if np.any(a[1:-1]!=0): A.append(a) #print "*** result ***" E.append(e)
indexFile = '%s_%s_%d_%05d_%05d.txt' \ % (indexChoice[0], restartState, S*10, firstYear, lastYearRng[k]) os.system('mkdir %s %s/perio 2> /dev/null' % (dstDir, dstDir)) print 'Reading index file %s...' % indexFile observable = np.loadtxt('%s/%s' % (indicesPath, indexFile)) ntFull = observable.shape[0] obsName += '_%s' % indexChoice[0] # Get time steps array time = np.arange(spinup, ntFull) nt = ntFull - spinup observable = observable[spinup:] # Get periodogram print 'Getting periodogram...' window = np.hamming(nt) # Get nearest larger power of 2 if np.log2(nt) != int(np.log2(nt)): nfft = 2**(int(np.log2(nt)) + 1) else: nfft = nt # Get frequencies and shift zero frequency to center freq = np.fft.fftfreq(nfft, d=1./sampFreq) freq = np.fft.fftshift(freq) freqYear = freq * daysPerYear # Apply window and remove sample mean tsWindowed = observable * window tsWindowed -= tsWindowed.mean() # Fourier transform and shift zero frequency to center fts = np.fft.fft(tsWindowed, nfft, 0)
def calc_fbank(url, config):
    """Calculate Fbank feature of a audio file.

    The signal is pre-emphasised, cut into 25 ms frames every 10 ms,
    Hamming-windowed and transformed with a 512-point FFT; the power
    spectrum is projected onto triangular mel filters, converted to a log
    scale, mean-subtracted per filter and passed through cmvn.

    Parameters
    ----------
    url : ``str``
        Path to the audio file.
    config :
        Object whose ``Audio_n_filt`` attribute gives the number of filters.

    Returns
    -------
    fbank : ``np.ndarray``
        Fbank feature of this audio.
    """
    sample_rate, signal = read(url)

    pre_emphasis = 0.97
    frame_size = 0.025
    frame_stride = 0.01
    NFFT = 512
    nfilt = config.Audio_n_filt

    emphasized_signal = np.append(signal[0],
                                  signal[1:] - pre_emphasis * signal[:-1])
    signal_length = len(emphasized_signal)

    # convert from seconds to samples
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))

    num_frames = int(
        np.ceil(float(np.abs(signal_length - frame_length)) / frame_step))

    # Pad the signal with zeros so that every frame has the same number of
    # samples and nothing of the original signal is cut off
    pad_signal_length = num_frames * frame_step + frame_length
    padding = np.zeros((pad_signal_length - signal_length))
    pad_signal = np.append(emphasized_signal, padding)

    # Sample index of every element: offset inside the frame + frame start
    within_frame = np.tile(np.arange(0, frame_length), (num_frames, 1))
    frame_starts = np.tile(
        np.arange(0, num_frames * frame_step, frame_step),
        (frame_length, 1)).T
    indices = within_frame + frame_starts
    frames = pad_signal[indices.astype(np.int32, copy=False)]
    frames *= np.hamming(frame_length)

    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))  # Magnitude of the FFT
    pow_frames = ((1.0 / NFFT) * ((mag_frames)**2))  # Power Spectrum

    # Filter edges equally spaced on the mel scale, converted back to Hz and
    # then to FFT bin numbers
    low_freq_mel = 0
    high_freq_mel = (2595 * np.log10(1 + (sample_rate / 2) / 700))
    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = (700 * (10**(mel_points / 2595) - 1))
    edges = np.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        left = int(edges[m - 1])
        center = int(edges[m])
        right = int(edges[m + 1])
        row = m - 1

        # Rising slope of the triangle
        for k in range(left, center):
            fbank[row, k] = (k - edges[m - 1]) / (edges[m] - edges[m - 1])
        # Falling slope of the triangle
        for k in range(center, right):
            fbank[row, k] = (edges[m + 1] - k) / (edges[m + 1] - edges[m])

    filter_banks = np.dot(pow_frames, fbank.T)
    # Numerical Stability: avoid log10(0)
    filter_banks = np.where(filter_banks == 0,
                            np.finfo(float).eps, filter_banks)
    filter_banks = 20 * np.log10(filter_banks)
    filter_banks -= (np.mean(filter_banks, axis=0) + 1e-8)
    filter_banks = cmvn(filter_banks)

    return filter_banks
def dab_run(snr_list, file_name="dab_out", mode='dab'):
    """Enhance the evaluation recordings, beamform them and save the result.

    snr_list: one signal-to-noise ratio per channel; each is mapped to
        1 / (1 + 1 / snr) before being used.
    file_name: name of the reference file under data_eval/test_speech and of
        the output file.
    mode: 'dab' multiplies each enhanced spectrum by its channel weight
        before MVDR; 'db' feeds the enhanced spectra through unweighted.
        It also names the output folder under data_eval.
    """
    output_file_folder = os.path.join("data_eval", mode)
    enhanced_dir = os.path.join("data_eval", "dnn1_out")

    # removing previous enhancements
    for file in os.listdir(enhanced_dir):
        os.remove(os.path.join("data_eval", "dnn1_out", file))

    dnn1_inputs, dnn1_outputs = dnn1.predict_folder(
        os.path.join("data_eval", "dnn1_in"),
        os.path.join("data_eval", "dnn1_out"))

    names = [
        f for f in sorted(os.listdir(os.path.join("data_eval", "dnn1_out")))
        if f.startswith("enh")
    ]

    # Reload the enhanced files as complex spectra; this replaces the outputs
    # returned by predict_folder
    dnn1_outputs = []
    for na in names:
        (a, _) = pp.read_audio(os.path.join("data_eval", "dnn1_out", na))
        dnn1_outputs.append(pp.calc_sp(a, 'complex'))

    # Map every SNR to 1 / (1 + 1 / snr), written into a copy of snr_list
    s2nrs = snr_list * 1
    for i, snr in enumerate(snr_list):
        s2nrs[i] = 1 / (1 + 1 / snr)

    ch_rw_outputs = []
    # calculate channel weights
    if mode == 'dab':
        new_weights = channel_weights(s2nrs)
        print(new_weights)
        # multiply enhanced audio for the corresponding weight
        for enhanced, weight in zip(dnn1_outputs, new_weights):
            ch_rw_outputs.append(weight * enhanced)

    # cancel reweighting if db mode
    if mode == 'db':
        new_weights = s2nrs
        print(new_weights)
        ch_rw_outputs = dnn1_outputs

    # execute mvdr
    final = mvdr(dnn1_inputs, ch_rw_outputs)

    (init, _) = pp.read_audio(
        os.path.join('data_eval', 'test_speech', file_name))
    init_sp = pp.calc_sp(init, mode='complex')
    visualize(dnn1_colors(np.abs(init_sp)), dnn1_colors(np.abs(final)),
              "source amplitude", "final amplitude")

    # Recover and save enhanced wav
    pp.create_folder(output_file_folder)
    s = recover_wav_complex(final, conf1.n_overlap, np.hamming)
    # Scaler for compensate the amplitude
    s *= np.sqrt((np.hamming(conf1.n_window)**2).sum())
    audio_path = os.path.join(output_file_folder, file_name)
    pp.write_audio(audio_path, s, conf1.sample_rate)
    print('%s done' % mode)
def PlotHamming(data, n_fft=16384):
    """Return the one-sided magnitude spectrum of Hamming-windowed *data*.

    Despite its name the function does not plot anything; it only computes
    the values.

    Parameters
    ----------
    data : array_like
        1-D signal.  It is multiplied by a Hamming window of the same length.
    n_fft : int, optional
        FFT length handed to ``fft`` (input is zero-padded or truncated to
        this size).  Defaults to 16384, the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        Magnitudes of the first ``n_fft // 2`` FFT bins.
    """
    windowed = np.hamming(len(data)) * data
    spectrum = abs(fft(windowed, n=n_fft))
    # Keep only the first half of the bins.
    return spectrum[:spectrum.size // 2]
def spectrogram_image(mediafile, dpi=72, outdir=None, outfile=None):
    """ Create spectrogram image from audio data.
        Return path to created image file.

        mediafile -- audio file; it is converted with ``transcode.to_wav``.
        dpi       -- resolution passed to ``pylab.savefig``.
        outdir    -- target directory; defaults to the directory of
                     ``mediafile``.
        outfile   -- target file name; defaults to the base name of
                     ``mediafile`` with a ``.jpg`` extension. If it contains
                     a path separator it is used as given, and ``outdir``
                     must then be empty (``ValueError`` otherwise).
    """
    # TODO: Add some of the constants below as parameters
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import scipy.io.wavfile
    import numpy as np
    import pylab

    # Output file path
    outfile = outfile or ""
    if outdir and outfile and os.sep in outfile:
        raise ValueError(
            "Do not specify paths in both output directory '%s' and filename '%s'"
            % (outdir, outfile))
    if os.sep not in outfile:
        if not outfile:
            outfile = os.path.splitext(os.path.basename(mediafile))[0] + ".jpg"
        if not outdir:
            outdir = os.path.dirname(mediafile)
        outfile = os.path.join(outdir, outfile)

    with closing(open(os.devnull, "wb")) as black_hole:
        # Read audio data; anything the reader prints goes to the null device
        with transcode.to_wav(mediafile) as wavfile:
            sys.stdout, saved_stdout = black_hole, sys.stdout
            try:
                sample_rate, waveform = scipy.io.wavfile.read(wavfile)
            finally:
                sys.stdout = saved_stdout

        # Limit data to a 2 second window from the middle, else the FFT
        # needs ages
        data_window = sample_rate * 2  # samples in 2 secs
        # Clamp at 0: for clips shorter than the window a negative start
        # would select only the tail of the clip instead of all of it.
        start = max(0, (len(waveform) - data_window) // 2)
        waveform = np.asarray(waveform[start:start + data_window])
        if waveform.ndim > 1:
            # Multi-channel data: use the first channel only.
            # TODO: combine / add the channels to mono
            waveform = waveform[:, 0]

        # Calculate FFT inputs
        nfft = nwin = int(sample_rate * 0.005) & ~1  # 5ms window, even length
        window = np.hamming(nwin)

        # Create spectrogram
        pylab.nipy_spectral()
        for khz in (5, 10, 16, 18, 20):
            pylab.text(data_window / sample_rate * .99, khz * 1000 + 75,
                       "%d kHz" % khz, ha="right")
            pylab.axhline(khz * 1000)
        pylab.axis("off")
        pylab.specgram(waveform, NFFT=nfft, Fs=sample_rate, window=window)

        # Write to image: render a temporary PNG, convert it to the target
        # format with the configured converter, then always remove the PNG.
        try:
            pylab.savefig(outfile + ".png", format='png',
                          facecolor="#000000", edgecolor="#000000",
                          dpi=dpi, transparent=True, bbox_inches="tight")
            cmd = [
                config.CMD_IM_CONVERT, "-trim", "-quality", "85",
                outfile + ".png", outfile
            ]
            subprocess.check_call(cmd, stdout=black_hole,
                                  stderr=subprocess.STDOUT)
        finally:
            if os.path.exists(outfile + ".png"):
                os.remove(outfile + ".png")

    return outfile
def file_feature_extraction(file, win=0.032, step=0.016,
                            amplitudeFilter=False, diffFilter=False):
    """Compute a short-term feature matrix for an audio file.

    Parameters
    ----------
    file : str
        Path handed to ``read``; it must return ``(rate, data)``.
    win : float
        Frame length in seconds.
    step : float
        Frame hop in seconds.
    amplitudeFilter : bool
        If true, flag frames whose peak sample value lies below the 93rd
        percentile of all frame peaks.
    diffFilter : bool
        If true (and the amplitude rule did not fire), flag frames whose mean
        difference to the previous frame exceeds the 80th percentile.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number_of_frames, 21)``.  Columns 0-12 hold the
        values from ``feature_mfccs``, 13 ``stEnergy``, 14 ``stZCR``,
        15 ``stEnergyEntropy``, 16-17 ``stSpectralCentroidAndSpread``,
        18 ``stSpectralEntropy``, 19 ``stSpectralRollOff`` and 20 the filter
        flag (1.0 = flagged).  An array with zero rows is returned when the
        signal is shorter than one frame.
    """
    # read in digital signal from audio file
    audioInfo = read(file)
    fs = audioInfo[0]  # fs = frames/second = rate
    signal = np.asarray(audioInfo[1])  # signal = data

    # Converting a multi-channel signal to MONO.  Mono files arrive as 1-D
    # arrays, so the dimensionality has to be checked before indexing.
    if signal.ndim > 1:
        if signal.shape[1] > 1:
            signal = np.sum(signal, axis=1).astype(np.float64) / 2
        else:
            signal = signal[:, 0]

    numberOfSamples = len(signal)

    # convert window length and step from seconds to samples
    windowLength = int(np.round(win * fs))
    stepInSamples = int(np.round(step * fs))

    # compute the total number of frames
    numOfFrames = (numberOfSamples - windowLength) // stepInSamples + 1

    # number of features to be computed:
    numbOfFeatures = 21
    if numOfFrames < 1:
        return np.zeros((0, numbOfFeatures))
    Features = np.zeros((numOfFrames, numbOfFeatures))

    # Frequency-domain audio features
    Ham = np.hamming(windowLength)
    mfccParams = feature_mfccs_init.feature_mfccs_init(windowLength, fs)
    nFFT = windowLength // 2  # integer: it is used as a slice bound

    ampl_vals = []
    diff_vals = []
    frameprev = None
    for i in range(numOfFrames):
        # get current frame
        start = i * stepInSamples
        frame = signal[start:start + windowLength]
        if frameprev is None:
            frameprev = frame.copy()

        ampl_vals.append(np.max(frame))
        diff_vals.append(np.mean(np.subtract(frameprev, frame)))
        frameprev = frame.copy()

        frame = frame * Ham
        frameFFT = getDFT.getDFT(frame, fs)

        # magnitude spectrum, first half, normalized by its length
        X = np.abs(np.fft.fft(frame))
        X = X[0:nFFT]
        X = X / len(X)

        if np.sum(np.abs(frame)) > np.spacing(1):
            Features[i, 0:13] = feature_mfccs.feature_mfccs(frameFFT,
                                                            mfccParams)
        else:
            # All-zero frame: leave the MFCC columns of this row at zero.
            Features[i, 0:13] = 0.0

        Features[i, 13] = stEnergy(frame)
        Features[i, 14] = stZCR(frame)
        Features[i, 15] = stEnergyEntropy(frame)
        [Features[i, 16], Features[i, 17]] = \
            stSpectralCentroidAndSpread(X, fs)
        Features[i, 18] = stSpectralEntropy(X)
        Features[i, 19] = stSpectralRollOff(X, 0.90, fs)

    ampl_threshold = np.percentile(ampl_vals, 93)
    diff_threshold = np.percentile(diff_vals, 80)
    for i in range(numOfFrames):
        if amplitudeFilter and ampl_vals[i] < ampl_threshold:
            Features[i, 20] = 1.0
        elif diffFilter and diff_vals[i] > diff_threshold:
            Features[i, 20] = 1.0
        else:
            Features[i, 20] = 0.0
    return Features
def processWithPV(waveIn, pars, BPF, doPitchShift=False):
    """Time-stretch or pitch-shift *waveIn* with a variable-hop phase vocoder.

    Parameters
    ----------
    waveIn : array_like
        Input waveform, forwarded to ``stft``.
    pars : dict
        Reads ``pars['main_pars']['sr']``, ``pars['main_pars']['inSamples']``,
        ``pars['ana_pars']['anaWinLen']`` (seconds) and
        ``pars['ana_pars']['oversampling']``.
    BPF : numpy.ndarray
        Break-point function; column 0 is multiplied by ``sr`` to obtain
        sample positions, column 1 holds the stretch factor (or, when pitch
        shifting, a value divided by 100 and by 12 in the exponent of 2).
        ``BPF.size == 2`` selects the static, single-segment path.
    doPitchShift : bool
        ``False`` stretches, anything else pitch-shifts (stretch followed by
        resampling).

    Returns
    -------
    The processed waveform.
    """
    sample_rate = pars['main_pars']['sr']
    inSamples = pars['main_pars']['inSamples']
    is_static = BPF.size == 2

    if doPitchShift == False:
        if is_static:
            # static stretch; >1: dilation (stretching), <1: compression
            rateVec = 1/BPF[0,1]
            numSeg = 1
        else:
            sampleVec = np.rint(BPF[:,0]*sample_rate)  # in samples
            rateVec = BPF[:,1]
            numSeg = BPF.shape[0]
    else:
        bins_per_octave = 12
        if is_static:
            # static pitch shift
            rateVec = 2.0 ** (-float(BPF[0,1]/100.0) / bins_per_octave)
            numSeg = 1
        else:
            sampleVec = np.rint(BPF[:,0]*sample_rate)  # in samples
            rateVec = 2.0 ** (BPF[:,1] / float(100*bins_per_octave))
            numSeg = BPF.shape[0]

    # Analysis window length in seconds, rounded up to the next power of 2.
    win_seconds = pars['ana_pars']['anaWinLen']
    n_fft = int(2**np.ceil(np.log2(win_seconds*sample_rate)))
    overlapFactor = pars['ana_pars']['oversampling']
    synHop = n_fft//overlapFactor

    if numSeg == 1:
        # One constant analysis hop for the whole signal.
        hop = int(round(synHop*rateVec))
        frame_count = int(round(inSamples/hop)) + overlapFactor
        anaHopVec = np.ones(frame_count, dtype=int) * hop
    else:
        # Walk through the signal, sampling the BPF at each frame position.
        cursor = 0
        hop = int(round(synHop/rateVec[0]))
        anaHopVec = hop
        allRatesVec = rateVec[0]
        while cursor <= inSamples:
            cursor += hop
            rate_here = np.interp(cursor, sampleVec, rateVec)
            hop = int(round(synHop/rate_here))
            anaHopVec = np.append(anaHopVec, hop)
            allRatesVec = np.append(allRatesVec, rate_here)

    win = np.hamming(n_fft)
    spectrum = np.squeeze(stft(waveIn, win, n_fft, anaHopVec))
    spectrum = spectrum[0:n_fft//2+1,:]

    stft_stretch = phaseVocoder_varHop(spectrum, anaHopVec=anaHopVec,
                                       synHop=synHop, phase_locking=1)

    if doPitchShift == 0:
        return istft(stft_stretch, win, n_fft, synHop)
    if numSeg == 1:
        stretched = istft(stft_stretch, win, n_fft, synHop)
        n_samples = int(np.ceil(len(stretched) * rateVec))
        return sig.resample_poly(stretched, n_samples, len(stretched))
    return istft_resamp(stft_stretch, win, n_fft, synHop, allRatesVec,
                        inSamples)
def hamming(self):
    """Multiply ``self.ws`` in place by a Hamming window of equal length."""
    taper = np.hamming(len(self.ws))
    self.ws *= taper
signal[1:] - pre_emphasis * signal[:-1]) frame_size = 0.025 frame_stride = 0.01 frame_length, frame_step = frame_size * sample_rate, frame_stride * sample_rate signal_length = len(emphasized_signal) frame_length = int(round(frame_length)) frame_step = int(round(frame_step)) num_frames = int( np.ceil(float(np.abs(signal_length - frame_length)) / frame_step)) pad_signal_length = num_frames * frame_step + frame_length z = np.zeros((pad_signal_length - signal_length)) pad_signal = np.append(emphasized_signal, z) indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + np.tile( np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T frames = pad_signal[indices.astype(np.int32, copy=False)] frames *= np.hamming(frame_length) m = frames.max(axis=1) silent_frames = np.array(np.where(frames.max(axis=1) < 500)) frames_final = np.delete(frames, silent_frames[:, :], axis=0) signal_final2 = python_speech_features.sigproc.deframesig( frames_final, len(emphasized_signal), frame_length, frame_step) plt.plot(signal_final2) import numpy as np import scipy.io.wavfile from matplotlib import cm import matplotlib.pyplot as plt from scipy.fftpack import dct import python_speech_features from scipy import io sample_rate, signal = scipy.io.wavfile.read(
from scipy.signal import hamming, triang, blackmanharris from scipy.fftpack import fft, ifft import math import sys, os, functools, time sys.path.append( os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../software/models/')) import dftModel as DFT import utilFunctions as UF (fs, x) = UF.wavread('../../../sounds/oboe-A4.wav') N = 512 M = 511 t = -60 w = np.hamming(M) start = .8 * fs hN = N / 2 hM = (M + 1) / 2 x1 = x[start:start + M] mX, pX = DFT.dftAnal(x1, w, N) ploc = UF.peakDetection(mX, hN, t) pmag = mX[ploc] freqaxis = fs * np.arange(N / 2) / float(N) plt.figure(1, figsize=(9.5, 5.5)) plt.subplot(2, 1, 1) plt.plot(freqaxis, mX, 'r', lw=1.5) plt.axis([300, 2500, -70, max(mX)]) plt.plot(fs * ploc / N,
# Required in order to use Matplotlib together with Tkinter
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk

size_frame = 4096  # frame size (samples)
SR = 16000  # sampling rate (Hz)
# Shift size: 1/100 of a second = 0.01 s (10 msec).  Derived from SR so that
# it stays 10 msec if the sampling rate is changed.
size_shift = SR / 100

# Load the audio file
x, _ = librosa.load('aiueo.wav', sr=SR)

# Length of the file in seconds
duration = len(x) / SR

# Hamming window
hamming_window = np.hamming(size_frame)

# List that collects the spectrogram, one entry per frame
spectrogram = []

# Compute the spectrum frame by frame
for i in np.arange(0, len(x) - size_frame, size_shift):

    # Fetch the samples of this frame
    idx = int(i)  # np.arange yields floats here, so convert to int
    x_frame = x[idx:idx + size_frame]

    # Log-magnitude spectrum of the windowed frame
    fft_spec = np.fft.rfft(x_frame * hamming_window)
    fft_log_abs_spec = np.log(np.abs(fft_spec))
    spectrogram.append(fft_log_abs_spec)
def hamming(self):
    """Apply a Hamming window to the wave.

    ``self.ys`` is scaled in place by a window of the same length.
    """
    window = np.hamming(len(self.ys))
    self.ys *= window
def run(self, cam):
    """Process the frame in ``self.frame_in`` and update the pulse estimate.

    The method detects faces in the equalized grayscale frame, keeps the
    detection that sorts last by the product of its last two entries as
    ``self.face_rect`` when ``self.shift`` reports more than 10, draws the
    face and forehead rectangles, appends the forehead mean to
    ``self.data_buffer`` and, once more than 10 samples are buffered,
    estimates ``self.bpm`` from the strongest FFT component between 50 and
    180 (in units of 60 * Hz).

    ``cam`` is not referenced anywhere in the body.

    NOTE(review): the block structure below was recovered from a source with
    lost indentation -- confirm the nesting against the original file.
    """
    print 'start'
    # frame_out aliases frame_in, so drawing on it also changes frame_in
    self.frame_out = self.frame_in
    self.gray = cv2.equalizeHist(
        cv2.cvtColor(self.frame_in, cv2.COLOR_BGR2GRAY))
    col = (100, 255, 100)
    detected = list(
        self.face_cascade.detectMultiScale(self.gray,
                                           scaleFactor=1.3,
                                           minNeighbors=4,
                                           minSize=(50, 50),
                                           flags=cv2.CASCADE_SCALE_IMAGE))
    if len(detected) > 0:
        # sort ascending by the product of the last two entries; the last
        # element is then the largest one
        detected.sort(key=lambda a: a[-1] * a[-2])
        #self.data_buffer, self.times, self.trained = [], [self.times[-1]], False
        # only replace the stored rectangle when the shift exceeds 10
        if self.shift(detected[-1]) > 10:
            self.face_rect = detected[-1]
    forehead1 = self.get_subface_coord(0.5, 0.18, 0.25, 0.15)
    self.draw_rect(self.face_rect, col=(255, 0, 0))
    x, y, w, h = self.face_rect
    cv2.putText(self.frame_out, "Face", (x, y), cv2.FONT_HERSHEY_PLAIN, 1.5,
                col)
    self.draw_rect(forehead1)
    x, y, w, h = forehead1
    cv2.putText(self.frame_out, "Forehead", (x, y), cv2.FONT_HERSHEY_PLAIN,
                1.5, col)
    print 'haha'
    # presumably [1, 1, 2, 2] is the placeholder rectangle used before any
    # face was found -- TODO confirm against the initializer
    if set(self.face_rect) == set([1, 1, 2, 2]):
        print 'break'
        return
    vals = self.get_subface_means(forehead1)
    #print vals
    self.data_buffer.append(vals)
    #print self.data_buffer
    L = len(self.data_buffer)
    self.times.append(time.time() - self.t0)
    # keep only the newest buffer_size samples and timestamps
    if L > self.buffer_size:
        self.data_buffer = self.data_buffer[-self.buffer_size:]
        self.times = self.times[-self.buffer_size:]
        L = self.buffer_size
    processed = np.array(self.data_buffer)
    self.samples = processed
    print L
    #print self.times
    if L > 10:
        self.output_dim = processed.shape[0]
        # effective frame rate measured from the recorded timestamps
        self.fps = float(L) / (self.times[-1] - self.times[0])
        # resample onto an evenly spaced time grid before the FFT
        even_times = np.linspace(self.times[0], self.times[-1], L)
        print 'processed\n', len(processed), '\nself\n', len(self.times)
        interpolated = np.interp(even_times, self.times, processed)
        interpolated = np.hamming(L) * interpolated
        interpolated = interpolated - np.mean(interpolated)
        raw = np.fft.rfft(interpolated)
        phase = np.angle(raw)
        self.fft = np.abs(raw)
        # NOTE(review): L / 2 is integer division only under Python 2
        self.freqs = float(self.fps) / L * np.arange(L / 2 + 1)
        # Hz -> per-minute
        freqs = 60. * self.freqs
        # keep only the components between 50 and 180 per minute
        idx = np.where((freqs > 50) & (freqs < 180))
        pruned = self.fft[idx]
        phase = phase[idx]
        pfreq = freqs[idx]
        self.freqs = pfreq
        self.fft = pruned
        idx2 = np.argmax(pruned)
        # map the phase of the strongest component to a blend factor in
        # [0.1, 1.0]
        t = (np.sin(phase[idx2]) + 1.) / 2.
        t = 0.9 * t + 0.1
        alpha = t
        beta = 1 - t
        self.bpm = self.freqs[idx2]
        self.idx += 1
        x, y, w, h = self.get_subface_coord(0.5, 0.18, 0.25, 0.15)
        # blend the forehead region: all channels scaled by alpha, channel 1
        # additionally mixed with the grayscale image
        r = alpha * self.frame_in[y:y + h, x:x + w, 0]
        g = alpha * \
            self.frame_in[y:y + h, x:x + w, 1] + \
            beta * self.gray[y:y + h, x:x + w]
        b = alpha * self.frame_in[y:y + h, x:x + w, 2]
        self.frame_out[y:y + h, x:x + w] = cv2.merge([r, g, b])
        x1, y1, w1, h1 = self.face_rect
        self.slices = [np.copy(self.frame_out[y1:y1 + h1, x1:x1 + w1, 1])]
        col = (100, 255, 100)
        # seconds until the buffer is full at the current frame rate
        gap = (self.buffer_size - L) / self.fps
        # self.bpms.append(bpm)
        # self.ttimes.append(time.time())
        if gap:
            text = "(estimate: %0.1f bpm, wait %0.0f s)" % (self.bpm, gap)
        else:
            text = "(estimate: %0.1f bpm)" % (self.bpm)
        tsize = 1
        cv2.putText(self.frame_out, text, (int(x - w / 2), int(y)),
                    cv2.FONT_HERSHEY_PLAIN, tsize, col)
# Compute the total number of frames produced by the
# short-time Fourier transform
num_frames = (num_samples - frame_size) \
    // frame_shift + 1

# Compute the spectrogram
spectrogram = np.zeros((num_frames, fft_size))
# The window depends only on frame_size, so build it once instead of once
# per frame.
hamming_window = np.hamming(frame_size)
for frame_idx in range(num_frames):
    # Extract the waveform of one frame
    start_index = frame_idx * frame_shift
    frame = waveform[start_index : \
                     start_index + frame_size].copy()

    # Compute the log-amplitude spectrum; the small constant keeps the
    # logarithm finite for zero-magnitude bins
    frame = frame * hamming_window
    spectrum = np.fft.fft(frame, n=fft_size)
    log_absolute = np.log(np.abs(spectrum) + 1E-7)
    spectrogram[frame_idx, :] = log_absolute

# Create the figure used for plotting
plt.figure(figsize=(10, 10))

# Split the figure vertically into two parts and
# plot the time-domain waveform in the upper one
plt.subplot(2, 1, 1)
time_axis = np.arange(num_samples) / sample_frequency
plt.plot(time_axis, waveform, color='k')

# Derive the y-axis maximum from the largest absolute waveform value
ymax = np.max(np.abs(waveform)) * 1.05
results.append(d2**2 + d1**2 - w_real * d1 * d2) freqs.append(f * sample_rate) return freqs, results if __name__ == '__main__': # quick test import numpy as np import pylab # generating test signals SAMPLE_RATE = 44100 WINDOW_SIZE = 1024 t = np.linspace(0, 1, SAMPLE_RATE)[:WINDOW_SIZE] sine_wave = np.sin(2 * np.pi * 440 * t) + np.sin(2 * np.pi * 1020 * t) sine_wave = sine_wave * np.hamming(WINDOW_SIZE) sine_wave2 = np.sin(2 * np.pi * 880 * t) + np.sin(2 * np.pi * 1500 * t) sine_wave2 = sine_wave2 * np.hamming(WINDOW_SIZE) # applying Goertzel on those signals, and plotting results freqs, results = goertzel(sine_wave, SAMPLE_RATE, (400, 500), (1000, 1100)) pylab.subplot(2, 2, 1) pylab.title('(1) Sine wave 440Hz + 1020Hz') pylab.plot(t, sine_wave) pylab.subplot(2, 2, 3) pylab.title( '(1) Goertzel Algo, freqency ranges : [400, 500] and [1000, 1100]') pylab.plot(freqs, np.array(results)[:, 2], 'o') pylab.ylim([0, 100000])
#!/usr/bin/python3 # -*- coding:utf-8 -*- import time import ADS1256 import DAC8532 import RPi.GPIO as GPIO from pylab import * import matplotlib.pyplot as plt import numpy as np N = 128 analyzeSignal = zeros(N, dtype=complex) C = 3 * 10**8 f_vco = 24.35 * 10**9 windowFunction = np.hamming(N) try: ADC = ADS1256.ADS1256() DAC = DAC8532.DAC8532() ADC.ADS1256_init() DAC.DAC8532_Out_Voltage(DAC8532.channel_A, 1) DAC.DAC8532_Out_Voltage(DAC8532.channel_B, 1) while (1): #starttime = datetime.datetime.now() for i in range(0, N): ADC_Value = ADC.ADS1256_GetIQ() I = ADC_Value[0] * 5.0 / 0x7fffff Q = ADC_Value[1] * 5.0 / 0x7fffff
# Analyse a piano recording with the STFT model, resynthesize it, and start
# plotting the input waveform and the magnitude spectrogram.
import numpy as np
import time, os, sys

# Make the model modules (stft, utilFunctions) importable; the path is
# resolved relative to this file.
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))
import stft as STFT
import utilFunctions as UF
import matplotlib.pyplot as plt
# NOTE(review): `hamming` imported here is not referenced in this part of
# the script (the window below comes from np.hamming); recent SciPy releases
# reportedly no longer export it from scipy.signal -- confirm before
# upgrading.
from scipy.signal import hamming

(fs, x) = UF.wavread('../../../sounds/piano.wav')
w = np.hamming(1024)  # analysis window, 1024 samples
N = 1024  # passed to stftAnal together with the hop size H
H = 512  # half of the window length
mX, pX = STFT.stftAnal(x, fs, w, N, H)
y = STFT.stftSynth(mX, pX, w.size, H)

plt.figure(1, figsize=(9.5, 7))

# Panel 1 of 4: input waveform against time in seconds
plt.subplot(411)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.title('x (piano.wav)')
plt.axis([0, x.size / float(fs), min(x), max(x)])

# Panel 2 of 4: mX as an image; rows of mX are mapped to frame times and
# columns to bin frequencies (hence the transpose)
plt.subplot(412)
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mX[0, :].size) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX, M=1024, N=1024, H=512')
def spectral_analysis(dx,Ain,res_factor=10.0,tapering=None,overlap=None,wsize=None,alpha=3.0,detrend=False,normalise=False,integration=False):
    """
    Spectral_Analysis

    @summary: Compute a spectrum for every column of a data table with
              get_spec() and return the average over all columns. With
              tapering enabled, every column is first cut into overlapping
              segments that are multiplied by a window; each segment is then
              treated as a separate series.

    @param dx {type:numeric} : sampling distance (passed to get_spec).
    @param Ain {type:numeric} : 1D series or 2D table. The first axis is the
           one that is segmented and transformed; the second axis holds the
           repeats. The input is copied, not modified.
    @keyword res_factor {type:numeric,default:10.0} : passed unchanged to
           get_spec.
    @keyword tapering {type:string|bool|nd.array} : apply tapering to the
           data. Any value other than None enables it.
           If it is a bool (True or False) : apply a hamming window.
           If it is a string (case-insensitive) : 'hamming', 'hanning',
           'kaiser', 'blackman' or 'none'; any other string raises an
           Exception.
           If it is an nd.array : apply this array as taper.
    @keyword overlap {type:float} : overlap coefficient of the windows (0.75
           means 75% overlap). Defaults to 0.50 when tapering.
    @keyword wsize {type:numeric} : size of the sub-segments. Defaults to
           half the length of the first axis when tapering.
    @keyword alpha {type:numeric,default:3.0} : the kaiser window is built
           with beta = pi * alpha.
    @keyword normalise {type:bool,default:False} : If True, normalise the
           spectrum by its overall energy content.
    @keyword detrend {type:bool,default:False} : If True, apply detrend_fun
           to each segment (if tapered) or to each whole column (if not
           tapered).
    @keyword integration {type:bool,default:False} : passed unchanged to
           get_spec.

    @return: a dictionary with the averaged Energy Spectral Density ('esd'),
             Power Spectral Density ('psd'), frequency ('fq'), wavelength
             ('p', computed as 1/fq) and the tapering parameters ('params').

    @author: Renaud DUSSURGET (RD) - LER/PAC, Ifremer
    @change: Created by RD, December 2012
    """

    # NOTE(review): the block structure below was recovered from a source
    # with lost indentation -- confirm the nesting against the original file.

    A=Ain.copy()

    #Check dimensions
    sh = A.shape
    ndims = len(sh)
    N = sh[0] #Length of the first axis (the axis that is segmented/transformed)

    #If vector, add one dimension
    if ndims == 1 :
        A = A.reshape((N,1))
        sh = A.shape
        ndims = len(sh)

    nr = sh[1] #Number of repeats
    nt = nr

#    gain=1.0 #Scaling gain... (used for tapering issues)

#    #Get the overall energy
#    spec=get_spec(dx, A[:,0])
#    F=spec['fq']
#    Eref = ((A[:,0]-A[:,0].mean())**2).sum() #Get the reference energy level
#    ESDref=spec['esd']
#    SFactor=Eref/spec['esd'].sum()
#    ESDref*=SFactor
#    PSDref=spec['psd']*SFactor
#    print 'Check parseval theorem : SUM|Y(f)|^2={0}, SUM|y(t)|^2={1}'.format(spec['esd'].sum(),((A[:,0]-A[:,0].mean())**2).sum())

    #Apply tapering if asked
    ########################
    #Note : tapering=False also enters this branch (only None disables it)
    if tapering is not None:

        #Set tapering defaults
        overlap=0.50 if overlap is None else overlap
        wsize=0.5*N if wsize is None else wsize

        #Get time splitting (tapering) parameters
        #########################################
        a = np.float32(wsize)
        b = np.float32(overlap)
        c = np.float32(N)
        nn=np.floor((c - (a * b))/(a - (a * b))) #This is the number of segments
        print 'Number of windows :{0}\nTotal windowed points : {1} ({2} missing)\nTotal points : {3}'.format(nn,nn*wsize,N - nn*wsize,N)

        ix = np.arange(nn) * ((1.0 - b) * a) #These are the starting points of each segments

        #Moving window
        ##############
        #NOTE(review): wsize, nn and ix are floating point values here and
        #are used as array sizes and slice bounds -- newer NumPy versions
        #may reject that; confirm before upgrading
        dum = np.zeros((wsize, nn, nr),dtype=np.float64)
        for j in np.arange(nr):
            for i in np.arange(nn): #looping through time to get splitted time series
                dum[:,i,j] = detrend_fun(np.arange(wsize),A[ix[i] : ix[i] + wsize,j]) if detrend else A[ix[i] : ix[i] + wsize,j]

        #Set up tapering window
        #######################
        beta=np.pi*alpha
        hamm = np.hamming(wsize)
        hann = np.hanning(wsize)
        kbess = np.kaiser(wsize,beta)
        blackman = np.blackman(wsize)
        notaper = np.ones(wsize) #overpass tapering option
        gain=1.0

        #Select the window by name ; gain is the mean value of the window.
        #For a bool or an nd.array taper the gain stays at 1.0
        if isinstance(tapering,bool) : which='hamm'
        elif isinstance(tapering,str) :
            if tapering.upper() == 'HAMMING' :
                which='hamm'
                gain=np.sum(hamm)/wsize #0.530416666667
            elif tapering.upper() == 'HANNING' :
                which='hann'
                gain=np.sum(hann)/wsize #0.489583333333
            elif tapering.upper() == 'KAISER' :
                which='kbess'
                gain=np.sum(kbess)/wsize #0.394170357504
            elif tapering.upper() == 'NONE' :
                which='notaper'
                gain=1.0
            elif tapering.upper() == 'BLACKMAN' :
                which='blackman'
                gain=np.sum(blackman)/wsize
            else : raise Exception('Unknown taper {0}'.format(tapering))
        #NOTE(review): `which` is not assigned for an nd.array taper, yet the
        #final return statement reads it -- that path looks like it raises a
        #NameError ; confirm
        elif isinstance(tapering,np.ndarray) : pass
        else : raise Exception('Bad value for tapering keyword')

        #Bind `window` to the array whose variable name is stored in `which`
        if not isinstance(tapering,np.ndarray) : exec('window='+which)
        else : window=tapering

        #Replicate the window so that it matches the (wsize,nn,nr) segments
        window = np.repeat(window,nn*nr).reshape((wsize,nn,nr))

        #Apply tapering on segmented data
        A=dum.copy()*window
        A=A.reshape(wsize,nr*nn) #Reshape matrix : one column per segment
        nr=nn*nr
    else :
        if detrend :
            for i in np.arange(nr): A[:,i] = detrend_fun(np.arange(N),A[:,i]) if detrend else A[:,i]
        gain=1.0

    #Run transform
    ###############
    #The per-column spectra are appended to flat arrays and reshaped below
    for i in np.arange(nr):
        spec=get_spec(dx, A[:,i],integration=integration,gain=gain,res_factor=res_factor)
        if i == 0:
            esd = spec['esd']
            psd = spec['psd']
            fq = spec['fq']
        else :
            esd = np.append(esd,spec['esd'])
            psd = np.append(psd,spec['psd'])

#    factor=((A[:,0]-A[:,0].mean())**2).sum()/spec['esd'].sum()

    #Average spectrum
    #################
    nf=len(fq)
    p=1./fq
    esd=esd.reshape(nr,nf)
    psd=psd.reshape(nr,nf)
    esd=(np.sum(esd,axis=0)/nr)#/gain
    psd=(np.sum(psd,axis=0)/nr)#/gain

    #Rescale the averaged psd by the square root of the window gain
    psd = psd * (gain**0.5)
#    print gain, np.sqrt(gain), gain **2, gain*0.5, gain/2.
#    esd=(np.sum(esd,axis=0))#/gain
#    psd=(np.sum(psd,axis=0))#/gain

    #Normalise by energy content
    Scaling_Factor=len(fq)/esd.sum()
    if normalise :
        esd*=Scaling_Factor
        psd*=Scaling_Factor

    if tapering is not None :
        return {'params':{'tapering':tapering is not None,'which':which,'wsize':int(wsize),'nwind':int(nn),'overlap':int(100.*overlap),'gain':gain},'psd':psd,'esd':esd,'fq':fq,'p':p}
    else :
        return {'params':{'tapering':tapering is not None},'psd':psd,'esd':esd,'fq':fq,'p':p}
def denoise_wav(src_wav_file, dest_wav_file, global_mean, global_var, use_gpu,
                gpu_id, truncate_minutes, mode, model_select, stage_select):
    """Apply speech enhancement to audio in WAV file.

    Parameters
    ----------
    src_wav_file : str
        Path to WAV to denoise.

    dest_wav_file : str
        Output path for denoised WAV.

    global_mean : ndarray, (n_feats,)
        Global mean for LPS features. Used for CMVN.

    global_var : ndarray, (n_feats,)
        Global variances for LPS features. Used for CMVN.

    use_gpu : bool
        Forwarded to ``decode_model``.

    gpu_id : int
        Forwarded to ``decode_model``.

    truncate_minutes : float
        Maximum size in minutes to process at a time. The enhancement will
        be done on chunks of audio no greater than ``truncate_minutes``
        minutes duration.

    mode : int
        Which model output is used to rebuild the log-power spectrum:
        1 applies the estimated ratio mask to the noisy features, 2 uses the
        estimated LPS features, 3 averages both with equal weights.

    model_select : str
        ``'400h'`` writes mean/variance-normalized features for the decoder,
        ``'1000h'`` writes the unnormalized features (case-insensitive).

    stage_select
        Forwarded to ``decode_model``.

    Raises
    ------
    ValueError
        If a chunk has to be reconstructed and ``mode`` is not 1, 2 or 3.
    """
    # Read noisy audio WAV file. As scipy.io.wavefile.read is FAR faster than
    # librosa.load, we use the former.
    rate, wav_data = wav_io.read(src_wav_file)

    if mode == 1:
        print(
            "###Selecting the estimated ideal-ratio-masks in mode 1 (more conservative).###"
        )
    elif mode == 2:
        print(
            "###Selecting the estimated log-power-spec features in mode 2 (more agressive).###"
        )
    elif mode == 3:
        print(
            "###Selecting both estimated IRM and LPS outputs with equal weights in mode 3 (trade-off).###"
        )
    print("Using the pre-trained {} speech enhancement model.".format(
        model_select))

    # Apply peak-normalization.
    wav_data = utils.peak_normalization(wav_data)

    # Perform denoising in chunks of size chunk_length samples.
    chunk_length = int(truncate_minutes * rate * 60)
    total_chunks = int(math.ceil(wav_data.size / chunk_length))
    data_se = []  # Will hold enhanced audio data for each chunk.
    for i in range(1, total_chunks + 1):
        tmp_dir = tempfile.mkdtemp()
        try:
            # Get samples for this chunk.
            bi = (i - 1) * chunk_length  # Index of first sample of this chunk.
            ei = bi + chunk_length  # Index of last sample of this chunk + 1.
            temp = wav_data[bi:ei]
            print('Processing file: %s, segment: %d/%d.' %
                  (src_wav_file, i, total_chunks))

            # Skip denoising if chunk is too short.
            if temp.shape[0] < WL2:
                data_se.append(temp)
                continue

            # Determine paths to the temporary files to be created.
            noisy_normed_lps_fn = os.path.join(tmp_dir,
                                               'noisy_normed_lps.htk')
            noisy_normed_lps_scp_fn = os.path.join(tmp_dir,
                                                   'noisy_normed_lps.scp')
            outputs_fn = os.path.join(tmp_dir, 'irm.mat')

            # Extract LPS features from waveform.
            noisy_htkdata = utils.wav2logspec(temp, window=np.hamming(WL))

            # Do MVN before decoding.
            normed_noisy = (noisy_htkdata - global_mean) / global_var

            # Write features to HTK binary format making sure to also
            # create a script file.
            model_name = model_select.lower()
            if model_name == '400h':
                utils.write_htk(noisy_normed_lps_fn, normed_noisy,
                                samp_period=SR, parm_kind=9)
            elif model_name == '1000h':
                # The 1000h model already integrates MVN inside itself.
                utils.write_htk(noisy_normed_lps_fn, noisy_htkdata,
                                samp_period=SR, parm_kind=9)
            cntk_len = noisy_htkdata.shape[0] - 1
            with open(noisy_normed_lps_scp_fn, 'w') as f:
                f.write('irm=%s[0,%d]\n' % (noisy_normed_lps_fn, cntk_len))

            # Apply CNTK model to determine ideal ratio mask (IRM), which will
            # be output to the temp directory as irm.mat. In order to avoid a
            # memory leak, must do this in a separate process which we then
            # kill.
            p = Process(target=decode_model,
                        args=(noisy_normed_lps_scp_fn, tmp_dir, NFREQS,
                              use_gpu, gpu_id, mode, model_select,
                              stage_select))
            p.start()
            p.join()
            if p.exception:
                e, tb = p.exception
                raise type(e)(tb)

            # Read the decoder output once and pick both variables from it.
            decoded = sio.loadmat(outputs_fn)
            irm = decoded['IRM']
            lps = decoded['LPS']
            if mode == 1:
                recovered_lps = noisy_htkdata + np.log(irm)
            elif mode == 2:
                recovered_lps = (lps * global_var) + global_mean
            elif mode == 3:
                recovered_lps = 0.5 * (noisy_htkdata + np.log(irm)) + 0.5 * (
                    (lps * global_var) + global_mean)
            else:
                # Fail with a clear message instead of an unbound local.
                raise ValueError('mode must be 1, 2 or 3, got %r' % (mode,))

            # Reconstruct audio.
            wave_recon = utils.logspec2wav(recovered_lps, temp,
                                           window=np.hamming(WL),
                                           n_per_seg=WL, noverlap=WL2)
            data_se.append(wave_recon)
        finally:
            # Always remove the per-chunk scratch directory.
            shutil.rmtree(tmp_dir)
    data_se = [x.astype(np.int16, copy=False) for x in data_se]
    data_se = np.concatenate(data_se)
    # NOTE(review): the file is written with the module constant SR, not the
    # rate read from the source file -- confirm inputs always use SR.
    wav_io.write(dest_wav_file, SR, data_se)
def run(self):
    """Process ``self.frame_in`` and update the heart-rate estimate.

    The mean of the values extracted from the two regions returned by the
    face detector is appended to ``self.data_buffer`` together with a
    timestamp.  When the buffer length seen on entry equals
    ``self.buffer_size`` the buffered signal is detrended, resampled onto an
    even time grid, Hamming-windowed and transformed; the strongest component
    between 50 and 180 beats per minute becomes ``self.bpm`` and is appended
    to ``self.bpms``.  ``self.samples`` receives the (possibly band-pass
    filtered) signal, and the masked pixels of the face frame are modified
    in place.
    """
    frame, face_frame, ROI1, ROI2, status, mask = self.fd.face_detect(
        self.frame_in)
    self.frame_out = frame
    self.frame_ROI = face_frame

    g1 = self.extractColor(ROI1)
    g2 = self.extractColor(ROI2)

    # Buffer length BEFORE the new sample is appended.
    L = len(self.data_buffer)

    # average value of the 2 ROIs
    g = (g1 + g2) / 2

    # Reject sudden changes: once at least 100 samples are buffered, a value
    # more than 10 away from the buffer mean is replaced by the previous
    # sample.  The length is tested first so that the mean is never taken
    # over an empty buffer.
    if L > 99 and abs(g - np.mean(self.data_buffer)) > 10:
        g = self.data_buffer[-1]

    self.times.append(time.time() - self.t0)
    self.data_buffer.append(g)

    # only process in a fixed-size buffer
    if L > self.buffer_size:
        self.data_buffer = self.data_buffer[-self.buffer_size:]
        self.times = self.times[-self.buffer_size:]
        self.bpms = self.bpms[-self.buffer_size // 2:]
        L = self.buffer_size

    processed = np.array(self.data_buffer)

    # start calculating once buffer_size samples were present on entry
    if L == self.buffer_size:
        # frame rate actually achieved, derived from the recorded
        # timestamps rather than from the nominal camera rate
        self.fps = float(L) / (self.times[-1] - self.times[0])
        even_times = np.linspace(self.times[0], self.times[-1], L)

        # detrend the signal to reduce the influence of light changes
        processed = signal.detrend(processed)
        interpolated = np.interp(even_times, self.times, processed)
        # window the signal to reduce spectral leakage
        interpolated = np.hamming(L) * interpolated
        norm = interpolated / np.linalg.norm(interpolated)
        # real FFT of the normalized signal scaled by 30
        raw = np.fft.rfft(norm * 30)

        # rfft of L samples yields L // 2 + 1 bins; integer division keeps
        # the frequency axis the same length for odd L as well
        self.freqs = float(self.fps) / L * np.arange(L // 2 + 1)
        freqs = 60. * self.freqs  # Hz -> beats per minute

        self.fft = np.abs(raw)**2  # power spectrum

        # the range of frequency that HR is supposed to be within
        idx = np.where((freqs > 50) & (freqs < 180))
        pruned = self.fft[idx]
        pfreq = freqs[idx]

        self.freqs = pfreq
        self.fft = pruned

        idx2 = np.argmax(pruned)  # max in the range can be HR
        self.bpm = self.freqs[idx2]
        self.bpms.append(self.bpm)

        processed = self.butter_bandpass_filter(processed, 0.8, 3,
                                                self.fps, order=3)

    self.samples = processed
    # TODO: find peaks to draw HR-like signal.

    # presumably a mask with 10 rows is the detector's "no face" placeholder
    # -- TODO confirm against face_detect
    if mask.shape[0] != 10:
        out = np.zeros_like(face_frame)
        # builtin bool: the np.bool alias is not available in every NumPy
        # release
        mask = mask.astype(bool)
        out[mask] = face_frame[mask]
        # tint channel 2 of the masked pixels while the signal is above
        # its mean
        if processed[-1] > np.mean(processed):
            out[mask, 2] = 180 + processed[-1] * 10
        face_frame[mask] = out[mask]
def mfcc(sample_rate, signal):
    """Return mean-normalized, liftered MFCCs of the first second of *signal*.

    The signal is cut to ``sample_rate`` samples, pre-emphasized (0.97),
    split into 25 ms frames with a 10 ms step, Hamming-windowed and turned
    into a 512-point power spectrum.  A bank of 40 triangular mel filters is
    applied, the result is converted with ``20 * log10`` and transformed with
    an orthonormal DCT-II; coefficients 1..12 are kept, liftered with a
    sinusoidal lifter (22) and each column has its mean (plus 1e-8)
    subtracted.

    Returns an array of shape ``(number_of_frames, 12)``.
    """
    pre_emphasis = 0.97
    frame_size, frame_stride = 0.025, 0.01
    NFFT = 512
    nfilt = 40
    num_ceps = 12
    cep_lifter = 22

    # Pre-emphasis on (at most) the first second of audio.
    clip = signal[0:int(1 * sample_rate)]
    emphasized = numpy.append(clip[0], clip[1:] - pre_emphasis * clip[:-1])

    # Frame geometry in samples.
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(emphasized)
    num_frames = int(
        numpy.ceil(
            float(numpy.abs(signal_length - frame_length)) / frame_step))

    # Zero-pad so that every frame is complete.
    padded_length = num_frames * frame_step + frame_length
    padded = numpy.append(emphasized,
                          numpy.zeros((padded_length - signal_length)))

    # Row r holds the sample indices of frame r.
    frame_starts = numpy.arange(0, num_frames * frame_step, frame_step)
    indices = frame_starts[:, None] + numpy.arange(0, frame_length)[None, :]
    frames = padded[indices.astype(numpy.int32, copy=False)]
    frames *= numpy.hamming(frame_length)

    # Power spectrum.
    magnitudes = numpy.absolute(numpy.fft.rfft(frames, NFFT))
    pow_frames = ((1.0 / NFFT) * ((magnitudes)**2))

    # Filter edges: equally spaced on the mel scale, mapped to FFT bins.
    high_freq_mel = (2595 * numpy.log10(1 + (sample_rate / 2) / 700))
    mel_points = numpy.linspace(0, high_freq_mel, nfilt + 2)
    hz_points = (700 * (10**(mel_points / 2595) - 1))
    edges = numpy.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = numpy.zeros((nfilt, int(numpy.floor(NFFT / 2 + 1))))
    for row in range(nfilt):
        lo, mid, hi = edges[row], edges[row + 1], edges[row + 2]
        # rising slope
        for k in range(int(lo), int(mid)):
            fbank[row, k] = (k - lo) / (mid - lo)
        # falling slope
        for k in range(int(mid), int(hi)):
            fbank[row, k] = (hi - k) / (hi - mid)

    filter_banks = numpy.dot(pow_frames, fbank.T)
    # Replace exact zeros so that the logarithm stays finite.
    filter_banks = numpy.where(filter_banks == 0,
                               numpy.finfo(float).eps, filter_banks)
    filter_banks = 20 * numpy.log10(filter_banks)

    cepstra = dct(filter_banks, type=2, axis=1,
                  norm='ortho')[:, 1:(num_ceps + 1)]

    # Sinusoidal liftering followed by per-coefficient mean removal.
    n = numpy.arange(cepstra.shape[1])
    lift = 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)
    cepstra *= lift
    cepstra -= (numpy.mean(cepstra, axis=0) + 1e-8)
    return cepstra
import os import csv import sys import matplotlib.pyplot as plt import numpy as np import pandas as pd import parselmouth import librosa import seaborn as sns import scipy.io.wavfile as wav DEFAULT_INPUT_DIR = 'raw_wav' DEFAULT_OUTPUT_DIR = 'raw_csv' WINFUNC = lambda x: np.hamming(x) def get_non_silence_idx_range_from_pitch(wav_name: str, pitch_csv_path: str) -> [int, int]: corresponding_pitch_csv = f'pitch-{wav_name.replace(".wav", ".csv")}' df = pd.read_csv(f'{pitch_csv_path}/{corresponding_pitch_csv}') candidate = [] start_idx = 0 end_idx = df.shape[0] + 1 count = 0 is_counting = False i = 0 for i in range(len(df['F0'])): is_counting = df['F0'][i] != 0 if is_counting: if count == 0:
noverlap=None, fs=sample_rate, nperseg=window_length) f2, t2, Zxx2 = stft(signal[7000:], window='boxcar', noverlap=None, fs=sample_rate, nperseg=window_length) rect_signal = [] hamm_signal = [] # Loop and calculate the signal multipled by respective window for i in range(7000, len(signal), window_length): if len(signal[i:i + window_length]) != window_length: break hamm_signal.append( (signal[i:i + window_length] * np.hamming(window_length))) rect_signal.append((signal[i:i + window_length] * scipy.signal.windows.boxcar(window_length))) fig, axs = plt.subplots(3, 2) axs[0, 0].plot(time, signal) axs[0, 0].set_title('Original waveform') axs[0, 0].set_ylabel('Amplitude') axs[0, 0].set_xlabel('Time [sec]') axs[1, 0].plot(rect_signal) axs[1, 0].set_title('Signal multipled by 160 points rectangular window') axs[1, 0].set_ylabel('Filtered amplitude') axs[2, 0].pcolormesh(t2, f2, np.abs(Zxx2), vmin=0,
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Data parameters
N = 36000  # number of samples
dt = 0.01  # sampling interval
fc1 = 0.5  # cutoff frequency 1 [Hz]
fc2 = 35.0  # cutoff frequency 2 [Hz]
A1, A2 = 20, 5
# Time axis.  Built from integer indices: np.arange with a fractional step
# can return one element more or less than N because of rounding.
t = np.arange(N) * dt
# Frequency axis.  DFT bin k lies at k / (N * dt), so the end point 1/dt
# itself must be excluded.
freq = np.linspace(0, 1.0 / dt, N, endpoint=False)
hamm = np.hamming(N)  # Hamming window

# Load the CSV
df = pd.read_csv(
    "C:/github/sample/python/numpy/fft/strong-motion/2011-03-11-14-46-30-miyazaki-oketanimachi/ex2/data.csv",
    encoding="UTF-8",
    skiprows=6)

# Take only the "UD" column (acceleration)
f = df["UD"]

# Fast Fourier transform of the windowed signal (to the frequency domain)
F = np.fft.fft(f * hamm)

# Normalize; AC components are scaled by 2
F = F / (N / 2)

# Restore the DC component to a scale factor of 1
def FFT_AMP(data):
    """Return the FFT magnitude of a mean-removed, Hamming-windowed signal.

    Parameters
    ----------
    data : array_like
        Sequence of samples. Any array-like is accepted (list, tuple,
        ``numpy.ndarray``, ...); it is converted with ``np.asarray``.

    Returns
    -------
    numpy.ndarray
        ``abs(fft(hamming(len(data)) * (data - mean(data))))``. The result
        is the full two-sided spectrum with no amplitude normalisation, so
        it has the same length as ``data``.
    """
    # Coerce first so plain Python sequences (which have no .mean()) work too.
    samples = np.asarray(data)
    # Remove the DC offset so it does not leak into neighbouring bins.
    centered = samples - samples.mean()
    windowed = np.hamming(len(samples)) * centered
    return np.abs(np.fft.fft(windowed))
def noise_filter(self, x, fs=16000):
    """Denoise ``x`` by frame-wise spectral subtraction.

    The signal is processed in 20 ms Hamming-windowed frames with 50 %
    overlap. An initial noise magnitude spectrum is averaged over the first
    five non-overlapping frames (assumed to contain only noise/silence).
    For every frame the scaled noise power is subtracted from the frame's
    power spectrum, the result is floored at ``beta`` times the noise
    power, recombined with the noisy phase, inverse-transformed and
    overlap-added.

    Parameters
    ----------
    x : sequence of samples
        Input signal; only ``len(x)``, slicing and multiplication by a
        NumPy window are used on it.
        NOTE(review): the output is cast to ``np.short``, so the input is
        presumably int16-scaled PCM -- confirm with callers.
    fs : int, optional
        Sampling rate in Hz; determines the 20 ms frame length.

    Returns
    -------
    numpy.ndarray
        Enhanced signal with dtype ``np.short`` and length
        ``(len(x) // hop - 1) * hop`` where ``hop`` is half a frame.
        Values are saturated to the ``np.short`` range before the cast.

    Raises
    ------
    ValueError
        If ``x`` is shorter than the five frames needed for the initial
        noise estimate.
    """
    # Frame geometry
    window_length = 20 * fs // 1000  # frame size in samples
    PERC = 50  # overlap as a percentage of the frame
    cover_window = window_length * PERC // 100  # overlapping part
    uncover_window = window_length - cover_window  # hop size

    # Default algorithm parameters
    Thres = 3  # frames below this segmental SNR [dB] update the noise
    Expnt = 2.0  # 1.0: magnitude spectrum, 2.0: power spectrum
    beta = 0.002  # spectral floor factor
    G = 0.9  # smoothing factor for the noise update
    noise_frames = 5  # leading frames assumed to be noise/silence

    if len(x) < noise_frames * window_length:
        raise ValueError(
            "noise_filter needs at least %d samples for the initial noise "
            "estimate, got %d" % (noise_frames * window_length, len(x)))

    # Hamming analysis window
    win = np.hamming(window_length)
    # Normalisation gain for overlap-add with 50 % overlap
    winGain = uncover_window / win.sum()

    # FFT length derived from the frame length via a helper that is defined
    # elsewhere on this class (presumably the next power-of-two exponent).
    nFFT = 2 * 2**(self._nextpow2(window_length))

    # Initial noise magnitude spectrum: mean over the leading frames
    noise_mu = np.zeros(nFFT)
    for frame_idx in range(noise_frames):
        start = frame_idx * window_length
        noise_mu = noise_mu + np.abs(
            np.fft.fft(win * x[start:start + window_length], nFFT))
    noise_mu = noise_mu / noise_frames

    # Overlap-add state
    x_old = np.zeros(cover_window)
    Nframes = len(x) // uncover_window - 1
    xfinal = np.zeros(Nframes * uncover_window)

    # Second argument handed to self._berouti; that helper is defined
    # elsewhere (presumably it selects the magnitude vs. power rule for the
    # over-subtraction factor -- confirm against its definition).
    berouti_mode = 3 if Expnt == 1.0 else 4

    for n in range(Nframes):
        start = n * uncover_window

        # Window the frame and transform it
        insign = win * x[start:start + window_length]
        spec = np.fft.fft(insign, nFFT)
        sig = np.abs(spec)  # magnitude
        theta = np.angle(spec)  # noisy phase, reused for resynthesis

        # Segmental SNR of this frame against the current noise estimate
        SNRseg = 10 * np.log10(
            np.linalg.norm(sig, 2)**2 / np.linalg.norm(noise_mu, 2)**2)
        alpha = self._berouti(SNRseg, berouti_mode)

        noise_pow = noise_mu**Expnt
        sub_speech = sig**Expnt - alpha * noise_pow

        # Where the subtracted spectrum falls below the floor, use the
        # floor (beta * noise power) instead.
        below_floor = (sub_speech - beta * noise_pow) < 0
        sub_speech[below_floor] = beta * noise_pow[below_floor]

        if SNRseg < Thres:
            # Low-SNR frame: smooth the noise power spectrum towards this
            # frame and convert back to a magnitude spectrum.
            noise_temp = G * noise_pow + (1 - G) * sig**Expnt
            noise_mu = noise_temp**(1 / Expnt)

        # Mirror the lower half onto the upper half so the spectrum is
        # symmetric.
        sub_speech[nFFT // 2 + 1:nFFT] = np.flipud(sub_speech[1:nFFT // 2])

        # Recombine the enhanced magnitude with the noisy phase
        # (vectorised cos/sin instead of a per-element Python loop).
        x_phase = (sub_speech**(1 / Expnt)) * (
            np.cos(theta) + 1j * np.sin(theta))

        # Back to the time domain
        xi = np.fft.ifft(x_phase).real

        # Overlap and add
        xfinal[start:start + uncover_window] = x_old + xi[0:cover_window]
        x_old = xi[cover_window:window_length]

    # Saturate before the integer cast so out-of-range samples clip instead
    # of wrapping around.
    limits = np.iinfo(np.short)
    return np.clip(xfinal * winGain, limits.min, limits.max).astype(np.short)