def compare_ir():
    """Plot impulse responses of the proposed and reference gammatone banks.

    A unit impulse is passed through the proposed filterbank
    (``gtf_proposed``) and through the reference implementation (``gtf_ref``)
    built at the same centre frequencies.  Three figures are saved:

    * ``proposed.png`` -- ``plot_ir_spec`` view of the proposed responses,
    * ``ref.png`` -- ``plot_ir_spec`` view of the reference responses,
    * ``images/validate/compare.png`` -- peak-normalised response of band
      index 3 from each implementation, each overlaid on the response
      returned by ``get_ir_equation()``.
    """
    fs = 16e3

    # Unit impulse: ``fs`` samples long with the single non-zero sample at
    # index 1.  A plain ``int`` is used for the length so that a larger
    # sampling rate cannot overflow a 16-bit integer.
    x_len = int(fs)
    x = np.zeros(x_len)
    x[1] = 1

    # Proposed implementation.
    gtf_obj = gtf_proposed(fs, cf_low=100, cf_high=2000, n_band=4)
    irs = gtf_obj.filter(x)
    fig = gtf_obj.plot_ir_spec(irs[:, :1000])
    savefig(fig, 'proposed.png')

    # Reference implementation at the same centre frequencies.
    coefs = gtf_ref.make_erb_filters(fs, gtf_obj.cfs)
    irs_ref = gtf_ref.erb_filterbank(x, coefs)
    # The figure written to "ref.png" must show the reference responses;
    # previously the proposed responses (``irs``) were plotted a second time.
    fig = gtf_obj.plot_ir_spec(irs_ref[:, :1000])
    savefig(fig, "ref.png")

    # Response returned by the filterbank object's equation method.
    irs_eq = gtf_obj.get_ir_equation()

    def peak_normalised(band):
        # Scale so that the largest sample equals 1.
        return band / np.max(band)

    fig, ax = plt.subplots(1, 2, sharex=True, sharey=True, tight_layout=True)

    ax[0].plot(peak_normalised(irs[3]), label='Todd')
    ax[0].plot(peak_normalised(irs_eq[3]), label='Equation')
    ax[0].legend()
    ax[0].set_xlim([0, 200])

    ax[1].plot(peak_normalised(irs_ref[3]), label='Detly')
    ax[1].plot(peak_normalised(irs_eq[3]), label='Equation')
    ax[1].legend()
    ax[1].set_xlim([0, 200])

    savefig(fig, 'images/validate/compare.png')
def validate():
    """Overlay band-3 impulse responses against the equation response.

    Filters a unit impulse with the proposed filterbank (``gtf_proposed``)
    and with ``gtf_reference`` at the same centre frequencies, then saves a
    two-panel figure (``compare.png``).  The left panel shows the proposed
    response and the right panel the reference response; both panels also
    show the response returned by ``get_ir_equation()``.  Every curve is
    divided by its own maximum.
    """
    fs = 16e3

    # Unit impulse with the non-zero sample at index 1.
    x = np.zeros(np.int16(fs))
    x[1] = 1

    gtf_obj = gtf_proposed(fs, cf_low=100, cf_high=2000, n_band=4)
    irs = gtf_obj.filter(x)

    coefs = gtf_reference.make_erb_filters(fs, gtf_obj.cfs)
    irs_ref = gtf_reference.erb_filterbank(x, coefs)

    irs_eq = gtf_obj.get_ir_equation()

    def peak_normalised(band):
        return band / np.max(band)

    fig, ax = plt.subplots(1, 2, sharex=True, sharey=True, tight_layout=True)

    # One (response, legend label) pair per panel, left to right.
    panels = ((irs[3], 'todd'), (irs_ref[3], 'detly'))
    for axis, (band, label) in zip(ax, panels):
        axis.plot(peak_normalised(band), label=label)
        axis.plot(peak_normalised(irs_eq[3]), label='eq')
        axis.legend()
        axis.set_xlim([0, 200])

    savefig(fig, 'compare.png')
def gtgram_xe(wave, fs, channels, f_min, f_max):
    """Return the squared output of an ERB filterbank applied to ``wave``.

    Centre frequencies come from ``centre_freqs(fs, channels, f_min, f_max)``.
    The filter coefficient matrix built from them is flipped top-to-bottom
    before filtering, and every filtered sample is squared.
    """
    centre = centre_freqs(fs, channels, f_min, f_max)
    coefs = gf.make_erb_filters(fs, centre)
    filtered = gf.erb_filterbank(wave, np.flipud(coefs))
    return np.power(filtered, 2)
def process_signal(self, in_sig, verbose=False):
    """Run the whole processing chain on ``in_sig``.

    The signal is split into ``self.num_chan`` channels, either by zero-phase
    filtering with the ``self.b[k]`` / ``self.a[k]`` coefficients
    (``filt_type == 'bessel'``) or by ``gtf.erb_filterbank`` with
    ``self.erb_coefs`` (``filt_type == 'gammatone'``).  Each channel is handed
    to ``self.fdl[k].process_data``; every returned chunk of at least
    ``0.03 / self.dt`` samples goes through two ``scfbutils.agc`` passes and
    ``scfbutils.pll``, and the pair ``(chunk indices, frequency estimate)`` is
    appended to ``self.chunks``.

    Args:
        in_sig: input signal; stored on ``self.in_sig``.
        verbose: when true, print a progress line per channel.

    Returns:
        ``self.chunks`` on success, or ``0`` (after printing a message) when
        ``self.filt_type`` is neither ``'bessel'`` nor ``'gammatone'``.

    Side effects:
        Sets ``self.in_sig``, ``self.filtered_channels`` and
        ``self.processed``; the gammatone path also sets ``self.out_chunks``
        to the chunks of the last processed channel.
    """
    self.in_sig = in_sig
    print("len: ", len(self.in_sig))

    def track_channel(k, filted):
        """Chunk one filtered channel, level it and append PLL estimates."""
        f0s, idx_chunks, out_chunks, num_chunks = self.fdl[k].process_data(
            filted)
        min_len = np.floor(0.03 / self.dt)  # keep chunks of >= 30 ms only
        for j in range(num_chunks):
            if len(out_chunks[j]) < min_len:
                continue
            # NOTE(review): agc is taken to return (signal, gains), as the
            # gammatone branch always unpacked it.  The bessel branch used to
            # store the whole return value in out_chunks[j] -- confirm against
            # scfbutils.agc.
            out_chunks[j], _ = scfbutils.agc(out_chunks[j], 0.1, 0.25)
            out_chunks[j], _ = scfbutils.agc(out_chunks[j], 0.001, 0.25)
            freq_est = scfbutils.pll(out_chunks[j], f0s[j], self.f_s)
            assert len(freq_est) == len(idx_chunks[j])
            self.chunks.append((idx_chunks[j], freq_est))
        return out_chunks

    if self.filt_type == 'bessel':
        filted_channels = np.zeros((self.num_chan, len(self.in_sig)))
        print(filted_channels.shape)
        for k in range(self.num_chan):
            if verbose:
                print("Processing channel %d/%d" % (k + 1, self.num_chan))
            filted_channels[k] = dsp.filtfilt(self.b[k], self.a[k], in_sig)
            track_channel(k, filted_channels[k])
    elif self.filt_type == 'gammatone':
        filted_channels = gtf.erb_filterbank(self.in_sig, self.erb_coefs)
        for k, filted in enumerate(filted_channels):
            if verbose:
                print("Processing channel %d/%d" % (k + 1, self.num_chan))
            # Kept for debugging: chunks of the most recent channel.
            self.out_chunks = track_channel(k, filted)
    else:
        print("Failure: given filter type unavailable.")
        return 0

    self.filtered_channels = filted_channels
    self.processed = True
    return self.chunks
def srmr(x, fs, n_cochlear_filters=23, low_freq=125, min_cf=4, max_cf=128, fast=True, norm=False):
    """Compute a modulation-energy ratio and the per-frame modulation energy.

    Args:
        x: input signal, passed to the gammatone front end.
        fs: sampling rate of ``x``.
        n_cochlear_filters: number of gammatone channels.
        low_freq: lowest frequency handed to the gammatone front end.
        min_cf, max_cf: range handed to ``compute_modulation_cfs`` for the
            8 modulation filters.
        fast: if true, envelopes come from ``fft_gtgram`` and the envelope
            rate ``mfs`` is fixed at 400; otherwise they are the magnitude
            of the Hilbert transform of the ERB filterbank output and
            ``mfs`` equals ``fs``.
        norm: if true, ``normalize_energy`` is applied to the energy array.

    Returns:
        ``(ratio, energy)`` where ``energy`` has shape
        ``(n_cochlear_filters, 8, n_frames)`` and ``ratio`` is the summed
        average energy of modulation bands ``0..3`` divided by that of bands
        ``4..Kstar-1``.
    """
    wLengthS = .256
    wIncS = .064

    # Gammatone envelopes.
    if fast:
        mfs = 400.0
        gt_env = fft_gtgram(x, fs, 0.010, 0.0025, n_cochlear_filters, low_freq)
    else:
        cfs = centre_freqs(fs, n_cochlear_filters, low_freq)
        fcoefs = make_erb_filters(fs, cfs)
        gt_env = np.abs(hilbert(erb_filterbank(x, fcoefs)))
        mfs = fs

    wLength = int(np.ceil(wLengthS*mfs))
    wInc = int(np.ceil(wIncS*mfs))

    # Modulation filterbank with Q = 2 and 8 channels.
    mod_filter_cfs = compute_modulation_cfs(min_cf, max_cf, 8)
    MF = modulation_filterbank(mod_filter_cfs, mfs, 2)

    n_frames = int(1 + (gt_env.shape[1] - wLength)//wInc)
    w = hamming(wLength+1)[:-1]  # window is periodic, not symmetric

    energy = np.zeros((n_cochlear_filters, 8, n_frames))
    for i, ac_ch in enumerate(gt_env):
        mod_out = modfilt(MF, ac_ch)
        for j, mod_ch in enumerate(mod_out):
            mod_out_frame = segment_axis(mod_ch, wLength, overlap=wLength-wInc, end='pad')
            energy[i, j, :] = np.sum((w*mod_out_frame[:n_frames])**2, axis=1)

    if norm:
        energy = normalize_energy(energy)

    erbs = np.flipud(calc_erbs(low_freq, fs, n_cochlear_filters))

    avg_energy = np.mean(energy, axis=2)
    total_energy = np.sum(avg_energy)

    # Take the ``erbs`` entry at the first index where the cumulative
    # percentage of per-channel energy (channels in reversed order) exceeds 90.
    AC_energy = np.sum(avg_energy, axis=1)
    AC_perc = AC_energy*100/total_energy
    AC_perc_cumsum = np.cumsum(np.flipud(AC_perc))
    K90perc_idx = np.where(AC_perc_cumsum > 90)[0][0]
    BW = erbs[K90perc_idx]

    cutoffs = calc_cutoffs(mod_filter_cfs, fs, 2)[0]

    # Upper modulation band of the denominator.  Written as a descending
    # chain with a final ``else`` so that Kstar is always bound: the earlier
    # strict two-sided tests left it undefined (UnboundLocalError) when BW was
    # at or below cutoffs[4] or exactly equal to one of the cutoffs.
    # Assumes cutoffs[4..7] are increasing, in which case every previously
    # handled BW maps to the same Kstar as before.
    if BW > cutoffs[7]:
        Kstar = 8
    elif BW > cutoffs[6]:
        Kstar = 7
    elif BW > cutoffs[5]:
        Kstar = 6
    else:
        Kstar = 5

    return np.sum(avg_energy[:, :4])/np.sum(avg_energy[:, 4:Kstar]), energy
def srmr(x, fs, n_cochlear_filters=23, low_freq=125, min_cf=4, max_cf=128, fast=True, norm=False):
    """Compute a modulation-energy ratio and the per-frame modulation energy.

    Args:
        x: input signal, passed to the gammatone front end.
        fs: sampling rate of ``x``.
        n_cochlear_filters: number of gammatone channels.
        low_freq: lowest frequency handed to the gammatone front end.
        min_cf, max_cf: range handed to ``compute_modulation_cfs`` for the
            8 modulation filters.
        fast: if true, envelopes come from ``fft_gtgram`` and the envelope
            rate ``mfs`` is fixed at 400; otherwise they are the magnitude
            of the Hilbert transform of the ERB filterbank output and
            ``mfs`` equals ``fs``.
        norm: if true, ``normalize_energy`` is applied to the energy array.

    Returns:
        ``(ratio, energy)`` where ``energy`` has shape
        ``(n_cochlear_filters, 8, n_frames)`` and ``ratio`` is the summed
        average energy of modulation bands ``0..3`` divided by that of bands
        ``4..Kstar-1``.
    """
    wLengthS = .256
    wIncS = .064

    # Gammatone envelopes.
    if fast:
        mfs = 400.0
        gt_env = fft_gtgram(x, fs, 0.010, 0.0025, n_cochlear_filters, low_freq)
    else:
        cfs = centre_freqs(fs, n_cochlear_filters, low_freq)
        fcoefs = make_erb_filters(fs, cfs)
        gt_env = np.abs(hilbert(erb_filterbank(x, fcoefs)))
        mfs = fs

    # np.ceil returns a float; these values are used as a window length, an
    # array dimension and a slice bound, all of which must be integers.
    wLength = int(np.ceil(wLengthS*mfs))
    wInc = int(np.ceil(wIncS*mfs))

    # Modulation filterbank with Q = 2 and 8 channels.
    mod_filter_cfs = compute_modulation_cfs(min_cf, max_cf, 8)
    MF = modulation_filterbank(mod_filter_cfs, mfs, 2)

    n_frames = int(1 + (gt_env.shape[1] - wLength)//wInc)
    w = hamming(wLength+1)[:-1]  # window is periodic, not symmetric

    energy = np.zeros((n_cochlear_filters, 8, n_frames))
    for i, ac_ch in enumerate(gt_env):
        mod_out = modfilt(MF, ac_ch)
        for j, mod_ch in enumerate(mod_out):
            mod_out_frame = segment_axis(mod_ch, wLength, overlap=wLength-wInc, end='pad')
            energy[i, j, :] = np.sum((w*mod_out_frame[:n_frames])**2, axis=1)

    if norm:
        energy = normalize_energy(energy)

    erbs = np.flipud(calc_erbs(low_freq, fs, n_cochlear_filters))

    avg_energy = np.mean(energy, axis=2)
    total_energy = np.sum(avg_energy)

    # Take the ``erbs`` entry at the first index where the cumulative
    # percentage of per-channel energy (channels in reversed order) exceeds 90.
    AC_energy = np.sum(avg_energy, axis=1)
    AC_perc = AC_energy*100/total_energy
    AC_perc_cumsum = np.cumsum(np.flipud(AC_perc))
    K90perc_idx = np.where(AC_perc_cumsum > 90)[0][0]
    BW = erbs[K90perc_idx]

    cutoffs = calc_cutoffs(mod_filter_cfs, fs, 2)[0]

    # Upper modulation band of the denominator.  Written as a descending
    # chain with a final ``else`` so that Kstar is always bound: the earlier
    # strict two-sided tests left it undefined (UnboundLocalError) when BW was
    # at or below cutoffs[4] or exactly equal to one of the cutoffs.
    # Assumes cutoffs[4..7] are increasing, in which case every previously
    # handled BW maps to the same Kstar as before.
    if BW > cutoffs[7]:
        Kstar = 8
    elif BW > cutoffs[6]:
        Kstar = 7
    elif BW > cutoffs[5]:
        Kstar = 6
    else:
        Kstar = 5

    return np.sum(avg_energy[:, :4])/np.sum(avg_energy[:, 4:Kstar]), energy
def calculate_heinz2001_firing_rate(_input, fs, cfs, **kwargs):
    """
    Simulate auditory nerve firing rates with the Heinz et al. (2001) model

    The model is evaluated in four stages:
        - Gammatone front end, provided by the gammatone package
          (https://github.com/detly/gammatone)
        - Saturating nonlinearity standing in for the inner hair cells (IHC)
        - Lowpass filtering of the IHC response by applying a first-order
          Butterworth filter seven times in a row
        - Auditory nerve response to the IHC signal, delegated to
          ``_calculate_heinz2001_rate_internals`` (implemented with Numba for
          speed)

    The auditory nerve stage of Heinz et al. (2001) is a slightly simplified
    version of the three-stage diffusion model of Westerman and Smith (1988).
    Most inline parameter descriptions are taken directly from Heinz et al.
    (2001).

    Args:
        _input (ndarray): 1-dimensional ndarray containing an acoustic stimulus in pascals
        fs (int): sampling rate in Hz
        cfs (ndarray): characteristic frequencies at which responses are simulated

    Returns:
        output (ndarray): instantaneous firing rates, of shape (n_cf, n_samp)

    Warnings:
        - Anything passed through **kwargs is silently ignored

    References:
        Heinz, M. G., Colburn, H. S., and Carney, L. H. (2001). "Evaluating auditory performance
        limits: I. One-parameter discrimination using a computational model for the auditory nerve."
        Neural Computation, 13(10). 2273-2316.
        Westerman, L. A., & Smith, R. L. (1988). A diffusion model of the transient response of the
        cochlear inner hair cell synapse. The Journal of the Acoustical Society of America, 83(6),
        2266-2276.
    """
    # Peripheral filter outputs
    erb_coefs = filters.make_erb_filters(fs, cfs)
    bm = filters.erb_filterbank(_input, erb_coefs)

    # Saturating nonlinearity
    K = 1225  # controls sensitivity
    beta = -1  # sets 3:1 asymmetric bias
    atan_beta = np.arctan(beta)
    ihc = (np.arctan(K * bm + beta) - atan_beta) / (np.pi / 2 - atan_beta)

    # Lowpass stage: the same first-order section, applied seven times
    b, a = butter(1, 4800 / (fs / 2))
    for _ in range(7):
        ihc = lfilter(b, a, ihc, axis=1)

    # Auditory nerve + neural adaptation stage
    C_I = np.zeros_like(ihc)  # immediate concentration ("spikes/volume")
    C_L = np.zeros_like(ihc)  # local concentration ("spikes/volume")
    return _calculate_heinz2001_rate_internals(ihc.shape, fs, ihc, C_I, C_L)
def get_gfcc(self, signal, ccST=1, ccEND=23):
    """
    Return one GFCC value per retained cepstral coefficient.

    ``signal`` is filtered with ``self.erb_filter``; filterbank rows from
    index 10 onwards are multiplied by ``self.dct_matrix(<row count>)``.
    Rows ``ccST`` to ``ccEND - 1`` of that product are kept and each row is
    averaged over its second axis, giving a 1-D array.
    """
    samples = numpy.array(signal)
    band_outputs = filters.erb_filterbank(samples, self.erb_filter)

    # Rows 0-9 of the filterbank output are not used.
    in_data = band_outputs[10:, :]
    n_chan, _n_frames = numpy.array(in_data).shape

    transform = self.dct_matrix(n_chan)
    coeffs = numpy.matmul(transform, in_data)[ccST:ccEND, :]

    row_means = [numpy.mean(row) for row in coeffs]
    return numpy.array(row_means).copy()
def _computeSingleFrameFeature(self, sig):
    '''Feature computation for a single time-series frame/segment

    Only the "SubEnv" (sub-band envelopes) feature is implemented. The
    segment is split into ``self.dimensions[1]`` sub-bands with an ERB
    filterbank; each band's amplitude envelope (magnitude of the analytic
    signal) is averaged over ``self.dimensions[0]`` consecutive windows and
    converted with log2. The resulting matrix has its global mean removed
    and is divided by its largest absolute value.

    Args:
        sig (numpy array): The signal segment for which feature will be computed

    Returns:
        feature (numpy array): Computed feature of shape
        (self.dimensions[0], self.dimensions[1]). For any other feature
        name an error is printed and an empty list is returned.
    '''
    if self.name != 'SubEnv':
        print('Error: feature '+self.name+' is not recognized')
        return []

    timeRes = self.dimensions[0]
    numBands = self.dimensions[1]

    # Computing sub-band signals
    low_cut_off = 2  # lower cut off frequency = 2Hz
    centre_freqVals = centre_freqs(self.samplerate, numBands, low_cut_off)
    fcoefs = make_erb_filters(self.samplerate, centre_freqVals, width=1.0)
    y = erb_filterbank(sig, fcoefs)

    # One column per band, filled in place instead of growing by hstack.
    subenv = np.zeros((timeRes, numBands))
    for i in range(numBands):
        amp_env = np.abs(hilbert(y[i, :]))
        # nan_to_num returns a cleaned copy; the result has to be kept,
        # otherwise NaNs survive and spread through the normalisation below.
        amp_env = np.nan_to_num(amp_env)

        # Resampling may lead to unexpected computation errors, so the
        # envelope is reduced by averaging over short consecutive windows.
        winSize = int(len(amp_env)/timeRes)
        for ind in range(timeRes):
            subenv[ind, i] = np.log2(np.mean(amp_env[ind*winSize:(ind+1)*winSize]))

    # Removing mean and normalizing
    subenv = subenv-np.mean(subenv)
    subenv = subenv/(np.max(np.abs(subenv)))
    return subenv
d = 0.89 #amount to remove detected signal from residual time = 100 #time to start analyzing audio delta_time = 100 # time between analysis frames notes = np.zeros(6, dtype=float) input_signal = wave.read('16kHz_acTuned.wav') fs = input_signal[0] T = 1 / fs audio = np.asarray(input_signal[1]) while time < (len(audio) - fs * time / 1000): trim_audio = clip_audio(audio, time) center_freqs = gt.erb_space(lowf, highf, num_freqs) filt_coefs = gt.make_erb_filters(fs, center_freqs) channels = gt.erb_filterbank(trim_audio, filt_coefs) process_chan = np.empty( [len(channels), len(channels[0]) * 2], dtype=complex) mag_chan = np.empty_like(process_chan, dtype=float) #Cochlea simulation for idx in range(len(channels)): process_chan[idx, :] = compression(channels[idx, :]) process_chan[idx, :] = half_wave_rectification(process_chan[idx, :]) low_cutoff = center_freqs[idx] * 1.5 process_chan[idx, :] = butter_lfilter(process_chan[idx, :], low_cutoff, fs) mag_chan[idx, :] = np.absolute(np.fft.fft(process_chan[idx, :])) #summation of DFT magnitudes across each channel
def gammatone_bank(
        wav: NDVar,
        f_min: float,
        f_max: float,
        n: int,
        integration_window: float = 0.010,
        tstep: float = None,
        location: str = 'right',
        pad: bool = True,
        name: str = None,
) -> NDVar:
    """Gammatone filterbank response

    Parameters
    ----------
    wav : NDVar
        Sound input.
    f_min : scalar
        Lower frequency cutoff.
    f_max : scalar
        Upper frequency cutoff.
    n : int
        Number of filter channels.
    integration_window : scalar
        Integration time window in seconds (default 10 ms).
    tstep : scalar
        Time step size in the output (default is same as ``wav``).
    location : str
        Location of the output relative to the input time axis:

        - ``right``: gammatone sample at end of integration window (default)
        - ``left``: gammatone sample at beginning of integration window
        - ``center``: gammatone sample at center of integration window

        Since gammatone filter response depends on ``integration_window``, the
        filter response will be delayed relative to the analytic envelope. To
        ignore this delay, use `location='left'`
    pad : bool
        Pad output to match time axis of input.
    name : str
        NDVar name (default is ``wav.name``).

    Notes
    -----
    Requires the ``fmax`` branch of the gammatone library to be installed::

        $ pip install https://github.com/christianbrodbeck/gammatone/archive/fmax.zip
    """
    from gammatone.filters import centre_freqs, erb_filterbank
    from gammatone.gtgram import make_erb_filters

    if location not in ('left', 'right', 'center'):
        raise ValueError(f"mode={location!r}")

    # Extend the input by the integration window on the side(s) selected by
    # ``location`` so the output can span the time axis of the input.
    wav_ = wav
    if pad:
        if location == 'left':
            wav_ = _pad_func(wav, wav.time.tmin - integration_window)
        elif location == 'right':
            wav_ = _pad_func(wav, tstop=wav.time.tstop + integration_window)
        else:
            half_window = integration_window / 2
            wav_ = _pad_func(wav, wav.time.tmin - half_window, wav.time.tstop + half_window)

    in_tstep = wav.time.tstep
    fs = 1 / in_tstep
    if tstep is None:
        tstep = in_tstep
    wave = wav_.get_data('time')

    # Based on the gammatone library, rewritten to reduce memory footprint:
    # bands are filtered and aggregated one at a time.
    cfs = centre_freqs(fs, n, f_min, f_max)
    integration_window_len = int(round(integration_window * fs))
    output_n_samples = floor((len(wave) - integration_window_len) * in_tstep / tstep)
    output_step = tstep / in_tstep

    results = []
    bands = tqdm(enumerate(reversed(cfs)), "Gammatone spectrogram", total=len(cfs), unit='band')
    for _, cf in bands:
        fcoefs = np.flipud(make_erb_filters(fs, cf))
        xf = erb_filterbank(wave, fcoefs)
        band_response = aggregate(xf[0], output_n_samples, output_step, integration_window_len)
        results.append(band_response)
    result = np.sqrt(results)

    # Package output
    dims = (
        Scalar('frequency', cfs[::-1], 'Hz'),
        UTS(wav.time.tmin, tstep, output_n_samples),
    )
    return NDVar(result, dims, wav.name if name is None else name)
""" Renders the given ``duration`` of audio from the audio file at ``path`` using the gammatone spectrogram function ``function``. """ fig = matplotlib.pyplot.figure() axes = fig.add_axes([0.1, 0.1, 0.8, 0.8]) erb_filterbank = filters.erb_filterbank(sig, ) axes.set_title("erb_filterbank"+ str(erb_filterbank.shape)+" " + os.path.basename(new_file_name_path)) axes.set_xlabel("Time (s)") axes.set_ylabel("Frequency") print("erb_filterbank.shape", erb_filterbank.shape) matplotlib.pyplot.plot(erb_filterbank) matplotlib.pyplot.show() ipdb.set_trace()
in_sig_mt = np.zeros_like(t) for p in range(2, num_h + 1): if p == mistuned_h: in_sig_mt += np.cos(2 * np.pi * f0 * p * t * mistuning) else: in_sig_mt += np.cos(2 * np.pi * f0 * p * t) in_sig += np.cos(2 * np.pi * f0 * p * t) in_sig /= np.max(in_sig) in_sig_mt /= np.max(in_sig_mt) ################################################################################ # SAC Peak Picking ################################################################################ f_c = gtf.erb_space(20., 5000., num=num_channels) erb_coefs = gtf.make_erb_filters(f_s, f_c) filt_channels = gtf.erb_filterbank(in_sig, erb_coefs) filt_channels_mt = gtf.erb_filterbank(in_sig_mt, erb_coefs) ac_channels = np.zeros((num_channels, w_size)) summary_ac = np.zeros(w_size, dtype=float) ac_channels_mt = np.zeros((num_channels, w_size)) summary_ac_mt = np.zeros(w_size, dtype=float) for k in range(num_channels): ac_channels[k, :] = dsp.correlate(filt_channels[k, -w_size:], filt_channels[k, -w_size:], mode='same') ac_channels_mt[k, :] = dsp.correlate(filt_channels_mt[k, -w_size:], filt_channels_mt[k, -w_size:], mode='same') summary_ac += np.clip(ac_channels[k, :], 1, None) # half-wave rectify summary_ac_mt += np.clip(ac_channels_mt[k, :], 0,
def GetFilteredOutputFromArray(array, FILTERBANK_COEFFICIENTS):
    """Apply the gammatone filterbank to a vector and return the result.

    ``array`` must be a numpy array, as required by the gammatone library.
    The filtered matrix is returned unchanged.
    NOTE(review): an earlier comment gave its size as wavFile.getnframes() x
    128 real values -- confirm against the caller.
    """
    return filters.erb_filterbank(array, FILTERBANK_COEFFICIENTS)
def predict(self, clean, mixture, noise):
    """Compute the modulation-energy ratio of ``mixture``.

    Only ``mixture`` is analysed; ``clean`` and ``noise`` are accepted but
    not used by this method.

    Gammatone envelopes of the mixture (``fft_gtgram`` when ``self.fast`` is
    true, otherwise the Hilbert magnitude of the ERB filterbank output) are
    passed through an 8-channel modulation filterbank, windowed, and turned
    into per-frame energies.

    Returns:
        dict with
        ``'p'``: ``{'srmr': ratio}`` -- summed average energy of modulation
        bands ``0..3`` divided by that of bands ``4..Kstar-1``;
        ``'avg_energy'``: energy averaged over frames, shape
        ``(self.n_cochlear_filters, 8)``.
    """
    # Gammatone envelopes.
    if self.fast:
        mfs = 400.0
        gt_env = fft_gtgram(mixture, self.fs, 0.010, 0.0025, self.n_cochlear_filters, self.low_freq)
    else:
        cfs = centre_freqs(self.fs, self.n_cochlear_filters, self.low_freq)
        fcoefs = make_erb_filters(self.fs, cfs)
        gt_env = np.abs(hilbert(erb_filterbank(mixture, fcoefs)))
        mfs = self.fs

    # np.ceil returns a float; these values are used as a window length and
    # an array dimension, both of which must be integers.
    wLength = int(np.ceil(self.wLengthS*mfs))
    wInc = int(np.ceil(self.wIncS*mfs))

    # Modulation filterbank with Q = 2 and 8 channels.
    mod_filter_cfs = compute_modulation_cfs(self.min_cf, self.max_cf, 8)
    MF = modulation_filterbank(mod_filter_cfs, mfs, 2)

    n_frames = int(np.ceil((gt_env.shape[1])/wInc))
    w = hamming(wLength)

    energy = np.zeros((self.n_cochlear_filters, 8, n_frames))
    for i, ac_ch in enumerate(gt_env):
        mod_out = modfilt(MF, ac_ch)
        for j, mod_ch in enumerate(mod_out):
            mod_out_frame = segment_axis(mod_ch, wLength, overlap=wLength-wInc, end='delay')
            energy[i, j, :] = np.sum((w*mod_out_frame)**2, axis=1)

    if self.norm:
        # Clamp every value into [0.001 * peak, peak], where peak is the
        # largest entry of the energy averaged over the first axis.
        peak_energy = np.max(np.mean(energy, axis=0))
        min_energy = peak_energy*0.001
        energy[energy < min_energy] = min_energy
        energy[energy > peak_energy] = peak_energy

    erbs = np.flipud(self.calc_erbs(self.low_freq, self.fs, self.n_cochlear_filters))

    avg_energy = np.mean(energy, axis=2)
    total_energy = np.sum(avg_energy)

    # Take the ``erbs`` entry at the first index where the cumulative
    # percentage of per-channel energy (channels in reversed order) exceeds 90.
    AC_energy = np.sum(avg_energy, axis=1)
    AC_perc = AC_energy*100/total_energy
    AC_perc_cumsum = np.cumsum(np.flipud(AC_perc))
    K90perc_idx = np.where(AC_perc_cumsum > 90)[0][0]
    BW = erbs[K90perc_idx]

    cutoffs = self.calc_cutoffs(mod_filter_cfs, self.fs, 2)[0]

    # Upper modulation band of the denominator.  Written as a descending
    # chain with a final ``else`` so that Kstar is always bound: the earlier
    # strict two-sided tests left it undefined (UnboundLocalError) when BW was
    # at or below cutoffs[4] or exactly equal to one of the cutoffs.
    # Assumes cutoffs[4..7] are increasing, in which case every previously
    # handled BW maps to the same Kstar as before.
    if BW > cutoffs[7]:
        Kstar = 8
    elif BW > cutoffs[6]:
        Kstar = 7
    elif BW > cutoffs[5]:
        Kstar = 6
    else:
        Kstar = 5

    out = {
        'p': {
            'srmr': np.sum(avg_energy[:, :4]) / np.sum(avg_energy[:, 4:Kstar]),
        },
        'avg_energy': avg_energy,
    }
    return out
def _subband_envelope_feature(segment, samplerate, timeDim, freqDim):
    """Return the (timeDim, freqDim) log sub-band envelope matrix of ``segment``."""
    # Computing sub-band signals
    low_cut_off = 2  # lower cut off frequency = 2Hz
    centre_freqVals = centre_freqs(samplerate, freqDim, low_cut_off)
    fcoefs = make_erb_filters(samplerate, centre_freqVals, width=1.0)
    y = erb_filterbank(segment, fcoefs)

    subenv = np.zeros((timeDim, freqDim))
    for i in range(freqDim):
        amp_env = np.abs(hilbert(y[i, :]))
        # nan_to_num returns a cleaned copy; the result has to be kept,
        # otherwise NaNs survive and spread through the normalisation below.
        amp_env = np.nan_to_num(amp_env)

        # Resampling may lead to unexpected computation errors, so the
        # envelope is reduced by averaging over short consecutive windows.
        winSize = int(len(amp_env) / timeDim)
        for t in range(timeDim):
            subenv[t, i] = np.log2(
                np.mean(amp_env[t * winSize:(t + 1) * winSize]))

    # Removing mean and normalizing
    subenv = subenv - np.mean(subenv)
    return subenv / (np.max(np.abs(subenv)))


def compute(filepath, file):
    """Classify one audio file as normal/abnormal and write a text report.

    The file ``filepath + file`` is read and cut into 2000 ms frames with a
    1000 ms hop. Each frame is multiplied by a Tukey window, divided by its
    maximum when that maximum is positive, and converted to a 32 x 16
    sub-band envelope feature. A Keras model loaded from a fixed path scores
    every frame; a frame counts as "normal" when output column 0 is larger
    than column 1, otherwise as "abnormal". The majority label and its
    percentage are written to a ``.txt`` report under a fixed output folder
    and printed.

    Args:
        filepath: directory prefix, concatenated directly with ``file``.
        file: file name; ``'.wav'`` is stripped to build the report name.

    Returns:
        None.
    """
    modelpath = 'C:/Users/user/Desktop/cnn/data/model/M_uocSeq1SubEnv32by16_nASyn2000len_1000hopt.h5'

    timeDim = 32
    freqDim = 16
    frameSizeMs = 2000
    hopSizeMs = 1000

    signal, samplerate = sf.read(filepath + file)
    lenSigMs = 1000 * len(signal) / samplerate

    # Frame boundaries in milliseconds, then in samples.
    startsMs = list(np.arange(0, lenSigMs - frameSizeMs, hopSizeMs))
    stopsMs = [x + frameSizeMs for x in startsMs]
    starts = [int(round(x * samplerate / 1000)) for x in startsMs]
    stops = [int(round(x * samplerate / 1000)) for x in stopsMs]

    # Windowing with the segmentation info and feature extraction.  Features
    # are collected in a list and stacked once, instead of re-allocating the
    # whole array with vstack for every frame.
    features = []
    for start, stop in zip(starts, stops):
        segment = signal[start:stop]
        # Applying windowing function to the segment
        segment = segment * create_window(stop - start, 'tukey', r=0.08)
        peak = np.max(segment)
        if peak > 0:
            # Normalization
            segment = segment / peak
        features.append(
            _subband_envelope_feature(segment, samplerate, timeDim, freqDim))

    if features:
        allFeatures = np.array(features)
    else:
        # No complete frame: keep the single all-zero feature that was
        # passed to the model in this situation before.
        allFeatures = np.zeros((1, timeDim, freqDim))
    allFeatures = np.reshape(allFeatures,
                             [len(allFeatures), timeDim, freqDim, 1])

    model = keras.models.load_model(modelpath)
    y_probs = model.predict(allFeatures,
                            batch_size=allFeatures.shape[0],
                            verbose=0)

    # Column 0 is treated as the "normal" score, column 1 as "abnormal".
    normal = sum(1 for probs in y_probs if probs[0] > probs[1])
    abnormal = len(y_probs) - normal

    if normal > abnormal:
        result = 'normal'
        resultRate = normal / len(y_probs) * 100
    elif normal < abnormal:
        result = 'abnormal'
        resultRate = abnormal / len(y_probs) * 100
    else:
        result = 'not sure'
        resultRate = 50

    # Create the txt report; ``with`` closes the file even if a write fails.
    report_path = ('C:/Users/user/Desktop/cnn/DataSpaceFoeFTP/Predict_Result/nxp/'
                   + file.replace('.wav', '') + ".txt")
    with open(report_path, "w", encoding='utf-8') as text_file_predict:
        text_file_predict.write('檔案:' + str(file))
        text_file_predict.write('\n')
        text_file_predict.write('\n診斷結果 =\t' + str(result))
        text_file_predict.write('\n概率為 =\t' + str(resultRate) + '%')
        text_file_predict.write('\n------------------------------------------\n')

    print('診斷結果為 : ', result)
    print('機率為 : ', resultRate, '%')
import numpy as np
import gammatone.filters as gtf
import matplotlib.pyplot as plt

# --- Test signal: sum of the first ``num_h`` cosine harmonics of ``f0`` ---
f_s = 44100
dt = 1 / f_s
dur = 0.1
t = np.arange(0, dur, dt)

num_h = 8
f0 = 220.0

in_sig = np.zeros_like(t)
for harmonic in range(1, num_h + 1):
    in_sig = in_sig + np.cos(2 * np.pi * f0 * harmonic * t)

# --- Gammatone filterbank: 100 ERB-spaced channels from 100 Hz to 4 kHz ---
erb_freqs = gtf.erb_space(100.0, 4000.0, num=100)
print(erb_freqs)
erb_coefs = gtf.make_erb_filters(f_s, erb_freqs)
filted = gtf.erb_filterbank(in_sig, erb_coefs)

# --- Plot every channel in black, each shifted down by one unit ---
for k in range(len(filted)):
    plt.plot(t, filted[k] + 100 - k, color='k')
plt.show()
fold = 0 for x in range(1, len(meta)): if int(meta[x][5]) == fold + 1: filename, foldno, classID = meta[x][0], int(meta[x][5]), int( meta[x][6]) s, sr = librosa.load('UrbanSound8K/audio/fold' + str(foldno) + '/' + filename, sr=44100) fcoefs = filters.make_erb_filters(sr, filters.centre_freqs(sr, 128, 40), odr=4) g = filters.erb_filterbank(s, fcoefs) c = cp.asarray(g) c = cp.power(c, 2) if len(c[0]) // 66536 > 0 and len(c[0]) % 65536 < 65536 / 2: nspecs = len(c[0]) // 65536 else: nspecs = (len(c[0]) // 65536) + 1 if len(c[0]) < 65536 + 512: c = cp.pad(c, ((0, 0), (0, 66048 - len(c[0]))), 'constant', constant_values=0) # For each spectrogram, for n in range(nspecs):
fig = plt.figure()
skip = 10  # label every 10th centre frequency on the x axes

# Right-hand panel: stem plot of ``pitches`` against ``CFs``.
ax1 = fig.add_subplot(1, 2, 2)
ax1.set_xscale("log")
ax1.stem(CFs, pitches, basefmt=" ")
ax1.set_xticks(list(CFs[::skip]))
ax1.grid("on", axis='y')
ax1.set_xlabel("CF of Adapative Template", size=16)
ax1.set_ylabel("Phase-locked firing rate (Hz)", size=16)
ax1.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax1.tick_params(axis='both', which='both', labelsize=12)

# Place-rate profile: Euclidean norm of each gammatone channel output,
# scaled so that the largest channel equals 1.
f_c = gtf.erb_space(100., 1600., num=100)
erb_coefs = gtf.make_erb_filters(f_s, f_c)
filt_channels = gtf.erb_filterbank(in_sig, erb_coefs)
channel_power = np.array(
    [np.sqrt(np.dot(channel, channel)) for channel in filt_channels])
channel_power /= np.max(channel_power)

# Left-hand panel: stem plot of the normalised channel norms.
ax2 = fig.add_subplot(1, 2, 1)
ax2.set_xscale("log")
ax2.set_xticks(list(f_c[::skip]))
ax2.stem(f_c, channel_power, basefmt=" ")
ax2.grid("on", axis='y')
ax2.set_xlabel("CF of Auditory Channel", size=16)
ax2.set_ylabel("Normalized Power/Firing Rate", size=16)
ax2.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax2.tick_params(axis='both', which='both', labelsize=12)
def perform_gammatone(audio_samples, filter_coeffs):
    """Filter ``audio_samples`` with ``gt_filters.erb_filterbank``.

    ``filter_coeffs`` is forwarded unchanged and the filterbank output is
    returned as-is.
    """
    filtered = gt_filters.erb_filterbank(audio_samples, filter_coeffs)
    return filtered