def compute_CD(reference, estimated):
    """Return a distance between the inverted log-magnitude STFTs of two wav files.

    Both files are read with ``wavread`` and must share one sampling rate.
    Each waveform is transformed with ``stft`` (400-sample segments, 160
    samples of overlap), the log of the magnitude is pushed back through
    ``istft``, and the two results are compared.

    Parameters
    ----------
    reference : path or file accepted by ``wavread``
    estimated : path or file accepted by ``wavread``

    Returns
    -------
    Mean over the last axis of the root of the squared differences summed
    along axis 0.
    """
    frame_opts = dict(nperseg=400, noverlap=160, nfft=400)

    fs_ref, ref_wavform = wavread(reference)
    fs_est, est_wavform = wavread(estimated)
    assert fs_est == fs_ref

    def _inverted_log_magnitude(waveform):
        _, _, spectrum = stft(waveform, fs_est, **frame_opts)
        # NOTE(review): istft returns a (times, samples) pair and np.real is
        # applied to the pair as a whole, so the time vector is part of the
        # compared array - confirm this is intended.
        return np.real(istft(np.log(abs(spectrum)), fs_ref, **frame_opts))

    ref_cepstrum = _inverted_log_magnitude(ref_wavform)
    est_cepstrum = _inverted_log_magnitude(est_wavform)

    squared_gap = (ref_cepstrum - est_cepstrum)**2
    return np.sqrt(np.sum(squared_gap, axis=0)).mean()
def to_sig(self, **kwargs):
    """Create signal from stft, i.e. perform istft.

    The istft settings stored on this object are used as defaults;
    any keyword argument given here overrides the stored value.

    Parameters
    ----------
    **kwargs : optional keyword arguments forwarded to istft:
        'sr', 'window', 'nperseg', 'noverlap', 'nfft',
        'input_onesided', 'boundary'.
        'sr' is renamed to 'fs' because scipy uses 'fs' for the
        sampling frequency. Passing 'fs' directly is also honoured.

    Returns
    -------
    _ : Asig
        Asig built from the inverted signal (transposed when the
        object has more than one channel).
    """
    istft_keys = ('sr', 'window', 'nperseg', 'noverlap', 'nfft',
                  'input_onesided', 'boundary')
    for key in istft_keys:
        if key in kwargs:
            # Caller-supplied values win over the stored ones.
            continue
        if key == 'sr' and 'fs' in kwargs:
            # The caller already chose a sampling rate under scipy's name.
            continue
        if hasattr(self, key):
            kwargs[key] = getattr(self, key)

    if 'sr' in kwargs:
        kwargs['fs'] = kwargs.pop('sr')

    # First return value of istft ('times') is unused.
    _, sig = istft(self.stft, **kwargs)
    if self.channels != 1:
        sig = np.transpose(sig)
    return pya.asig.Asig(sig, sr=self.sr, label=self.label + '_2sig',
                         cn=self.cn)
def wwrite(signals_, rate, size, w, noverlap, overlap):
    """Invert the first ten STFTs in ``signals_`` and write them as wav files.

    Each ``signals_[k, :, :]`` is inverted with ``signal.istft`` using window
    ``w``, segment length ``size`` and the given ``noverlap``. The stacked
    result is divided by its global absolute maximum before each row is
    written with ``write`` at sampling rate ``rate``. ``overlap`` is only
    used (times 100) in the output file name.
    """
    n_outputs = 10

    def _invert(row):
        _, samples = signal.istft(signals_[row, :, :], rate, window=w,
                                  nperseg=size, noverlap=noverlap)
        return samples

    # The first inversion fixes the common output length.
    inversed_signal = np.zeros((n_outputs, len(_invert(0))))
    for row in range(n_outputs):
        inversed_signal[row, :] = _invert(row)

    inversed_signal = inversed_signal / np.max(np.abs(inversed_signal))

    for row in range(n_outputs):
        file = r'C:\Users\INFORMATICS\Desktop\3d-psr-record2\3d-psr-record\psr%d_%d%s%d_r15.wav' % (
            row + 1, size, w, overlap * 100)
        write(file, rate, inversed_signal[row, :])
def main():
    """Split './mixture2.wav' into low-rank and sparse parts and play them.

    The STFT magnitude (normalised by the STFT norm) is decomposed with
    ``rpca``; both parts are re-synthesised with the mixture phase, played
    back, and then played again after a binary mask derived from the two
    parts is applied to the mixture.
    """
    sample_rate, data = wavfile.read('./mixture2.wav')
    _, _, Zxx = sig.stft(data, fs=sample_rate)

    # Only needed by the alternative solver call `solve(M, beta)`.
    beta = 1 / np.sqrt(max(Zxx.shape))

    scale = la.norm(Zxx)
    M = Zxx / scale

    def _to_time(spectrum):
        _, samples = sig.istft(spectrum, fs=sample_rate)
        return samples

    L, S, _, _ = rpca(np.absolute(M), eps_dual=1e-10, verbose=True,
                      max_iter=100)
    phase = np.exp(1j * np.angle(M))

    L_ift = _to_time(L * phase * scale)
    S_ift = _to_time(S * phase * scale)
    reconstruction = L_ift + S_ift
    ift = _to_time(Zxx)
    print('loss:', ((reconstruction - ift)**2).sum())

    say('reconstruction')
    play(ift, sample_rate)
    say('robust PCA')
    say('low rank')
    play(L_ift, sample_rate)
    say('sparse')
    play(S_ift, sample_rate)

    # Binary mask: keep a bin as background where the low-rank magnitude
    # exceeds `gain` times the sparse magnitude.
    gain = 2
    Mb = np.absolute(L) > np.absolute(S) * gain
    L_ift = _to_time(M * Mb * scale)
    S_ift = _to_time(M * (1 - Mb) * scale)

    say('with masking')
    say('background')
    play(L_ift, sample_rate)
    say('foreground')
    play(S_ift, sample_rate)
def test_axis_rolling(self): np.random.seed(1234) x_flat = np.random.randn(1024) _, _, z_flat = stft(x_flat) for a in range(3): newshape = [1,]*3 newshape[a] = -1 x = x_flat.reshape(newshape) _, _, z_plus = stft(x, axis=a) # Positive axis index _, _, z_minus = stft(x, axis=a-x.ndim) # Negative axis index assert_equal(z_flat, z_plus.squeeze(), err_msg=a) assert_equal(z_flat, z_minus.squeeze(), err_msg=a-x.ndim) # z_flat has shape [n_freq, n_time] # Test vs. transpose _, x_transpose_m = istft(z_flat.T, time_axis=-2, freq_axis=-1) _, x_transpose_p = istft(z_flat.T, time_axis=0, freq_axis=1) assert_allclose(x_flat, x_transpose_m, err_msg='istft transpose minus') assert_allclose(x_flat, x_transpose_p, err_msg='istft transpose plus')
def inverse(self, mask=None):
    '''Compute inverse Q-STFT

    Parameters
    ----------
    mask: array_type
        mask multiplied with the Q-STFT coefficients (``self.tfpr``)
        before inversion. If mask=None, an all-True mask with the shape
        of ``self.S0`` is used, i.e. nothing is masked.

    Returns
    -------
    t, xr
        the time vector of the first istft call and the signal obtained
        by combining both inverted halves with ``utils.sympSynth``.
    '''
    # istft settings are taken from the stored analysis parameters; the
    # spectra are always treated as two-sided.
    stored_keys = ('fs', 'window', 'nperseg', 'noverlap', 'nfft', 'boundary')
    inversion_dict = {key: self.params[key] for key in stored_keys}
    inversion_dict['input_onesided'] = False

    if mask is None:
        mask = np.ones(self.S0.shape, dtype=bool)

    tfp1, tfp2 = utils.sympSplit(self.tfpr * mask)

    t, x1 = sg.istft(tfp1, **inversion_dict)
    _, x2 = sg.istft(tfp2, **inversion_dict)

    return t, utils.sympSynth(x1, x2)
def update(self, n):
    ''' Update plot and sound.

    Parameters
    ----------
    n : value convertible to int, or any other string
        When ``int(n)`` succeeds, the sound is rebuilt from the STFT
        keeping only rows ``0..n`` (DC plus n channels). When it raises
        ``ValueError`` the presbyacusis profile below is applied instead.

    The audio player widget and the graph output are refreshed with the
    resulting sound.
    '''
    # Only the conversion is guarded: a ValueError raised later (e.g. by
    # istft) must not be mistaken for "n is not a number".
    try:
        n_channels = int(n)
    except ValueError:
        n_channels = None

    snd_stft = self.stft.copy()
    if n_channels is not None:
        # reconstruct sound from STFT using only n channels (and DC)
        snd_stft[n_channels + 1:, :] = 0
    else:
        # presbyacusis: per-frequency attenuation interpolated onto self.f
        freqs = [50, 125, 250, 500, 1000, 2000, 3000, 4000, 5000, 8000]
        levels = [15, 10, 15, 25, 20, 30, 45, 67, 70, 70]
        gains = [level2amp(-level) for level in levels]
        coeff = np.interp(self.f, freqs, gains)
        coeff[0] = 0  # set DC to zero
        snd_stft = snd_stft * coeff[:, np.newaxis]

    _, snd = istft(snd_stft, self.f_s)

    with self.widgets['audiooutput']:
        clear_output(wait=True)
        self.widgets['audioplayer'].update_data(self.f_s, snd)
        display(self.widgets['audioplayer'])
    with self.widgets['graphoutput']:
        clear_output(wait=True)
        self.show_shipping(snd)
def reconstruct_signals(list_of_reconstructed, padded_im, signal_stft_list,
                        indices):
    """Invert original and reconstructed STFTs and collect error figures.

    For every entry ``k`` of ``indices`` (paired by position with
    ``list_of_reconstructed``), the reconstructed real part is combined with
    the imaginary part ``padded_im[k]``, cropped to the column count of
    ``signal_stft_list[k]``, and inverted with ``istft`` alongside the
    original STFT.

    Returns
    -------
    dict
        ``{k: {'original_signal', 'reconstructed_signal', 'mse', 'snr'}}``
        where both signal entries are the full ``istft`` return values.
    """
    reconstructed_dict = {}
    for slot, position_of_signal in enumerate(indices):
        real_component = list_of_reconstructed[slot]
        imaginary_component = padded_im[position_of_signal]
        actual_signal_stft = signal_stft_list[position_of_signal]

        n_cols = actual_signal_stft.shape[1]
        cropped_stft = (real_component + 1j * imaginary_component)[:, 0:n_cols]

        original_signal = istft(actual_signal_stft, samplingFreq, 'hann')
        reconstructed_signal = istft(cropped_stft, samplingFreq, 'hann')
        original_samples = original_signal[1]
        reconstructed_samples = reconstructed_signal[1]

        num_samples = original_samples.shape[0]
        squared_error = np.sum((original_samples - reconstructed_samples)**2)

        power_signal = sum(p**2 for p in original_samples) / num_samples
        # NOTE(review): this is the power of the reconstructed signal, not
        # of the residual - confirm that is the intended "noise" term.
        power_noise = sum(p**2 for p in reconstructed_samples) / num_samples

        reconstructed_dict[position_of_signal] = {
            'original_signal': original_signal,
            'reconstructed_signal': reconstructed_signal,
            'mse': squared_error / num_samples,
            'snr': 10 * np.log10(power_signal / power_noise),
        }
    return reconstructed_dict
def test_roundtrip_boundary_extension(self): np.random.seed(1234) # Test against boxcar, since window is all ones, and thus can be fully # recovered with no boundary extension settings = [ ('boxcar', 100, 10, 0), # Test no overlap ('boxcar', 100, 10, 9), # Test high overlap ] for window, N, nperseg, noverlap in settings: t = np.arange(N) x = 10*np.random.randn(t.size) _, _, zz = stft(x, nperseg=nperseg, noverlap=noverlap, window=window, detrend=None, padded=True, boundary=None) _, xr = istft(zz, noverlap=noverlap, window=window, boundary=False) for boundary in ['even', 'odd', 'constant', 'zeros']: _, _, zz_ext = stft(x, nperseg=nperseg, noverlap=noverlap, window=window, detrend=None, padded=True, boundary=boundary) _, xr_ext = istft(zz_ext, noverlap=noverlap, window=window, boundary=True) msg = '{0}, {1}, {2}'.format(window, noverlap, boundary) assert_allclose(x, xr, err_msg=msg) assert_allclose(x, xr_ext, err_msg=msg)
def Binary_Mask(Zxx, Zxx1, L, R, index, fs, EPSILON):
    """Overwrite selected magnitude bins with EPSILON and invert both STFTs.

    ``Zxx`` / ``Zxx1`` are split into magnitude and phase with
    ``librosa.magphase``. For every ``i`` in ``index`` the value ``L[i]``
    (respectively ``R[i]``) is looked up in the left (right) magnitude and
    one matching position is set to ``EPSILON``; the modified magnitudes are
    recombined with their phases and inverted with ``signal.istft``.

    Returns
    -------
    xrec_Cluster, xrec1_Cluster, spectrogramL_Cluster, spectrogramR_Cluster
    """
    spectrogramL_Cluster, spectrogramL_phase = librosa.magphase(Zxx)
    spectrogramR_Cluster, spectrogramR_phase = librosa.magphase(Zxx1)

    def _overwrite_selected(magnitude, candidates):
        selected = np.array([candidates[i] for i in index])
        for value in selected:
            # np.where yields (rows, cols); after flattening, the first two
            # entries are used as the position to overwrite.
            hit = np.array(np.where(magnitude == value)).reshape(-1)
            magnitude[hit[0]][hit[1]] = EPSILON

    _overwrite_selected(spectrogramL_Cluster, L)
    _overwrite_selected(spectrogramR_Cluster, R)

    _, xrec_Cluster = signal.istft(
        spectrogramL_Cluster * spectrogramL_phase, fs)
    _, xrec1_Cluster = signal.istft(
        spectrogramR_Cluster * spectrogramR_phase, fs)
    return (xrec_Cluster, xrec1_Cluster,
            spectrogramL_Cluster, spectrogramR_Cluster)
def test_permute_axes(self): np.random.seed(1234) x = np.random.randn(1024) fs = 1.0 window = 'hann' nperseg = 16 noverlap = 8 f1, t1, Z1 = stft(x, fs, window, nperseg, noverlap) f2, t2, Z2 = stft(x.reshape((-1, 1, 1)), fs, window, nperseg, noverlap, axis=0) t3, x1 = istft(Z1, fs, window, nperseg, noverlap) t4, x2 = istft(Z2.T, fs, window, nperseg, noverlap, time_axis=0, freq_axis=-1) assert_allclose(f1, f2) assert_allclose(t1, t2) assert_allclose(t3, t4) assert_allclose(Z1, Z2[:, 0, 0, :]) assert_allclose(x1, x2[:, 0, 0])
def test_roundtrip_padded_FFT(self): np.random.seed(1234) settings = [ ('hann', 1024, 256, 128, 512), ('hann', 1024, 256, 128, 501), ('boxcar', 100, 10, 0, 33), (('tukey', 0.5), 1152, 256, 64, 1024), ] for window, N, nperseg, noverlap, nfft in settings: t = np.arange(N) x = 10*np.random.randn(t.size) xc = x*np.exp(1j*np.pi/4) # real signal _, _, z = stft(x, nperseg=nperseg, noverlap=noverlap, nfft=nfft, window=window, detrend=None, padded=True) # complex signal _, _, zc = stft(xc, nperseg=nperseg, noverlap=noverlap, nfft=nfft, window=window, detrend=None, padded=True, return_onesided=False) tr, xr = istft(z, nperseg=nperseg, noverlap=noverlap, nfft=nfft, window=window) tr, xcr = istft(zc, nperseg=nperseg, noverlap=noverlap, nfft=nfft, window=window, input_onesided=False) msg = '{0}, {1}'.format(window, noverlap) assert_allclose(t, tr, err_msg=msg) assert_allclose(x, xr, err_msg=msg) assert_allclose(xc, xcr, err_msg=msg)
def NMFtransform(W1W2H, W1W2, W1, W2, ZX):
    """
    Separate two speakers from a mixture STFT with NMF-derived ratio masks.

    Parameters:
        W1W2H: joint activations; the first ``W1.shape[1]`` rows are paired
               with W1, the remaining rows with W2
        W1W2: basis matrix multiplied with W1W2H to form the mask denominator
        W1: basis vectors of speaker 1
        W2: basis vectors of speaker 2
        ZX: STFT of the mixed audio signal

    Returns:
        speaker1: istft of the mixture masked for speaker 1
        speaker2: istft of the mixture masked for speaker 2
    """
    split = W1.shape[1]

    # Both masks share the same denominator (the full model).
    full_model = np.dot(W1W2, W1W2H)
    mask_1 = np.dot(W1, W1W2H[:split, :]) / full_model
    mask_2 = np.dot(W2, W1W2H[split:, :]) / full_model

    separated = []
    for mask in (mask_1, mask_2):
        _, samples = istft(np.multiply(mask, ZX))
        separated.append(samples)
    speaker1, speaker2 = separated
    return speaker1, speaker2
def save_audio(freq, audio_path):
    """Invert a 4-plane stereo spectrogram and write it as a 16-bit WAV.

    ``freq.numpy()`` is transposed; planes 0/1 of the last axis are used as
    real/imaginary parts of the left channel and planes 2/3 for the right
    channel. Both are inverted with ``signal.istft`` and written to
    ``audio_path`` at 44100 Hz.
    """
    planes = freq.numpy().transpose()

    def _invert(real_plane, imag_plane):
        spectrum = planes[:, :, real_plane] + 1j * planes[:, :, imag_plane]
        _, samples = signal.istft(spectrum, 10e3)
        return samples

    rec_left = _invert(0, 1)
    rec_right = _invert(2, 3)

    # One column per channel.
    audio_rec = np.vstack((rec_left, rec_right)).T
    sf.write(audio_path, audio_rec, 44100, format='WAV', subtype="PCM_16")
def _filter_bandpass_stft(data, t, dt, fs, nt, nch, df):
    """Split ``data`` into in-band and out-of-band parts via STFT masking.

    The STFT is taken along axis 0 of ``data``. Frequencies inside ``df``
    (plus its multiples when ``harm`` is set) and outside ``df_out`` (and
    its multiples when ``harm_out`` is set) are kept for the "in" part;
    the remaining frequencies form the "out" part.

    ``window``, ``nperseg``, ``noverlap``, ``nfft``, ``boundary``,
    ``padded``, ``harm``, ``df_out`` and ``harm_out`` are read from the
    enclosing scope. ``t``, ``dt``, ``nt`` and ``nch`` are not used here.

    Returns
    -------
    data_in, data_out
        the raw return values of ``scpsig.istft`` (a ``(times, signal)``
        pair each) for the kept and the rejected frequencies.
    """
    # Get stft
    f, tf, ssx = scpsig.stft(data, fs=fs, window=window, nperseg=nperseg,
                             noverlap=noverlap, nfft=nfft, detrend=False,
                             return_onesided=True, boundary=boundary,
                             padded=padded, axis=0)
    # NOTE(review): from here on only the squared magnitude is kept, so the
    # phase is lost before inversion - confirm this is intended.
    ssx = np.abs(ssx)**2
    nf = f.size

    # Intervals of interest for reconstruction
    if df is None:
        indin = np.ones((nf, ), dtype=bool)
    else:
        indin = (f >= df[0]) & (f <= df[1])
        if harm:
            for ii in range(1, int(np.floor(f.max() / df[0]))):
                indin = indin | ((f >= df[0] * ii) & (f <= df[1] * ii))
    if df_out is not None:
        indin = indin & ~((f >= df_out[0]) & (f <= df_out[1]))
        if harm_out:
            for ii in range(1, int(np.floor(f.max() / df_out[0]))):
                indin = indin & ~((f >= df_out[0] * ii)
                                  & (f <= df_out[1] * ii))

    # Reconstructing
    ssxphys = np.copy(ssx)
    ssxphys[~indin, :] = 0
    ssx[indin, :] = 0

    # stft(..., axis=0) puts frequencies on axis 0 and segment times on the
    # last axis. istft rejects identical time and frequency axes, so the
    # time axis must be given as -1.
    inverse_opts = dict(fs=fs, window=window, nperseg=nperseg,
                        noverlap=noverlap, nfft=nfft, input_onesided=True,
                        boundary=boundary, time_axis=-1, freq_axis=0)
    data_in = scpsig.istft(ssxphys, **inverse_opts)
    data_out = scpsig.istft(ssx, **inverse_opts)
    return data_in, data_out
def reverseFFT(xList, yList, zList):
    """Invert three STFTs (x, y, z axes) with a fixed istft configuration.

    Every input is passed to ``signal.istft`` with fs=1000, nfft=128 and
    noverlap=120.

    Returns
    -------
    t, resultX, resultY, resultZ
        ``t`` is the time vector returned for ``zList``.
    """
    fs = 1000
    inverted = [
        signal.istft(spectrum, fs, nfft=128, noverlap=120)
        for spectrum in (xList, yList, zList)
    ]
    # The time vector of the last inversion is the one handed back.
    t = inverted[-1][0]
    resultX, resultY, resultZ = (samples for _, samples in inverted)
    return t, resultX, resultY, resultZ
def test_roundtrip_complex(self): np.random.seed(1234) settings = [ ('boxcar', 100, 10, 0), # Test no overlap ('boxcar', 100, 10, 9), # Test high overlap ('bartlett', 101, 51, 26), # Test odd nperseg ('hann', 1024, 256, 128), # Test defaults (('tukey', 0.5), 1152, 256, 64), # Test Tukey ('hann', 1024, 256, 255), # Test overlapped hann ] for window, N, nperseg, noverlap in settings: t = np.arange(N) x = 10 * np.random.randn(t.size) + 10j * np.random.randn(t.size) _, _, zz = stft(x, nperseg=nperseg, noverlap=noverlap, window=window, detrend=None, padded=False, return_onesided=False) tr, xr = istft(zz, nperseg=nperseg, noverlap=noverlap, window=window, input_onesided=False) msg = '{0}, {1}, {2}'.format(window, nperseg, noverlap) assert_allclose(t, tr, err_msg=msg) assert_allclose(x, xr, err_msg=msg) # Check that asking for onesided switches to twosided with warnings.catch_warnings(): warnings.simplefilter('ignore', UserWarning) _, _, zz = stft(x, nperseg=nperseg, noverlap=noverlap, window=window, detrend=None, padded=False, return_onesided=True) tr, xr = istft(zz, nperseg=nperseg, noverlap=noverlap, window=window, input_onesided=False) msg = '{0}, {1}, {2}'.format(window, nperseg, noverlap) assert_allclose(t, tr, err_msg=msg) assert_allclose(x, xr, err_msg=msg)
def separate(fileName = 'audio.wav', numComp = 5, numIter = 100, a = 1, b = 1):
    """Factorise the magnitude spectrogram of a wav file and write the parts.

    The first channel of ``fileName`` is transformed with 40 ms Hamming
    frames at 50% overlap. Its magnitude is factorised into ``numComp``
    components by ``numIter`` rounds of ``updateB`` / ``updateG``; the cost
    (KLD + ``a`` * temporal + ``b`` * sparsity) is printed every round.
    The full model is written to 'res.wav' and each component to
    'comp<j>.wav', all re-synthesised with the mixture phase.
    """
    # Read file
    fs, data = wavfile.read(fileName)
    x = data if len(data.shape) != 2 else data[:, 0]

    # Get Spectrogram - 40ms frames, 50% overlap
    winLen = int(40e-3 * fs)
    noverlap = winLen // 2
    win = signal.windows.hamming(winLen, sym=False)
    f, t, S = signal.stft(x, fs, win, winLen, noverlap, winLen,
                          detrend=False, return_onesided=True, boundary=None)

    def _draw_spectrogram(spectrum):
        plt.pcolormesh(t, f, 20*np.log10(np.abs(spectrum)))
        plt.ylabel('Frequency [Hz]')
        plt.xlabel('Time [sec]')

    _draw_spectrogram(S)

    T = len(t)   # Number of time frames
    K = len(f)   # Frequency ticks
    J = numComp  # Number of components

    X = np.abs(S)
    B = np.abs(np.random.normal(size=(K, J)))
    G = np.abs(np.random.normal(size=(J, T)))

    # Train - numIter, a, b
    for _ in range(numIter):
        B = updateB(X, B, G)
        G = updateG(X, B, G, a, b)
        print(costKLD(X, B, G) + a * costTemporal(G) + b * costSparsity(G))

    # Synthesis
    BG = np.matmul(B, G)
    _draw_spectrogram(BG)

    mixture_phase = np.exp(1j * np.angle(S))

    def _write_wav(path, magnitude):
        # Positional istft arguments: window, nperseg, noverlap, nfft,
        # input_onesided=True, boundary=False.
        _, y = signal.istft(magnitude * mixture_phase, fs, win, winLen,
                            noverlap, winLen, True, False)
        wavfile.write(path, fs, np.int16(y))

    _write_wav('res.wav', BG)

    # Get Components
    for j in range(J):
        # Slices keep both dimensions so the product is a (K, T) matrix.
        _write_wav('comp{}.wav'.format(j), B[:, j:j + 1] * G[j:j + 1, :])
def separate(fileName = 'audio.wav', numComp = 5, numIter = 250, a = 1, b = 1):
    """Factorise the magnitude spectrogram of a wav file and write the parts.

    The first channel of ``fileName`` is transformed with 40 ms Hamming
    frames at 50% overlap and its spectrogram is shown. The magnitude is
    factorised into ``numComp`` components by ``numIter`` rounds of
    ``updateB`` / ``updateG``; the cost (KLD + ``a`` * temporal +
    ``b`` * sparsity) is printed every round. The model spectrogram is
    shown, then written to 'recons.wav'; each component goes to
    'comp<j>.wav'. All outputs reuse the mixture phase.
    """
    # Read file
    fs, data = wavfile.read(fileName)
    x = data if len(data.shape) != 2 else data[:, 0]

    # Get Spectrogram - 40ms frames, 50% overlap
    winLen = int(40e-3 * fs)
    noverlap = winLen // 2
    win = signal.windows.hamming(winLen, sym=False)
    f, t, X = signal.stft(x, fs, win, winLen, noverlap, winLen,
                          detrend=False, return_onesided=True, boundary=None)

    def _show_spectrogram(spectrum, title):
        plt.pcolormesh(t, f, 20*np.log10(np.abs(spectrum)))
        plt.ylabel('Frequency [Hz]')
        plt.xlabel('Time [sec]')
        plt.title(title)
        plt.show()

    _show_spectrogram(X, 'Spectrogram of the Original Signal')

    T = len(t)   # Number of time frames
    F = len(f)   # Frequency ticks
    K = numComp  # Number of components

    V = np.abs(X)
    W = np.abs(np.random.normal(size=(F, K)))
    H = np.abs(np.random.normal(size=(K, T)))

    # Train - numIter, a, b
    for _ in range(numIter):
        W = updateB(V, W, H)
        H = updateG(V, W, H, a, b)
        print(costKLD(V, W, H) + a * costTemporal(H) + b * costSparsity(H))

    # Synthesis
    WH = np.matmul(W, H)
    _show_spectrogram(WH, 'Spectrogram of the Reconstructed Signal')

    mixture_phase = np.exp(1j * np.angle(X))

    def _write_wav(path, magnitude):
        # Positional istft arguments: window, nperseg, noverlap, nfft,
        # input_onesided=True, boundary=False.
        _, y = signal.istft(magnitude * mixture_phase, fs, win, winLen,
                            noverlap, winLen, True, False)
        wavfile.write(path, fs, np.int16(y))

    # Reconstructed audio
    _write_wav('recons.wav', WH)

    # Save each component
    for j in range(K):
        # Slices keep both dimensions so the product is an (F, T) matrix.
        _write_wav('comp{}.wav'.format(j), W[:, j:j + 1] * H[j:j + 1, :])
def separate_with_mask(x, mask, force_mask_structure=False):
    """Split ``x`` into two signals using ``mask`` on its STFT magnitude.

    ``fs``, ``window`` and ``nperseg`` are taken from the enclosing scope.

    Parameters
    ----------
    x : signal passed to ``signal.stft``
    mask : array multiplied with the STFT magnitude
    force_mask_structure : bool
        when true, the STFT is cropped to the first ``mask.shape[0]`` rows
        and ``mask.shape[1]`` columns before masking.

    Returns
    -------
    x0, x1
        inversion of the ``1 - mask`` part and of the ``mask`` part.
    """
    framing = dict(window=window, nperseg=nperseg)

    _, _, Sxx = signal.stft(x, fs, **framing)
    if force_mask_structure:
        n_rows, n_cols = mask.shape[0], mask.shape[1]
        Sxx = Sxx[:n_rows, :n_cols]

    phase_exp = np.exp(1j * np.angle(Sxx))  # type: np.ndarray
    magnitude = np.abs(Sxx)  # type: np.ndarray

    separated = []
    for weights in (1 - mask, mask):
        _, samples = signal.istft(magnitude * weights * phase_exp, fs,
                                  **framing)
        separated.append(samples)
    x0, x1 = separated
    return x0, x1
def filter_with_stft(x, fs=128, window='hann', nperseg=128, noverlap=None,
                     nfft=256):
    '''
    Band-filter a signal with the short-time Fourier transform.

    The STFT of ``x`` is masked once per band and inverted again, giving
    four sub-band signals. The cut-off frequencies (both ends inclusive)
    are:

    * theta: 3.5 - 7.5 Hz   (nominally 4-7 Hz)
    * alpha: 7.5 - 13.5 Hz  (nominally 8-13 Hz)
    * beta:  13.5 - 30.5 Hz (nominally 14-30 Hz)
    * gamma: 30.5 - 50.0 Hz (nominally 31-50 Hz)

    Parameters
    ----------
    x : signal to filter
    fs, window, nperseg, noverlap, nfft :
        forwarded unchanged to both ``signal.stft`` and ``signal.istft``.

    Returns
    -------
    (rec_theta, rec_alpha, rec_beta, rec_gamma)
        time-domain signal of each band.

    Raises
    ------
    AssertionError
        if a reconstructed band does not have the same length as ``x``.
    '''
    transform_opts = dict(fs=fs, window=window, nperseg=nperseg,
                          noverlap=noverlap, nfft=nfft)
    f, _, Zxx = signal.stft(x, **transform_opts)

    bands = (
        (3.5, 7.5),    # theta
        (7.5, 13.5),   # alpha
        (13.5, 30.5),  # beta
        (30.5, 50.0),  # gamma
    )

    recovered = []
    for low, high in bands:
        in_band = (f >= low) & (f <= high)
        # The frequency mask broadcasts over the time-frame axis.
        band_spectrum = np.where(in_band[:, np.newaxis], Zxx, 0)
        _, rec = signal.istft(band_spectrum, **transform_opts)
        assert (len(rec) == len(x))
        recovered.append(rec)
    return tuple(recovered)
def test_roundtrip_real(self): np.random.seed(1234) settings = [ ('boxcar', 100, 10, 0), # Test no overlap ('boxcar', 100, 10, 9), # Test high overlap ('bartlett', 101, 51, 26), # Test odd nperseg ('hann', 1024, 256, 128), # Test defaults (('tukey', 0.5), 1152, 256, 64), # Test Tukey ('hann', 1024, 256, 255), # Test overlapped hann ] for window, N, nperseg, noverlap in settings: t = np.arange(N) x = 10*np.random.randn(t.size) _, _, zz = stft(x, nperseg=nperseg, noverlap=noverlap, window=window, detrend=None, padded=False) tr, xr = istft(zz, nperseg=nperseg, noverlap=noverlap, window=window) msg = '{0}, {1}'.format(window, noverlap) assert_allclose(t, tr, err_msg=msg) assert_allclose(x, xr, err_msg=msg)
def estimateSpectro(X_origin, newM):
    """Separate a mixture STFT into sources with soft ratio masks.

    Parameters
    ----------
    X_origin : complex STFT of the mixture
    newM : dict
        maps each source name to its spectrogram.

    Returns
    -------
    dict
        maps each source name to the transposed istft (nperseg=4096,
        noverlap=3072) of the masked mixture.
    """
    # small epsilon to avoid dividing by zero
    # (the builtin float is used because the np.float alias no longer
    # exists in current NumPy releases)
    eps = np.finfo(float).eps

    # compute model as the sum of spectrograms: iterate over all sources
    # to build the denominator of the masks
    model = eps
    for source in newM.values():
        model = model + source

    # now perform separation: iterate over all sources and extract each
    # one with its mask
    estimates = {}
    for name, source in newM.items():
        # compute soft mask as the ratio between source spectrogram and total
        Mask = source / model

        # multiply the mix by the mask
        Yj = Mask * X_origin

        # invert to time domain and set this as the source estimate
        estimates[name] = istft(Yj, nperseg=4096, noverlap=3072)[1].T

    return estimates
def spectrogram_to_audio(data):
    """Return the time-domain signal obtained by inverting ``data``.

    ``sample_rate``, ``samples_per_window`` and ``samples_per_step`` are
    read from the enclosing scope; the overlap passed to ``istft`` is four
    times ``samples_per_step``.
    """
    overlap = samples_per_step * 4
    _, audio = istft(
        data,
        sample_rate,
        nperseg=samples_per_window,
        noverlap=overlap,
    )
    return audio
def gen_audio(self, seg_length, prog_ind):
    """Generate the next block of blended audio into ``self.new_data``.

    A window of ``seg_length * CHUNK`` samples (plus ``LEN_WINDOW``) is cut
    from both tracks at position ``prog_ind``. Each snippet is transformed
    with an STFT and its magnitude normalised per frame. The network
    ``self.full_net`` predicts a blended magnitude from both normalised
    magnitudes and the slider settings; phase and per-frame gains are
    interpolated linearly with ``self.alpha``. The inverted result is
    trimmed and reshaped into rows of ``CHUNK`` samples.

    Also clears ``self.make_audio``.
    """
    num_samps = seg_length * CHUNK
    self.make_audio = False

    # Truncating tracks to the window used for the STFT
    start = num_samps * prog_ind
    stop = num_samps * (prog_ind + 1) + LEN_WINDOW
    snip1 = self.track1[start:stop]
    snip2 = self.track2[start:stop]

    def _normalised_stft(snippet):
        """Return (normalised magnitude transposed, frame gains, phase)."""
        _, _, spectrum = signal.stft(snippet, nperseg=LEN_WINDOW,
                                     nfft=LEN_WINDOW, fs=SAMP_RATE,
                                     noverlap=3 * CHUNK)
        magnitude = np.abs(spectrum)
        # Small offset avoids division by zero when normalising frames.
        frame_gain = magnitude.max(axis=0) + 0.000000001
        return (magnitude / frame_gain).T, frame_gain, np.angle(spectrum)

    magA, max_A, phaseA = _normalised_stft(snip1)
    magB, max_B, phaseB = _normalised_stft(snip2)

    weight_a = self.alpha
    weight_b = 1 - self.alpha

    # NOTE(review): the original author doubted the NUM_CHUNKS + 5 row count.
    tile_shape = (NUM_CHUNKS + 5, 1)
    temp_alpha = np.tile(weight_a * self.temp_sliders, tile_shape)
    temp_negalpha = np.tile(weight_b * self.temp_sliders, tile_shape)

    # Interpolate phase and normalising gains between the two tracks
    temp_phase = weight_a * phaseA + weight_b * phaseB
    temp_max = weight_a * max_A + weight_b * max_B

    temp_out_mag = self.full_net.predict(
        [magA, magB, temp_alpha, temp_negalpha])
    out_mag = temp_out_mag.T * temp_max
    E = out_mag * np.exp(1j * temp_phase)

    # np.float32 is applied to istft's (times, samples) pair as a whole;
    # only the samples row is kept. The 0.24 gain is unexplained upstream.
    _, temp_out = np.float32(
        signal.istft(0.24 * E, fs=SAMP_RATE, noverlap=3 * CHUNK))

    out = temp_out[CHUNK:-2 * CHUNK]
    self.new_data = out.reshape((len(out) // CHUNK, CHUNK))
def RebuildWavFromMask(data, mask, window, window_size, window_shift,
                       spl=8000):
    '''
    Apply a mask to a complex spectrogram and invert it to a waveform.

    data: [T, num_bins] <- complex
    mask: [T, num_bins]
    window, window_size, window_shift: istft window, segment length and hop
        (the overlap is window_size - window_shift)
    spl: sampling rate handed to istft

    Returns the time-domain signal of the masked magnitude combined with
    the phase of ``data``.
    '''
    mix_angles = np.angle(data)
    c_spk = np.abs(data) * mask

    # Polar to rectangular form, real and imaginary parts built separately.
    real_part = c_spk * np.cos(mix_angles)
    imag_part = 1j * c_spk * np.sin(mix_angles)
    rebuild = real_part + imag_part

    hop_overlap = window_size - window_shift
    _, rebuild_wav = signal.istft(rebuild,
                                  fs=spl,
                                  window=window,
                                  nperseg=window_size,
                                  noverlap=hop_overlap,
                                  nfft=window_size,
                                  time_axis=-2,
                                  freq_axis=-1)
    return rebuild_wav
def recover_from_stft_spectrogram(Zxx, fs):
    '''
    Recover the time-domain signal from a spectrogram via the inverse STFT

    :param : Zxx. np.array. The complex spectrogram
    :param : fs. int. Sample rate of the signal
    :return : (t, data). time vector and time-domain signal from istft
    :raises Exception: when the Hann window with the derived segment length
        and overlap fails the nonzero overlap-add check
    '''
    # Segment length and hop are derived from the window and hop durations
    # (hann_win_length, hop_length) defined outside this function; the FFT
    # size comes from fft_size. scipy expresses the hop as
    # nperseg - noverlap, hence the conversion below.
    n_per_seg = int(hann_win_length * fs)
    n_overlap = n_per_seg - int(hop_length * fs)

    # Guard: inversion is only attempted when the NOLA constraint holds.
    if not check_NOLA('hann', n_per_seg, n_overlap):
        raise Exception(
            "The nonzero overlap constraint was not met while computing an inverse STFT"
        )

    times, samples = istft(Zxx,
                           fs,
                           window='hann',
                           nperseg=n_per_seg,
                           noverlap=n_overlap,
                           nfft=fft_size)
    return times, samples
def istft(self, X):
    """
    Generates the inverse STFT of frequency domain data.

    Parameters
    ----------
    X : ndarray, shape(nb_frames, nb_bins, nb_channels)
        STFT of the audio signal; it is transposed before inversion.

    Returns
    -------
    data : ndarray, shape(nb_channels, nb_samples)
        audio signal in time domain.

    Notes
    -----
    inverse STFT is given by:

    .. math:: x(m) = \\frac{ \\sum_{t}x_{t}(m)h(m-tH) }{ \\sum_{t} h^{2}(m-tH) }

    Where the parameters represents following:

        * :math:`x(m)` is retrieved signal
        * :math:`h` is window
        * :math:`t` is time of the signal
        * :math:`H` = `n_per_seg` - `n_overlap` is hop size of the signal

    See Also
    --------
    stft: STFT (Short Time Fourier Transformation)
    """
    # The time vector (first return value) is not needed by callers.
    return istft(Zxx=X.T, fs=self.sr, noverlap=self.n_overlap)[1]
def stft_to_wav(Zxx_magn, Zxx_phase):
    """Convert STFT magnitude and phase arrays to a time series audio signal.

    Args:
        Zxx_magn (2D numpy array): magnitude part, combined with the phase
            by ``get_spectrum``.
        Zxx_phase (2D numpy array): phase part.

    Outputs:
        wav (1D numpy array): The reconstructed audio signal, divided by
            its peak when the peak exceeds 1.0.
    """
    # first construct spectrum from magnitude and phase
    Zxx = get_spectrum(Zxx_magn, Zxx_phase)

    # report whether the configured window satisfies the COLA check
    cola_ok = check_COLA(Preprocessing.WINDOW, Preprocessing.WINLEN,
                         Preprocessing.WINSHIFT)
    print("inversion possible? ", cola_ok)

    inversion_settings = dict(
        fs=Preprocessing.FS,
        window=Preprocessing.WINDOW,
        nperseg=Preprocessing.WINLEN,
        noverlap=Preprocessing.WINSHIFT,
        nfft=Preprocessing.NFFT,
        input_onesided=Preprocessing.ONESIDED,
        boundary=Preprocessing.BOUNDARY,
        time_axis=-1,
        freq_axis=-2,
    )
    _, wav = istft(Zxx, **inversion_settings)
    assert not np.isnan(wav).any()

    # The output signal must be in the range [-1, 1]; normalise if it is not.
    peak = np.max(abs(wav))
    return wav / peak if peak > 1.0 else wav
def griffinLims(S,maxIter,nfft,Overlap):
    """Estimate a signal from the magnitude spectrogram ``S`` by iterating
    between istft and stft.

    In each of the ``maxIter`` rounds the current spectrogram is inverted,
    re-analysed, and the phase of the re-analysis is combined with the
    original magnitudes. Frequency is expected on axis -3 and time frames
    on axis -1 of ``S``.

    Returns the real part of the final inversion, transposed.
    """
    S0 = S.astype('complex128')
    framing = dict(window='hann', nperseg=nfft, noverlap=Overlap)

    def _synthesise(spectrum):
        _, samples = istft(spectrum, input_onesided=True, boundary=True,
                           time_axis=-1, freq_axis=-3, **framing)
        return np.transpose(samples)

    def _analyse(samples):
        _, _, spectrum = stft(samples, return_onesided=True,
                              boundary='zeros', padded=True, axis=0,
                              **framing)
        return spectrum

    current = S
    for _ in range(maxIter):
        S_est = _analyse(_synthesise(current))
        # Floor the magnitude so the phase division never hits zero.
        floored = np.maximum(np.abs(S_est), 1e-6)
        current = np.multiply(S_est / floored, S0)

    return np.real(_synthesise(current))
def test_roundtrip_complex(self): np.random.seed(1234) settings = [ ('boxcar', 100, 10, 0), # Test no overlap ('boxcar', 100, 10, 9), # Test high overlap ('bartlett', 101, 51, 26), # Test odd nperseg ('hann', 1024, 256, 128), # Test defaults (('tukey', 0.5), 1152, 256, 64), # Test Tukey ('hann', 1024, 256, 255), # Test overlapped hann ] for window, N, nperseg, noverlap in settings: t = np.arange(N) x = 10*np.random.randn(t.size) + 10j*np.random.randn(t.size) _, _, zz = stft(x, nperseg=nperseg, noverlap=noverlap, window=window, detrend=None, padded=False, return_onesided=False) tr, xr = istft(zz, nperseg=nperseg, noverlap=noverlap, window=window, input_onesided=False) msg = '{0}, {1}, {2}'.format(window, nperseg, noverlap) assert_allclose(t, tr, err_msg=msg) assert_allclose(x, xr, err_msg=msg) # Check that asking for onesided switches to twosided with suppress_warnings() as sup: sup.filter(UserWarning, "Input data is complex, switching to return_onesided=False") _, _, zz = stft(x, nperseg=nperseg, noverlap=noverlap, window=window, detrend=None, padded=False, return_onesided=True) tr, xr = istft(zz, nperseg=nperseg, noverlap=noverlap, window=window, input_onesided=False) msg = '{0}, {1}, {2}'.format(window, nperseg, noverlap) assert_allclose(t, tr, err_msg=msg) assert_allclose(x, xr, err_msg=msg)
def test_roundtrip_float32(self): np.random.seed(1234) settings = [('hann', 1024, 256, 128)] for window, N, nperseg, noverlap in settings: t = np.arange(N) x = 10*np.random.randn(t.size) x = x.astype(np.float32) _, _, zz = stft(x, nperseg=nperseg, noverlap=noverlap, window=window, detrend=None, padded=False) tr, xr = istft(zz, nperseg=nperseg, noverlap=noverlap, window=window) msg = '{0}, {1}'.format(window, noverlap) assert_allclose(t, t, err_msg=msg) assert_allclose(x, xr, err_msg=msg, rtol=1e-4) assert_(x.dtype == xr.dtype)
def test_roundtrip_padded_signal(self): np.random.seed(1234) settings = [ ('boxcar', 101, 10, 0), ('hann', 1000, 256, 128), ] for window, N, nperseg, noverlap in settings: t = np.arange(N) x = 10*np.random.randn(t.size) _, _, zz = stft(x, nperseg=nperseg, noverlap=noverlap, window=window, detrend=None, padded=True) tr, xr = istft(zz, noverlap=noverlap, window=window) msg = '{0}, {1}'.format(window, noverlap) # Account for possible zero-padding at the end assert_allclose(t, tr[:t.size], err_msg=msg) assert_allclose(x, xr[:x.size], err_msg=msg)