def voiceMusicSeparation(audio, masktype=1, lamb=1.25, gain=1.25):
    """Split ``audio`` into two signals using RPCA on its magnitude spectrogram.

    Parameters
    ----------
    audio : array_like
        Input samples; only ``len(audio)`` output samples are returned.
    masktype : int or bool, optional
        If truthy, a binary time-frequency mask is applied to the mixture
        spectrogram. If falsy, the RPCA magnitudes are recombined with the
        mixture phase directly.
    lamb : float, optional
        Passed unchanged to ``ialmRPCA`` as its second argument.
    gain : float, optional
        Scale applied to ``A_mag`` before comparing it with ``E_mag`` when
        building the binary mask.

    Returns
    -------
    wavoutA, wavoutE : numpy.ndarray
        ``int16`` arrays reconstructed from the ``A`` and ``E`` components
        returned by ``ialmRPCA`` (presumably the low-rank and sparse parts --
        confirm against ``ialmRPCA``).
    """
    import stft

    # Forward STFT of the mixture.
    specgram = stft.spectrogram(audio)

    # RPCA operates on magnitudes only; the iteration count is not needed.
    A_mag, E_mag, _ = ialmRPCA(abs(specgram), lamb)

    if masktype:
        # Binary mask: a bin belongs to E where E dominates the scaled A.
        binary_mask = 1.0 * (abs(E_mag) > abs(gain * A_mag))
        Emask = binary_mask * specgram
        Amask = specgram - Emask
    else:
        # Re-attach the mixture phase to the separated magnitudes.
        # np.exp replaces the deprecated scipy.exp alias of the same function.
        phase = np.exp(np.angle(specgram) * 1j)
        Emask = E_mag * phase
        Amask = A_mag * phase

    # Inverse STFT of both components.
    outputA = stft.ispectrogram(Amask)
    outputE = stft.ispectrogram(Emask)

    # Crop to the input length and convert to 16-bit samples.
    wavoutA = np.array(outputA[:len(audio)], dtype=np.int16)
    wavoutE = np.array(outputE[:len(audio)], dtype=np.int16)
    return wavoutA, wavoutE
def test_maxdim():
    """Inputs with too many dimensions must raise ``ValueError``."""
    # A 3-D array is passed where a signal is expected.
    oversized_signal = numpy.random.random((512, 2, 2))
    with pytest.raises(ValueError):
        stft.spectrogram(oversized_signal)

    # A 4-D array is passed where a spectrogram is expected.
    oversized_spectrogram = numpy.random.random((512, 2, 2, 3))
    with pytest.raises(ValueError):
        stft.ispectrogram(oversized_spectrogram)
def createMatrix():
    """Separate ``merged`` with a hard binary mask built from ``raw1``/``raw2``.

    Reads the two source recordings named by the module-level ``raw1`` and
    ``raw2``, truncates them to a common length, and marks every
    time-frequency bin as belonging to whichever source has the larger
    magnitude there. The two complementary masks are applied to the
    spectrogram of ``merged`` and the results are written to
    ``separated_dir + "a.wav"`` and ``separated_dir + "b.wav"`` using the
    sample rate of ``raw1``.

    Returns
    -------
    None
    """
    fs1, data1 = wavfile.read(raw1)
    _, data2 = wavfile.read(raw2)

    # Both sources must cover the same number of samples so that their
    # spectrograms have identical shapes.
    minlen = min(len(data1), len(data2))
    spec1 = squeeze(stft.spectrogram(data1[:minlen]))
    spec2 = squeeze(stft.spectrogram(data2[:minlen]))

    # Hard mask, computed for all bins at once: ``b`` is 1.0 where source 2
    # is strictly louder, ``a`` is its complement (ties go to source 1).
    # np.asarray strips any ndarray subclass so the masks are plain float
    # arrays, as in the original element-wise construction.
    b = np.asarray(np.abs(spec1) < np.abs(spec2), dtype=float)
    a = 1.0 - b
    # A soft alternative would be the ratio masks |s1| / (|s1| + |s2|) and
    # |s2| / (|s1| + |s2|), leaving bins with a zero denominator at 0.

    # NOTE(review): assumes the mixture spectrogram has the same shape as the
    # masks -- confirm that ``merged`` matches the truncated sources.
    _, data = wavfile.read(merged)
    spec = squeeze(stft.spectrogram(data))

    output_a = createSpectrogram(np.multiply(a, spec), spec)
    output_b = createSpectrogram(np.multiply(b, spec), spec)

    output_a2 = stft.ispectrogram(output_a)
    output_b2 = stft.ispectrogram(output_b)

    writeWav(separated_dir + "a.wav", fs1, output_a2)
    writeWav(separated_dir + "b.wav", fs1, output_b2)
    return
def imdst(
    X,
    odd=True,
    transforms=None,
    **kwargs
):
    """ Calculate lapped inverse MDST of a spectrogram

    Parameters
    ----------
    X : array_like
        The spectrogram to be inverted
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
        When false, frames are inverted with an evenly stacked
        inverse MDST / inverse MDCT pair.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to the
        module-level :code:`transforms_default`.

    Additional keyword arguments are passed to :code:`stft.ispectrogram`;
    :code:`framelength` defaults to 2048 when not given.

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.imdst : inverse MDST

    """
    if transforms is None:
        transforms = transforms_default

    kwargs.setdefault('framelength', 2048)

    if odd:
        inverse = transforms.imdst
    else:
        inverse = [
            functools.partial(transforms.imdst, odd=False),
            functools.partial(transforms.imdct, odd=False),
        ]

    return stft.ispectrogram(X, transform=inverse, halved=False, **kwargs)
def test_maxdim():
    """
    Check that exceeding the elementary limits (2D signal, 3D spectrogram
    at most) is rejected with a ``ValueError``
    """
    cases = (
        (stft.spectrogram, (512, 2, 2), {}),
        # A plain NumPy array carries no transform settings to infer from,
        # so the frame length is given explicitly here.
        (stft.ispectrogram, (512, 2, 2, 3), {'framelength': 1024}),
    )
    for transform, shape, options in cases:
        data = numpy.random.random(shape)
        with pytest.raises(ValueError):
            transform(data, **options)
def icmdct(X, odd=True, transforms=None, **kwargs):
    """ Calculate lapped inverse complex MDCT/MCLT of a spectrogram

    Parameters
    ----------
    X : array_like
        The spectrogram to be inverted
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to the
        module-level :code:`transforms_default`.

    Additional keyword arguments are passed to :code:`stft.ispectrogram`.

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.icmdct : inverse complex MDCT

    """
    core = transforms_default if transforms is None else transforms
    # Bind the stacking mode so the per-frame transform takes the frame only.
    inverse = functools.partial(core.icmdct, odd=odd)
    return stft.ispectrogram(X, transform=inverse, halved=False, **kwargs)
def test_issue_autoinverse_defaults(signal):
    """
    Regression test: inverting a plain array with default settings must
    not fail (there used to be no defaults in place)
    """
    # numpy.array() turns the spectrogram into a plain ndarray.
    plain_spectrogram = numpy.array(stft.spectrogram(signal))
    stft.ispectrogram(plain_spectrogram)
def test_issue_autoinverse_values(signal, framelength):
    """
    Regression test: explicit settings passed to the inverse for a plain
    array must not fail (they used to be ignored)
    """
    forward = stft.spectrogram(signal, framelength=framelength)
    # numpy.array() turns the spectrogram into a plain ndarray.
    plain_spectrogram = numpy.array(forward)
    stft.ispectrogram(plain_spectrogram, framelength=framelength)
def imdst(X, odd=True, transforms=None, **kwargs):
    """ Calculate lapped inverse MDST of a spectrogram

    Parameters
    ----------
    X : array_like
        The spectrogram to be inverted
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to the
        module-level :code:`transforms_default`.

    Additional keyword arguments are passed to :code:`stft.ispectrogram`;
    :code:`framelength` defaults to 2048 when not given.

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.imdst : inverse MDST

    """
    if transforms is None:
        transforms = transforms_default

    if 'framelength' not in kwargs:
        kwargs['framelength'] = 2048

    # Oddly stacked: a single core transform handles every frame.
    if odd:
        return stft.ispectrogram(
            X, transform=transforms.imdst, halved=False, **kwargs)

    # Evenly stacked: a pair of core transforms is handed over instead.
    evenly_stacked = [
        functools.partial(transforms.imdst, odd=False),
        functools.partial(transforms.imdct, odd=False),
    ]
    return stft.ispectrogram(
        X, transform=evenly_stacked, halved=False, **kwargs)
def compute_inverse_spectrogram(reals, ims=None):
    """Invert a spectrogram given as real and (optional) imaginary parts.

    Parameters
    ----------
    reals : array_like
        Real part of the spectrogram, or the full spectrogram when ``ims``
        is omitted.
    ims : array_like, optional
        Imaginary part; combined with ``reals`` as ``reals + 1j * ims``.

    Returns
    -------
    The result of ``stft.ispectrogram`` called with the module-level
    ``SEG_SIZE`` as frame length and ``OVER_LAP`` as overlap.
    """
    # Identity check: ``ims != None`` on a NumPy array compares element-wise
    # and makes the ``if`` ambiguous.
    if ims is None:
        specgram = reals
    else:
        specgram = reals + 1j * ims
    return stft.ispectrogram(specgram, framelength=SEG_SIZE, overlap=OVER_LAP)
def test_multiple_transforms(signal):
    """
    Check that a list of different transforms round-trips the signal
    """
    forward = [scipy.fftpack.fft, numpy.fft.fft]
    backward = [scipy.fftpack.ifft, numpy.fft.ifft]

    spectrum = stft.spectrogram(signal, transform=forward)
    restored = stft.ispectrogram(spectrum, transform=backward)

    assert numpy.allclose(signal, restored)
def test_real(signal):
    """
    Check that a forward/inverse round trip yields float64 output
    """
    roundtrip = stft.ispectrogram(stft.spectrogram(signal))
    assert roundtrip.dtype == numpy.float64
def divide():
    """Separate ``merged`` using two stored matrices and write the results.

    Loads the pickled matrices ``m_dir + "M_" + <name>`` for ``raw1`` and
    ``raw2`` (name = file name without its last four characters), multiplies
    each with the spectrogram of ``merged`` via ``np.dot``, inverts both
    products and writes them to ``separated_dir + "a.wav"`` / ``"b.wav"``
    at the sample rate of ``merged``.

    Returns
    -------
    None
    """
    def loadFile(fn):
        # The context manager closes the file even if unpickling fails.
        # NOTE: pickle can execute arbitrary code while loading; only use
        # matrix files from a trusted source.
        with open(fn, 'rb') as f:
            return pickle.load(f)

    fs, data = wavfile.read(merged)
    spec = squeeze(stft.spectrogram(data, framelength=512))

    # raw1[:-4] drops the last four characters (presumably ".wav").
    Ma = loadFile(m_dir + "M_" + raw1[:-4])
    Mb = loadFile(m_dir + "M_" + raw2[:-4])

    a = createSpectrogram(np.dot(Ma, spec), spec)
    b = createSpectrogram(np.dot(Mb, spec), spec)

    output_a = stft.ispectrogram(a)
    output_b = stft.ispectrogram(b)

    writeWav(separated_dir + "a.wav", fs, output_a)
    writeWav(separated_dir + "b.wav", fs, output_b)
def test_precision(channels, padding, signal, framelength):
    """
    Check that forward followed by inverse reproduces the signal
    """
    # The same settings are used in both directions.
    settings = dict(framelength=framelength, padding=padding)

    spectrum = stft.spectrogram(signal, **settings)
    restored = stft.ispectrogram(spectrum, **settings)

    assert numpy.allclose(signal, restored)
def test_overriding(channels, padding, signal, framelength):
    """
    Check that overriding transform settings on the inverse works
    """
    spectrum = stft.spectrogram(
        signal, framelength=framelength, padding=padding)

    # Only the frame length is passed explicitly here; padding is not
    # (presumably taken from the spectrogram's stored settings -- confirm).
    restored = stft.ispectrogram(spectrum, framelength=framelength)

    assert numpy.allclose(signal, restored)
def test_rms(channels, padding, signal, framelength):
    """
    Check that the RMS error of a forward/inverse round trip is negligible
    """
    settings = dict(framelength=framelength, padding=padding)

    spectrum = stft.spectrogram(signal, **settings)
    restored = stft.ispectrogram(spectrum, **settings)

    error = signal - restored
    rms = numpy.sqrt(numpy.mean(error ** 2))
    assert rms < 1e-8
def _istft(stft_matrix_list):
    '''
    Inverse Short-Time Fourier Transformation of every matrix in the list.

    Each matrix is transposed before inversion; frame length and overlap
    come from ``config.STFT_POINT`` and ``config.STFT_OVERLAP``. Returns
    the inverted signals as a list, in input order.
    '''
    return [
        stft.ispectrogram(
            np.transpose(matrix),
            framelength=config.STFT_POINT,
            overlap=config.STFT_OVERLAP,
        )
        for matrix in stft_matrix_list
    ]
def test_rms(channels, padding, signal, framelength, halved):
    """
    Check that the RMS error of a forward/inverse round trip is negligible
    """
    forward_settings = {
        'framelength': framelength,
        'padding': padding,
        'halved': halved,
    }
    spectrum = stft.spectrogram(signal, **forward_settings)

    # No settings are passed to the inverse.
    restored = stft.ispectrogram(spectrum)

    mean_square_error = numpy.mean((signal - restored) ** 2)
    assert numpy.sqrt(mean_square_error) < 1e-8
def test_precision(channels, padding, signal, framelength, halved):
    """
    Check that forward followed by inverse reproduces the signal
    """
    forward_settings = {
        'framelength': framelength,
        'padding': padding,
        'halved': halved,
    }
    spectrum = stft.spectrogram(signal, **forward_settings)

    # No settings are passed to the inverse.
    restored = stft.ispectrogram(spectrum)

    assert numpy.allclose(signal, restored)
def test_complex(signal):
    """
    Check that a forward/inverse round trip works for complex input
    """
    # Build a complex test vector by adding a random imaginary part.
    imaginary = numpy.random.random(signal.shape)
    complex_signal = signal + 1j*imaginary

    spectrum = stft.spectrogram(complex_signal, halved=False)
    restored = stft.ispectrogram(spectrum, halved=False)

    assert complex_signal.dtype == restored.dtype
    assert numpy.allclose(complex_signal, restored)
def test_precision(channels, padding, signal, framelength, halved):
    """
    Check that forward followed by inverse reproduces the signal
    """
    # Settings are given for the forward transform only; the inverse is
    # called with the spectrogram alone.
    restored = stft.ispectrogram(
        stft.spectrogram(
            signal,
            framelength=framelength,
            padding=padding,
            halved=halved,
        )
    )
    assert numpy.allclose(signal, restored)
def test_rms(channels, padding, signal, framelength, halved):
    """
    Check that the RMS error of a forward/inverse round trip is negligible
    """
    # Settings are given for the forward transform only; the inverse is
    # called with the spectrogram alone.
    restored = stft.ispectrogram(
        stft.spectrogram(
            signal,
            framelength=framelength,
            padding=padding,
            halved=halved,
        )
    )
    squared_error = (signal - restored)**2
    assert numpy.sqrt(numpy.mean(squared_error)) < 1e-8
def icmdct(
    X,
    odd=True,
    transforms=None,
    **kwargs
):
    """ Calculate lapped inverse complex MDCT/MCLT of a spectrogram

    Parameters
    ----------
    X : array_like
        The spectrogram to be inverted
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to the
        module-level :code:`transforms_default`.

    Additional keyword arguments are passed to :code:`stft.ispectrogram`.

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.icmdct : inverse complex MDCT

    """
    if transforms is None:
        transforms = transforms_default

    # Pre-bind the stacking mode on the core transform.
    inverse_transform = functools.partial(transforms.icmdct, odd=odd)

    return stft.ispectrogram(
        X,
        transform=inverse_transform,
        halved=False,
        **kwargs
    )
wavfile.write(fn, fs, data) if __name__ == '__main__': spectrogram_args = {'framelength': 512} rate_clean, data_clean = wavfile.read(CLEAN_FILE) rate_noise, data_noise = wavfile.read(NOISE_FILE) data_len = len(data_clean) data_noise = data_noise[:data_len] print(data_clean.dtype) print(data_noise.dtype) data_combined = np.array( [s1 / 2 + s2 / 2 for (s1, s2) in zip(data_clean, data_noise)], dtype=np.int16) # data_combined = data_noise print(data_combined.dtype) wavfile.write('%scombined.wav' % (OUTPUT_DIR), rate_clean, data_combined) Sx_clean = stft.spectrogram(data_clean, **spectrogram_args) Sx_noise = stft.spectrogram(data_noise, **spectrogram_args) reverted_clean = stft.ispectrogram(Sx_clean) reverted_noise = stft.ispectrogram(Sx_noise) writeWav('%soriginal_clean.wav' % (OUTPUT_DIR), rate_clean, reverted_clean) writeWav('%soriginal_noise.wav' % (OUTPUT_DIR), rate_noise, reverted_noise)
def imdct(
    X,
    odd=True,
    transforms=None,
    **kwargs
):
    """ Calculate lapped inverse MDCT of a spectrogram

    Parameters
    ----------
    X : array_like
        The spectrogram to be inverted. May be a 2D matrix for single channel
        or a 3D tensor for multi channel data. In case of a mono signal, the
        data must be in the shape of :code:`bins x frames`. In case of a multi
        channel signal, the data must be in the shape of
        :code:`bins x frames x channels`.
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
        When false, frames are inverted with an evenly stacked
        inverse MDCT / inverse MDST pair.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to the
        module-level :code:`transforms_default`.

    Other Parameters
    ----------------
    All remaining keyword arguments are forwarded to
    :code:`stft.ispectrogram`. :code:`transform` and :code:`halved`
    are set by this function and must not be passed.

    framelength : int
        The signal frame length. Defaults to :code:`2048`.
    hopsize : int
        The signal frame hopsize. Defaults to infer from data. Setting this
        value will override :code:`overlap`.
    overlap : int
        The signal frame overlap coefficient. Value :code:`x` means
        :code:`1/x` overlap. Defaults to infer from data. Note that anything
        but :code:`2` will result in a filterbank without perfect
        reconstruction.
    centered : boolean
        Pad input signal so that the first and last window are centered around
        the beginning of the signal. Defaults to infer from data.
        The first and last half-frame will have aliasing, so using
        centering during forward MDCT is recommended.
    window : callable, array_like
        Window to be used for deringing. Can be :code:`False` to disable
        windowing. Defaults to infer from data.
    padding : int
        Zero-pad signal with x times the number of samples. Defaults to infer
        from data.
    outlength : int
        Crop output signal to length. Useful when input length of spectrogram
        did not fit into framelength and input data had to be padded. Not
        setting this value will disable cropping, the output data may be
        longer than expected.

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.imdct : inverse MDCT

    """
    if transforms is None:
        transforms = transforms_default

    kwargs.setdefault('framelength', 2048)

    if odd:
        inverse = transforms.imdct
    else:
        inverse = [
            functools.partial(transforms.imdct, odd=False),
            functools.partial(transforms.imdst, odd=False),
        ]

    return stft.ispectrogram(X, transform=inverse, halved=False, **kwargs)
def imdct(X, odd=True, transforms=None, **kwargs):
    """ Calculate lapped inverse MDCT of a spectrogram

    Parameters
    ----------
    X : array_like
        The spectrogram to be inverted. May be a 2D matrix for single channel
        or a 3D tensor for multi channel data. In case of a mono signal, the
        data must be in the shape of :code:`bins x frames`. In case of a multi
        channel signal, the data must be in the shape of
        :code:`bins x frames x channels`.
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to the
        module-level :code:`transforms_default`.

    Other Parameters
    ----------------
    All remaining keyword arguments are forwarded to
    :code:`stft.ispectrogram`. :code:`transform` and :code:`halved`
    are set by this function and must not be passed.

    framelength : int
        The signal frame length. Defaults to :code:`2048`.
    hopsize : int
        The signal frame hopsize. Defaults to infer from data. Setting this
        value will override :code:`overlap`.
    overlap : int
        The signal frame overlap coefficient. Value :code:`x` means
        :code:`1/x` overlap. Defaults to infer from data. Note that anything
        but :code:`2` will result in a filterbank without perfect
        reconstruction.
    centered : boolean
        Pad input signal so that the first and last window are centered around
        the beginning of the signal. Defaults to infer from data.
        The first and last half-frame will have aliasing, so using
        centering during forward MDCT is recommended.
    window : callable, array_like
        Window to be used for deringing. Can be :code:`False` to disable
        windowing. Defaults to infer from data.
    padding : int
        Zero-pad signal with x times the number of samples. Defaults to infer
        from data.
    outlength : int
        Crop output signal to length. Useful when input length of spectrogram
        did not fit into framelength and input data had to be padded. Not
        setting this value will disable cropping, the output data may be
        longer than expected.

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.imdct : inverse MDCT

    """
    if transforms is None:
        transforms = transforms_default

    if 'framelength' not in kwargs:
        kwargs['framelength'] = 2048

    # Oddly stacked: a single core transform handles every frame.
    if odd:
        return stft.ispectrogram(
            X, transform=transforms.imdct, halved=False, **kwargs)

    # Evenly stacked: a pair of core transforms is handed over instead.
    evenly_stacked = [
        functools.partial(transforms.imdct, odd=False),
        functools.partial(transforms.imdst, odd=False),
    ]
    return stft.ispectrogram(
        X, transform=evenly_stacked, halved=False, **kwargs)
# np.concatenate(((np.zeros((65,i+1))),test_data[i+1,0,:,:]),axis=1).shape add = np.concatenate((add,np.zeros((65,1))),axis=1)\ + np.concatenate(((np.zeros((65,i+1))),pred_data[i+1,:,:]),axis=1) avg_out = add/20.0 alpha = 0.5 Male_binary_out = np.array(avg_out > alpha)#,dtype=int) Female_binary_out = np.array(avg_out < (1-alpha))#,dtype=int) xf_test = xf_deci[newfrate*120:xfnor.size] # original samples not noramalized. xm_test = xm_deci[newfrate*120:xfnor.size] mix_test = np.short(xf_test + xm_test) mixspec_test = stft.spectrogram(mix_test,framelength=128,hopsize=16,\ window=scipy.signal.hanning) Male_output = Male_binary_out*(mixspec_test) Female_output = Female_binary_out*(mixspec_test) male_audio_recover = stft.ispectrogram(Male_output,framelength=128,hopsize=16,\ window=scipy.signal.hanning) female_audio_recover = stft.ispectrogram(Female_output,framelength=128,hopsize=16,\ window=scipy.signal.hanning) writewave('./male_recovered.wav',male_audio_recover,f1rate,2,1) writewave('./female_recovered2.wav',np.short(female_audio_recover),f1rate,2,1) #pylab.pcolormesh(Male_binary_out*(10*np.log10(xmixspectest[:,1:-3]))) #pylab.pcolormesh(np.nan_to_num(10*np.log10(Female_output))) ################################################
def createMatrix(plot_only=True):
    """Plot the mixture spectrogram and optionally separate it.

    Reads the two source recordings named by the module-level ``raw1`` and
    ``raw2``, truncates them to a common length and builds a hard binary
    mask pair from their spectrogram magnitudes. Then reads ``merged``.

    Parameters
    ----------
    plot_only : bool, optional
        When true (the default, and the previous behaviour), show a
        ``plt.specgram`` plot of ``merged`` and return without writing
        anything. When false, skip the plot, apply the masks to the
        spectrogram of ``merged`` and write the two results to
        ``separated_dir + "a.wav"`` / ``"b.wav"`` at the sample rate of
        ``raw1``. This code used to sit unreachable behind an unconditional
        ``return``.

    Returns
    -------
    None
    """
    fs1, data1 = wavfile.read(raw1)
    _, data2 = wavfile.read(raw2)

    # Both sources must cover the same number of samples so that their
    # spectrograms have identical shapes.
    minlen = min(len(data1), len(data2))
    spec1 = squeeze(stft.spectrogram(data1[:minlen]))
    spec2 = squeeze(stft.spectrogram(data2[:minlen]))

    # Hard mask, computed for all bins at once: ``b`` is 1.0 where source 2
    # is strictly louder, ``a`` is its complement (ties go to source 1).
    b = np.asarray(np.abs(spec1) < np.abs(spec2), dtype=float)
    a = 1.0 - b

    fs, data = wavfile.read(merged)
    spec = squeeze(stft.spectrogram(data))

    if plot_only:
        plt.xlim([0, 2])
        plt.specgram(data, NFFT=200, Fs=fs, noverlap=100,
                     cmap=plt.cm.gist_heat)
        plt.show()
        return

    # NOTE(review): assumes the mixture spectrogram has the same shape as the
    # masks -- confirm that ``merged`` matches the truncated sources.
    output_a = createSpectrogram(np.multiply(a, spec), spec)
    output_b = createSpectrogram(np.multiply(b, spec), spec)

    output_a2 = stft.ispectrogram(output_a)
    output_b2 = stft.ispectrogram(output_b)

    writeWav(separated_dir + "a.wav", fs1, output_a2)
    writeWav(separated_dir + "b.wav", fs1, output_b2)
    return
def wav_from_magnitude_phase(magnitude, phase, dtype):
    """Rebuild a waveform from a magnitude and a phase spectrogram.

    The two inputs are multiplied element-wise, the product is inverted
    with ``ispectrogram``, and the real part of the result is cast to
    ``dtype``.
    """
    waveform = ispectrogram(magnitude * phase)
    return waveform.real.astype(dtype)
import stft
import scipy.io.wavfile as wav

# Round-trip a WAV file through the forward and inverse spectrogram and
# print the reconstructed samples.
# The second positional argument of wavfile.read is ``mmap``, not a file
# mode, so the stray 'r' (which silently enabled memory mapping) is dropped.
fs, audio = wav.read('nto2.wav')
specgram = stft.spectrogram(audio)
output = stft.ispectrogram(specgram)
# Function-call form works on both Python 2 and Python 3.
print(output)
rows2, columns2 = spectragram2.shape

# Silence every row whose entry in spectral_fit_predict_reversed is 0.
# A row-slice assignment replaces the per-cell Python loop; the slice is
# bounded by columns2 so the same cells are written as before.
for r in range(rows2):
    if spectral_fit_predict_reversed[r] == 0:
        spectragram_db[r, :columns2] = 0
        spectragram2[r, :columns2] = 0

directory = '01_spectral_clustering_spec/result01/'
output_file = directory + 'output.wav'
plot_file = directory + 'spectral.png'

if not os.path.exists(directory):
    os.makedirs(directory)

# Reconstruct audio from the masked spectrogram and store it.
output = stft.ispectrogram(spectragram2)
wavfile.write(output_file, fs, output)

# Figure 1: the masked spectrogram.
plt.figure(1).set_size_inches(12, 8)
plt.figure(1).subplots_adjust(left=0.05, bottom=0.1, right=0.95, top=0.9,
                              wspace=0.6, hspace=0.8)
plt.pcolormesh(spectragram_db, cmap="YlGnBu")
plt.ylabel('Frequency [Hz]')
plt.xlabel('Samples')
plt.savefig(plot_file, dpi=300)

plt.figure(2).set_size_inches(12, 8)
def model_test(test_input):
    """Run the separation model on one mixture file and report BSS metrics.

    The mixture at ``test_input`` is transformed, fed to ``SeparationModel``
    as a batch of one, and the two halves of the model output are turned into
    masks that are applied to the mixture spectrogram. The estimated signals
    are compared against round-tripped versions of ``CLEAN_FILE`` and
    ``NOISE_FILE`` with ``bss_eval_sources``; SDR, SIR and SAR are printed and
    both estimates are written under ``data/test_combined/``.

    Parameters
    ----------
    test_input : str
        Path of the WAV file holding the mixture.

    Returns
    -------
    None
    """
    _, test_audio = wavfile.read(test_input)
    _, clean_audio = wavfile.read(CLEAN_FILE)
    _, noise_audio = wavfile.read(NOISE_FILE)

    # Trim the noise reference to the length of the clean reference.
    noise_audio = noise_audio[:len(clean_audio)]

    clean_spec = stft.spectrogram(clean_audio)
    noise_spec = stft.spectrogram(noise_audio)
    test_spec = stft.spectrogram(test_audio)

    # References go through the same forward/inverse transform as estimates.
    reverted_clean = stft.ispectrogram(clean_spec)
    reverted_noise = stft.ispectrogram(noise_spec)

    # Make the data a batch of 1. The 100000 divisor presumably matches the
    # scaling used at training time -- confirm.
    test_data = np.array([test_spec.transpose() / 100000])

    with tf.Graph().as_default():
        model = SeparationModel()
        saver = tf.train.Saver(tf.trainable_variables())

        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('checkpoints/')
            if ckpt:
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(session, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")
                session.run(tf.initialize_all_variables())

            # The target is unused for inference (train=False); it only has
            # to have twice the feature width of the input.
            test_data_shape = np.shape(test_data)
            dummy_target = np.zeros((test_data_shape[0],
                                     test_data_shape[1],
                                     2 * test_data_shape[2]))

            output, _, _ = model.train_on_batch(
                session, test_data, dummy_target, train=False)

            # Floor division keeps the split point an int under true
            # division; a float cannot be used as a slice index.
            num_freq_bin = output.shape[2] // 2
            clean_output = output[0, :, :num_freq_bin]
            noise_output = output[0, :, num_freq_bin:]

            clean_mask, noise_mask = create_mask(clean_output, noise_output)

            clean_spec = createSpectrogram(
                np.multiply(clean_mask.transpose(), test_spec),
                test_spec.stft_settings)
            noise_spec = createSpectrogram(
                np.multiply(noise_mask.transpose(), test_spec),
                test_spec.stft_settings)

            clean_wav = stft.ispectrogram(clean_spec)
            noise_wav = stft.ispectrogram(noise_spec)

            sdr, sir, sar, _ = bss_eval_sources(
                np.array([reverted_clean, reverted_noise]),
                np.array([clean_wav, noise_wav]), False)
            print(sdr, sir, sar)

            # NOTE(review): the output rate is fixed at 44100 regardless of
            # the rate of the input files -- confirm this is intended.
            writeWav('data/test_combined/output_clean.wav', 44100, clean_wav)
            writeWav('data/test_combined/output_noise.wav', 44100, noise_wav)
def model_batch_test():
    """Evaluate the separation model on the stored test batch.

    Loads the ``data`` dataset from ``DIR + 'test_batch'`` and the per-item
    STFT settings from ``DIR + 'test_settings.pkl'``, runs the first batch
    through ``SeparationModel`` and, for every item, compares the masked
    estimates with the references using ``bss_eval_sources``. One CSV row
    ``sdr_clean,sdr_noise,sir_clean,sir_noise,sar_clean,sar_noise`` per
    successfully evaluated item is written to
    ``data/results/results_<num_layers>_<lr>.csv``.

    Returns
    -------
    None
    """
    # The context manager closes the HDF5 file; ``[()]`` reads the whole
    # dataset and replaces the deprecated ``Dataset.value``.
    with h5py.File('%stest_batch' % (DIR), 'r') as test_batch:
        data = test_batch['data'][()]

    # NOTE: pickle can execute arbitrary code while loading; only use
    # settings files from a trusted source.
    with open('%stest_settings.pkl' % (DIR), 'rb') as f:
        settings = pickle.load(f)

    # The first axis holds exactly three stacks: mixture, clean, noise.
    combined, clean, noise = data
    target = np.concatenate((clean, noise), axis=2)

    combined_batch, target_batch = create_batch(combined, target, 50)
    # Untouched copy of the inputs; combined_batch is modified in place below.
    original_combined_batch = [
        copy.deepcopy(batch) for batch in combined_batch
    ]

    with tf.Graph().as_default():
        model = SeparationModel()
        saver = tf.train.Saver(tf.trainable_variables())

        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('checkpoints/')
            if ckpt:
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(session, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")
                session.run(tf.initialize_all_variables())

            curr_mask_array = []
            iters = 0

            # Scaffold for an iterative refinement loop. The convergence
            # check is disabled, so the unconditional ``break`` at the end
            # makes this run exactly once.
            while True:
                iters += 1

                output, _, _ = model.train_on_batch(
                    session, combined_batch[0], target_batch[0], train=False)

                # Floor division keeps the split point an int under true
                # division; a float cannot be used as a slice index.
                num_freq_bin = output.shape[2] // 2
                clean_outputs = output[:, :, :num_freq_bin]
                noise_outputs = output[:, :, num_freq_bin:]

                num_outputs = len(clean_outputs)

                results = []

                for i in range(num_outputs):
                    orig_clean_output = clean_outputs[i]
                    orig_noise_output = noise_outputs[i]

                    # Work on a copy so the loaded settings keep their
                    # 'orig_length' entry.
                    stft_settings = copy.deepcopy(settings[i])
                    orig_length = stft_settings.pop('orig_length')

                    # Only the last ``orig_length`` frames are kept.
                    clean_output = orig_clean_output[-orig_length:]
                    noise_output = orig_noise_output[-orig_length:]

                    clean_mask, noise_mask = create_mask(
                        clean_output, noise_output)
                    orig_clean_mask, orig_noise_mask = create_mask(
                        orig_clean_output, orig_noise_output)

                    curr_mask_array.append(clean_mask)
                    curr_mask_array.append(noise_mask)

                    mixture = original_combined_batch[0][i][-orig_length:]
                    clean_spec = createSpectrogram(
                        np.multiply(clean_mask.transpose(),
                                    mixture.transpose()),
                        settings[i])
                    noise_spec = createSpectrogram(
                        np.multiply(noise_mask.transpose(),
                                    mixture.transpose()),
                        settings[i])

                    # Feedback step for a following iteration: add a scaled,
                    # clean-masked copy of the original input.
                    combined_batch[0][i] += np.multiply(
                        orig_clean_mask, original_combined_batch[0][i]) * 0.1

                    estimated_clean_wav = stft.ispectrogram(clean_spec)
                    estimated_noise_wav = stft.ispectrogram(noise_spec)

                    reference_clean_wav = stft.ispectrogram(
                        SpectrogramArray(clean[i][-orig_length:],
                                         stft_settings).transpose())
                    reference_noise_wav = stft.ispectrogram(
                        SpectrogramArray(noise[i][-orig_length:],
                                         stft_settings).transpose())

                    try:
                        sdr, sir, sar, _ = bss_eval_sources(
                            np.array(
                                [reference_clean_wav, reference_noise_wav]),
                            np.array(
                                [estimated_clean_wav, estimated_noise_wav]),
                            False)
                        results.append(
                            (sdr[0], sdr[1], sir[0], sir[1], sar[0], sar[1]))
                    except ValueError:
                        # Items that cannot be evaluated are reported and
                        # left out of the results.
                        print('error')

                break

            results_filename = '%sresults_%d_%f' % (
                'data/results/', Config.num_layers, Config.lr)

            with open(results_filename + '.csv', 'w+') as f:
                for sdr_1, sdr_2, sir_1, sir_2, sar_1, sar_2 in results:
                    f.write('%f,%f,%f,%f,%f,%f\n' %
                            (sdr_1, sdr_2, sir_1, sir_2, sar_1, sar_2))