def bss_eval_global(mixed_wav, src1_wav, src2_wav, pred_src1_wav, pred_src2_wav):
    """Score a two-source separation relative to the unseparated mixture.

    The two references and the mixture are truncated to the last-axis length
    of ``pred_src1_wav`` before being handed to ``bss_eval_sources``.

    Args:
        mixed_wav: mixture signal; passed as *both* estimates to obtain the
            baseline SDR that is subtracted from the estimates' SDR.
        src1_wav: reference signal for the first source.
        src2_wav: reference signal for the second source.
        pred_src1_wav: estimate of the first source; its last-axis length
            defines the crop length.
        pred_src2_wav: estimate of the second source (not cropped here).

    Returns:
        ``(gnsdr, gsir, gsar)``: three length-2 arrays, one entry per source.
        ``gnsdr`` is the estimates' SDR minus the mixture's SDR; ``gsir`` and
        ``gsar`` are the SIR and SAR reported for the estimates.
    """
    n_samples = pred_src1_wav.shape[-1]
    references = [src1_wav[:n_samples], src2_wav[:n_samples]]
    mixture = mixed_wav[:n_samples]

    def length_weighted_mean(metric):
        # Weight by clip length, then normalise by the total length. With a
        # single clip the total is just ``n_samples``.
        weighted_sum = np.zeros(2)
        weighted_sum += n_samples * metric
        return weighted_sum / n_samples

    sdr, sir, sar, _ = bss_eval_sources(
        np.array(references), np.array([pred_src1_wav, pred_src2_wav]), False)
    baseline_sdr = bss_eval_sources(
        np.array(references), np.array([mixture, mixture]), False)[0]

    gnsdr = length_weighted_mean(sdr - baseline_sdr)
    gsir = length_weighted_mean(sir)
    gsar = length_weighted_mean(sar)
    return gnsdr, gsir, gsar
def model_batch_test():
    """Score the separation model on the first stored test batch.

    Reads the ``data`` dataset from ``<DIR>test_batch`` (HDF5) and the
    per-sample STFT settings from ``<DIR>test_settings.pkl``, runs the first
    batch produced by ``create_batch`` through ``SeparationModel`` with
    ``train=False``, converts the two halves of the network output into a
    clean/noise mask pair per sample, applies the masks to the mixture and
    compares the resulting waveforms with the reference waveforms using
    ``bss_eval_sources``.

    Model weights are restored from the latest checkpoint under
    ``checkpoints/``; if none is found the model runs with freshly
    initialised variables.

    One ``sdr_clean,sdr_noise,sir_clean,sir_noise,sar_clean,sar_noise`` row
    per scored sample is written to
    ``data/results/results_<Config.num_layers>_<Config.lr>.csv``.  Samples for
    which scoring raises ``ValueError`` print ``error`` and are skipped.
    """
    # Open read-only and release the handle as soon as the array is in
    # memory; ``[()]`` reads the whole dataset into a NumPy array.
    with h5py.File('%stest_batch' % (DIR), 'r') as test_batch:
        data = test_batch['data'][()]

    # NOTE: pickle.load can execute arbitrary code; only point this at
    # settings files written by this project.
    with open('%stest_settings.pkl' % (DIR), 'rb') as f:
        settings = pickle.load(f)

    # ``data`` must have exactly three entries along its first axis.
    combined, clean, noise = data
    target = np.concatenate((clean, noise), axis=2)

    combined_batch, target_batch = create_batch(combined, target, 50)
    # Private copy of the mixture taken before the model sees the batch, so
    # the masks are always applied to unmodified input.
    mixture_batch = copy.deepcopy(combined_batch[0])

    results = []
    with tf.Graph().as_default():
        model = SeparationModel()
        saver = tf.train.Saver(tf.trainable_variables())

        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('checkpoints/')
            if ckpt:
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(session, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")
                session.run(tf.initialize_all_variables())

            output, _, _ = model.train_on_batch(
                session, combined_batch[0], target_batch[0], train=False)

            # The last axis holds the clean estimate followed by the noise
            # estimate.  Floor division keeps the split point an integer
            # even when true division is in effect.
            num_freq_bin = output.shape[2] // 2
            clean_outputs = output[:, :, :num_freq_bin]
            noise_outputs = output[:, :, num_freq_bin:]

            for i, (clean_full, noise_full) in enumerate(
                    zip(clean_outputs, noise_outputs)):
                # ``settings[i]`` carries an extra 'orig_length' entry; the
                # copy handed to SpectrogramArray must not contain it.
                stft_settings = copy.deepcopy(settings[i])
                orig_length = stft_settings.pop('orig_length')

                # Only the trailing ``orig_length`` rows are used.
                clean_mask, noise_mask = create_mask(
                    clean_full[-orig_length:], noise_full[-orig_length:])
                mixture = mixture_batch[i][-orig_length:].transpose()

                clean_spec = createSpectrogram(
                    np.multiply(clean_mask.transpose(), mixture), settings[i])
                noise_spec = createSpectrogram(
                    np.multiply(noise_mask.transpose(), mixture), settings[i])

                estimated_clean_wav = stft.ispectrogram(clean_spec)
                estimated_noise_wav = stft.ispectrogram(noise_spec)

                reference_clean_wav = stft.ispectrogram(
                    SpectrogramArray(clean[i][-orig_length:],
                                     stft_settings).transpose())
                reference_noise_wav = stft.ispectrogram(
                    SpectrogramArray(noise[i][-orig_length:],
                                     stft_settings).transpose())

                try:
                    sdr, sir, sar, _ = bss_eval_sources(
                        np.array([reference_clean_wav, reference_noise_wav]),
                        np.array([estimated_clean_wav, estimated_noise_wav]),
                        False)
                except ValueError:
                    # Best effort: skip samples the metric cannot score.
                    print('error')
                    continue

                results.append(
                    (sdr[0], sdr[1], sir[0], sir[1], sar[0], sar[1]))

    results_filename = '%sresults_%d_%f' % (
        'data/results/', Config.num_layers, Config.lr)
    with open(results_filename + '.csv', 'w') as f:
        f.writelines('%f,%f,%f,%f,%f,%f\n' % row for row in results)
def model_test(test_input):
    """Separate one mixture WAV into clean/noise estimates and score them.

    The mixture is converted to a spectrogram, passed through
    ``SeparationModel`` as a batch of one with ``train=False``, and the two
    halves of the network output are turned into a clean/noise mask pair
    that is applied to the mixture spectrogram.  The resulting waveforms are
    scored with ``bss_eval_sources`` against the audio in ``CLEAN_FILE`` and
    ``NOISE_FILE``; the ``sdr``, ``sir`` and ``sar`` values are printed.

    Model weights are restored from the latest checkpoint under
    ``checkpoints/``; if none is found the model runs with freshly
    initialised variables.

    Args:
        test_input: path of the mixture WAV file to separate.

    Side effects:
        Writes ``data/test_combined/output_clean.wav`` and
        ``data/test_combined/output_noise.wav`` at the sample rate of
        ``test_input``.
    """
    test_rate, test_audio = wavfile.read(test_input)
    _, clean_audio = wavfile.read(CLEAN_FILE)
    _, noise_audio = wavfile.read(NOISE_FILE)

    # Trim the noise reference to the clean reference's length.
    noise_audio = noise_audio[:len(clean_audio)]

    # The references are round-tripped through the STFT before scoring,
    # presumably so they undergo the same transform as the estimates.
    reverted_clean = stft.ispectrogram(stft.spectrogram(clean_audio))
    reverted_noise = stft.ispectrogram(stft.spectrogram(noise_audio))

    test_spec = stft.spectrogram(test_audio)
    # Wrap in a list to make a batch of one.
    # NOTE(review): 100000 presumably matches the input scaling used when
    # the training data was prepared; confirm against the data pipeline.
    test_data = np.array([test_spec.transpose() / 100000])

    with tf.Graph().as_default():
        model = SeparationModel()
        saver = tf.train.Saver(tf.trainable_variables())

        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('checkpoints/')
            if ckpt:
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(session, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")
                session.run(tf.initialize_all_variables())

            # train_on_batch requires a target; an all-zero one with twice
            # the input's last dimension is supplied as a placeholder.
            batch_size, num_frames, num_bins = np.shape(test_data)
            dummy_target = np.zeros((batch_size, num_frames, 2 * num_bins))

            output, _, _ = model.train_on_batch(
                session, test_data, dummy_target, train=False)

            # The last axis holds the clean estimate followed by the noise
            # estimate.  Floor division keeps the split point an integer
            # even when true division is in effect.
            num_freq_bin = output.shape[2] // 2
            clean_output = output[0, :, :num_freq_bin]
            noise_output = output[0, :, num_freq_bin:]

            clean_mask, noise_mask = create_mask(clean_output, noise_output)

            clean_spec = createSpectrogram(
                np.multiply(clean_mask.transpose(), test_spec),
                test_spec.stft_settings)
            noise_spec = createSpectrogram(
                np.multiply(noise_mask.transpose(), test_spec),
                test_spec.stft_settings)

            clean_wav = stft.ispectrogram(clean_spec)
            noise_wav = stft.ispectrogram(noise_spec)

            sdr, sir, sar, _ = bss_eval_sources(
                np.array([reverted_clean, reverted_noise]),
                np.array([clean_wav, noise_wav]), False)
            print(sdr, sir, sar)

            # Write at the mixture's own sample rate rather than assuming
            # 44.1 kHz, so other inputs keep their pitch and duration.
            # NOTE(review): assumes writeWav's second argument is the
            # sample rate; confirm against its definition.
            writeWav('data/test_combined/output_clean.wav', test_rate,
                     clean_wav)
            writeWav('data/test_combined/output_noise.wav', test_rate,
                     noise_wav)