def createSpectrogram(arr, orig):
    """Wrap ``arr`` in a SpectrogramArray that reuses the STFT settings of ``orig``.

    Only the settings named in ``setting_names`` below are copied; any other
    entry of the source mapping (for example ``'orig_length'``) is dropped.

    Args:
        arr: Spectrogram data to wrap.
        orig: Either an object exposing a ``stft_settings`` mapping or the
            settings mapping itself. ``model_batch_test`` passes a plain
            settings mapping, which has no ``stft_settings`` attribute, so
            both forms are accepted.

    Returns:
        A new ``SpectrogramArray`` built from ``arr`` and the copied settings.

    Raises:
        KeyError: If one of the expected settings is missing from the source.
    """
    # Fall back to ``orig`` itself when it is already the settings mapping.
    source = getattr(orig, 'stft_settings', orig)
    setting_names = (
        'framelength',
        'hopsize',
        'overlap',
        'centered',
        'window',
        'halved',
        'transform',
        'padding',
        'outlength',
    )
    return SpectrogramArray(
        arr,
        stft_settings={name: source[name] for name in setting_names})
def model_batch_test():
    """Evaluate the separation model on the stored test batch and save BSS metrics.

    Steps:
      1. Read the ``data`` dataset from ``<DIR>test_batch`` (HDF5) and the
         per-example settings pickled in ``<DIR>test_settings.pkl``.
      2. Restore the newest checkpoint found in ``checkpoints/``; when there
         is none, initialize the variables from scratch.
      3. Run one forward pass (``train=False``) over the first batch returned
         by ``create_batch`` and split the output in half along its last axis
         into a clean estimate and a noise estimate.
      4. For every example, build masks with ``create_mask``, apply them to
         the original mixture, invert estimates and references back to
         waveforms and score them with ``bss_eval_sources``.
      5. Write one CSV row per successfully scored example to
         ``data/results/results_<num_layers>_<lr>.csv`` with the columns
         sdr(clean), sdr(noise), sir(clean), sir(noise), sar(clean), sar(noise).

    Examples for which ``bss_eval_sources`` raises ``ValueError`` are reported
    with ``error`` on stdout and left out of the CSV.

    NOTE: an earlier experiment repeated step 3-4 on the mask-reinforced
    mixture until the masks stopped changing. Only a single pass is active;
    the in-place mixture update below is what remains of that experiment.
    """
    # Open read-only and release the handle as soon as the data is in memory.
    # ``dataset[()]`` replaces the deprecated ``dataset.value``.
    with h5py.File('%stest_batch' % (DIR), 'r') as test_batch:
        data = test_batch['data'][()]

    # NOTE(review): pickle runs arbitrary code on load; only point DIR at
    # settings files you produced yourself.
    with open('%stest_settings.pkl' % (DIR), 'rb') as f:
        settings = pickle.load(f)

    # The leading axis of ``data`` must hold exactly three entries.
    # presumably mixture, clean and noise spectrograms -- TODO confirm
    combined, clean, noise = data
    target = np.concatenate((clean, noise), axis=2)

    combined_batch, target_batch = create_batch(combined, target, 50)

    # Untouched copy of the mixtures: combined_batch is modified in place below.
    original_combined_batch = [
        copy.deepcopy(batch) for batch in combined_batch
    ]

    results = []

    with tf.Graph().as_default():
        model = SeparationModel()
        saver = tf.train.Saver(tf.trainable_variables())

        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('checkpoints/')
            if ckpt:
                print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
                saver.restore(session, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")
                session.run(tf.initialize_all_variables())

            output, _, _ = model.train_on_batch(
                session, combined_batch[0], target_batch[0], train=False)

            # Floor division keeps the split point an integer (usable as a
            # slice index) regardless of the interpreter's division rules.
            num_freq_bin = output.shape[2] // 2
            clean_outputs = output[:, :, :num_freq_bin]
            noise_outputs = output[:, :, num_freq_bin:]

            for i in range(len(clean_outputs)):
                orig_clean_output = clean_outputs[i]
                orig_noise_output = noise_outputs[i]

                # Work on a copy so the loaded settings keep 'orig_length'.
                stft_settings = copy.deepcopy(settings[i])
                orig_length = stft_settings.pop('orig_length')

                # Only the last ``orig_length`` rows are evaluated.
                # presumably the examples are front-padded -- TODO confirm
                clean_mask, noise_mask = create_mask(
                    orig_clean_output[-orig_length:],
                    orig_noise_output[-orig_length:])
                orig_clean_mask, _ = create_mask(
                    orig_clean_output, orig_noise_output)

                mixture = original_combined_batch[0][i]
                trimmed_mixture = mixture[-orig_length:].transpose()

                clean_spec = createSpectrogram(
                    np.multiply(clean_mask.transpose(), trimmed_mixture),
                    settings[i])
                noise_spec = createSpectrogram(
                    np.multiply(noise_mask.transpose(), trimmed_mixture),
                    settings[i])

                # Reinforce the estimated clean part of the network input.
                # Only matters if the iterative refinement is re-enabled.
                combined_batch[0][i] += np.multiply(
                    orig_clean_mask, mixture) * 0.1

                estimated_clean_wav = stft.ispectrogram(clean_spec)
                estimated_noise_wav = stft.ispectrogram(noise_spec)

                reference_clean_wav = stft.ispectrogram(
                    SpectrogramArray(clean[i][-orig_length:],
                                     stft_settings).transpose())
                reference_noise_wav = stft.ispectrogram(
                    SpectrogramArray(noise[i][-orig_length:],
                                     stft_settings).transpose())

                try:
                    # Third argument: presumably disables the permutation
                    # search of mir_eval's bss_eval_sources -- TODO confirm
                    sdr, sir, sar, _ = bss_eval_sources(
                        np.array(
                            [reference_clean_wav, reference_noise_wav]),
                        np.array(
                            [estimated_clean_wav, estimated_noise_wav]),
                        False)
                except ValueError:
                    # Best effort: skip examples the metric cannot score.
                    print('error')
                    continue

                results.append(
                    (sdr[0], sdr[1], sir[0], sir[1], sar[0], sar[1]))

    results_filename = '%sresults_%d_%f' % (
        'data/results/', Config.num_layers, Config.lr)

    with open(results_filename + '.csv', 'w+') as f:
        for sdr_1, sdr_2, sir_1, sir_2, sar_1, sar_2 in results:
            f.write('%f,%f,%f,%f,%f,%f\n' %
                    (sdr_1, sdr_2, sir_1, sir_2, sar_1, sar_2))