def do_summary(self, m_summary, sess, itr):
    """Run the summary model over the staged validation clip and log results.

    Feeds the clip through ``m_summary`` batch by batch, reconstructs the
    enhanced waveform, and writes audio / spectrogram / quality-metric
    summaries to TensorBoard (via tensorboardX) plus one-time text summaries
    on the first summary step.

    Args:
        m_summary: model exposing ``inputs``/``labels``/``keep_prob``/``pred`` tensors.
        sess: active tf.Session used to evaluate ``m_summary.pred``.
        itr: global iteration number, used as the TensorBoard step.
    """
    valid_path = self.valid_path
    clean_speech = self.clean_speech
    clean_speech = utils.identity_trans(clean_speech)
    noisy_speech = self.noisy_speech
    noisy_speech = utils.identity_trans(noisy_speech)
    temp_dir = self.temp_dir
    name = self.name
    logs_dir = self.logs_dir
    # NOTE(review): this writer is re-created further down without being closed
    # on the non-summary-step path — possible handle leak; confirm intent.
    writer = SummaryWriter(log_dir=self.logs_dir + '/summary')
    summary_dr = dr.DataReader(temp_dir, '', valid_path["norm_path"],
                               dist_num=config.dist_num, is_training=False,
                               is_shuffle=False)
    pred = []
    # Accumulate predictions batch-by-batch until the reader signals that the
    # staged file has been fully consumed, then reconstruct the waveform.
    while True:
        summary_inputs, summary_labels = summary_dr.next_batch(config.batch_size)
        feed_dict = {m_summary.inputs: summary_inputs,
                     m_summary.labels: summary_labels,
                     m_summary.keep_prob: 1.0}
        pred_temp = sess.run(m_summary.pred, feed_dict=feed_dict)
        pred.append(pred_temp)
        if summary_dr.file_change_checker():
            phase = summary_dr.phase[0]
            # Trim batched predictions to the true frame count and add the
            # trailing axis the de-normalisation below broadcasts against.
            lpsd = np.expand_dims(
                np.reshape(np.concatenate(pred, axis=0),
                           [-1, config.freq_size])[0:phase.shape[0], :], axis=2)
            mean, std = summary_dr.norm_process(valid_path["norm_path"] + '/norm_noisy.mat')
            # NOTE(review): 1.18 looks like an empirical gain applied on top of
            # the de-normalisation — source unknown, confirm against training.
            lpsd = np.squeeze((lpsd * std * 1.18) + mean)  # denorm
            recon_speech = utils.get_recon(np.transpose(lpsd, (1, 0)),
                                           np.transpose(phase, (1, 0)),
                                           win_size=config.win_size,
                                           win_step=config.win_step,
                                           fs=config.fs)
            # plt.plot(recon_speech)
            # plt.show()
            # lab = np.reshape(np.asarray(lab), [-1, 1])
            summary_dr.reader_initialize()
            break

    # write summary
    # One-time text summaries (filename, configuration, model source) emitted
    # only on the first summary step.
    if itr == config.summary_step:
        writer.close()
        self.noisy_measure = utils.se_eval(clean_speech, np.squeeze(noisy_speech),
                                           float(config.fs))
        summary_fname = tf.summary.text(name + '_filename',
                                        tf.convert_to_tensor(self.noisy_dir))
        if name == 'train':
            config_str = "<br>sampling frequency: %d</br>" \
                         "<br>window step: %d ms</br>" \
                         "<br>window size: %d ms</br>" \
                         "<br>fft size: %d</br>" \
                         "<br>learning rate: %f</br><br>learning rate decay: %.4f</br><br>learning" \
                         " rate decay frequency: %.4d</br>" \
                         "<br>dropout rate: %.4f</br><br>max epoch:" \
                         " %.4e</br><br>batch size: %d</br><br>model type: %s</br>" \
                         % (config.fs, (config.win_step/config.fs*1000),
                            (config.win_size/config.fs*1000), config.nfft,
                            config.lr, config.lrDecayRate, config.lrDecayFreq,
                            config.keep_prob, config.max_epoch,
                            config.batch_size, config.mode)
            summary_config = tf.summary.text(name + '_configuration',
                                             tf.convert_to_tensor(config_str))
            # Capture the source of inference() from trnmodel.py so the exact
            # architecture used for this run is stored alongside the logs.
            code_list = []
            read_flag = False
            with open('./lib/trnmodel.py', 'r') as f:
                while True:
                    line = f.readline()
                    if "def inference(self, inputs):" in line:
                        read_flag = True
                    if "return fm" in line:
                        code_list.append('<br>' + line.replace('\n', '') + '</br>')
                        break
                    if read_flag:
                        code_list.append('<br>' + line.replace('\n', '') + '</br>')
            code_list = "<pre>" + "".join(code_list) + "</pre>"
            summary_model = tf.summary.text('train_model',
                                            tf.convert_to_tensor(code_list))
            summary_op = tf.summary.merge([summary_fname, summary_config, summary_model])
        else:
            summary_op = tf.summary.merge([summary_fname])
        # NOTE(review): this fresh Session shadows the ``sess`` parameter; the
        # text summary ops are evaluated in their own session here.
        with tf.Session() as sess:
            summary_writer = tf.summary.FileWriter(logs_dir + '/summary/text')
            text = sess.run(summary_op)
            summary_writer.add_summary(text, 1)
            summary_writer.close()

    # Per-call audio / spectrogram / metric summaries via tensorboardX.
    writer = SummaryWriter(log_dir=logs_dir + '/summary')
    writer.add_audio(name + '_audio_ref' + '/clean',
                     clean_speech /np.max(np.abs(clean_speech)), itr,
                     sample_rate=config.fs)
    writer.add_audio(name + '_audio_ref' + '/noisy',
                     noisy_speech /np.max(np.abs(noisy_speech)), itr,
                     sample_rate=config.fs)
    clean_S = get_spectrogram(clean_speech)
    noisy_S = get_spectrogram(noisy_speech)
    writer.add_image(name + '_spectrogram_ref' + '/clean', clean_S, itr)  # image_shape = (C, H, W)
    writer.add_image(name + '_spectrogram_ref' + '/noisy', noisy_S, itr)  # image_shape = (C, H, W)
    enhanced_measure = utils.se_eval(clean_speech, recon_speech, float(config.fs))
    # NOTE(review): self.noisy_measure is only assigned in the summary-step
    # branch above — assumes the first call has itr == config.summary_step.
    writer.add_scalars(name + '_speech_quality' + '/pesq',
                       {'enhanced': enhanced_measure['pesq'],
                        'ref': self.noisy_measure['pesq']}, itr)
    writer.add_scalars(name + '_speech_quality' + '/stoi',
                       {'enhanced': enhanced_measure['stoi'],
                        'ref': self.noisy_measure['stoi']}, itr)
    writer.add_scalars(name + '_speech_quality' + '/lsd',
                       {'enhanced': enhanced_measure['lsd'],
                        'ref': self.noisy_measure['lsd']}, itr)
    writer.add_scalars(name + '_speech_quality' + '/ssnr',
                       {'enhanced': enhanced_measure['ssnr'],
                        'ref': self.noisy_measure['ssnr']}, itr)
    writer.add_audio(name + '_audio_enhanced' + '/enhanced',
                     recon_speech/np.max(np.abs(recon_speech)), itr,
                     sample_rate=config.fs)
    enhanced_S = get_spectrogram(recon_speech)
    writer.add_image(name + '_spectrogram_enhanced' + '/enhanced', enhanced_S, itr)  # image_shape = (C, H, W)
    writer.close()
# NOTE(review): this span appears to be a fragment of the per-file evaluation
# loop inside test() below — the enclosing ``for noisy_dir in noisy_list:``
# header is not visible here, and it references ``noisy_dir``, ``clean_list``,
# ``se``, ``eng``, ``noisy_result`` and ``enhance_result`` defined elsewhere.
# Confirm whether this is dead, duplicated code before modifying it.
# Parse 0-based (file, snr, noise) indices from the noisy-file basename,
# e.g. noisy<NOISE>_snr<SNR>_..._num<FILE>.raw (numbers are 1-based in names).
file_num = int(os.path.basename(noisy_dir).split("_")[-1].split(".raw")[0].split("num")[-1]) - 1
snr_num = int(os.path.basename(noisy_dir).split("_")[1].split("snr")[1]) - 1
noise_num = int(os.path.basename(noisy_dir).split("_")[0].split("noisy")[1]) - 1
# Pair the noisy file with the clean file sharing the same "num" suffix.
for clean_name in clean_list:
    if clean_name.split('num')[-1] == noisy_dir.split('num')[-1]:
        clean_dir = clean_name
        break
print(noisy_dir)
# recon_speech = speech_enhance(noisy_dir, graph_name)
recon_speech = se.enhance(noisy_dir)
noisy_speech = utils.identity_trans(utils.read_raw(noisy_dir))
clean_speech = utils.identity_trans(utils.read_raw(clean_dir))
# Quality metrics for the unprocessed and the enhanced signal.
noisy_measure = utils.se_eval(clean_speech, noisy_speech, float(config.fs), eng)
enhanced_measure = utils.se_eval(clean_speech, recon_speech, float(config.fs), eng)
# Record each metric at its [file, snr, noise] slot.
noisy_result['noisy_pesq'][file_num, snr_num, noise_num] = noisy_measure['pesq']
noisy_result['noisy_stoi'][file_num, snr_num, noise_num] = noisy_measure['stoi']
noisy_result['noisy_ssnr'][file_num, snr_num, noise_num] = noisy_measure['ssnr']
noisy_result['noisy_lsd'][file_num, snr_num, noise_num] = noisy_measure['lsd']
enhance_result['enhanced_pesq'][file_num, snr_num, noise_num] = enhanced_measure['pesq']
enhance_result['enhanced_stoi'][file_num, snr_num, noise_num] = enhanced_measure['stoi']
enhance_result['enhanced_ssnr'][file_num, snr_num, noise_num] = enhanced_measure['ssnr']
enhance_result['enhanced_lsd'][file_num, snr_num, noise_num] = enhanced_measure['lsd']
# Average over the file axis, leaving (snr, noise) tables.
noisy_result['noisy_pesq'] = np.mean(noisy_result['noisy_pesq'], axis=0)
noisy_result['noisy_stoi'] = np.mean(noisy_result['noisy_stoi'], axis=0)
noisy_result['noisy_ssnr'] = np.mean(noisy_result['noisy_ssnr'], axis=0)
def _parse_noisy_indices(noisy_path):
    """Extract 0-based (file, snr, noise) indices from a noisy-file name.

    File names look like ``noisy<NOISE>_snr<SNR>_..._num<FILE>.raw``; the
    embedded numbers are 1-based, so each is shifted down to a 0-based index.
    """
    base = os.path.basename(noisy_path)
    file_num = int(base.split("_")[-1].split(".raw")[0].split("num")[-1]) - 1
    snr_num = int(base.split("_")[1].split("snr")[1]) - 1
    noise_num = int(base.split("_")[0].split("noisy")[1]) - 1
    return file_num, snr_num, noise_num


# Metric names in the order the result .mat files have always used.
_METRICS = ('pesq', 'stoi', 'ssnr', 'lsd')


def test(clean_dir=clean_dir, noisy_dir=noisy_dir, norm_dir=norm_dir):
    """Enhance every noisy .raw file and save averaged quality metrics.

    Runs the newest frozen graph in ./saved_model over each noisy file, scores
    the noisy and enhanced signals against the matching clean file (PESQ,
    STOI, SSNR, LSD via the MATLAB engine), averages each metric over the
    file axis and writes the resulting (snr, noise) tables to
    ./test_result/noisy_result.mat and ./test_result/enhanced_result.mat.

    Args:
        clean_dir: directory of clean .raw reference files.
        noisy_dir: directory of noisy .raw input files.
        norm_dir: directory holding the normalization statistics.
    """
    model_dir = os.path.abspath('./saved_model')
    # Newest frozen graph by lexicographic file name.
    graph_name = sorted(glob.glob(model_dir + '/*.pb'))[-1]
    noisy_list = sorted(glob.glob(noisy_dir + '/*.raw'))
    clean_list = sorted(glob.glob(clean_dir + '/*.raw'))
    num_data = len(clean_list)
    # Metric tensors indexed [file, snr, noise]: 4 SNR levels x 15 noise types.
    noisy_result = {'noisy_' + m: np.zeros((num_data, 4, 15)) for m in _METRICS}
    enhance_result = {'enhanced_' + m: np.zeros((num_data, 4, 15)) for m in _METRICS}
    se = SE(graph_name=graph_name, norm_path=norm_dir)
    eng = matlab.engine.start_matlab()
    eng.addpath(eng.genpath('.'))
    try:
        # If no clean match is found for a noisy file, the previous match is
        # reused (preserves the original silent-fallthrough behavior).
        clean_path = clean_dir
        for noisy_path in noisy_list:
            file_num, snr_num, noise_num = _parse_noisy_indices(noisy_path)
            # Pair the noisy file with the clean file sharing the same "num" suffix.
            for clean_name in clean_list:
                if clean_name.split('num')[-1] == noisy_path.split('num')[-1]:
                    clean_path = clean_name
                    break
            print(noisy_path)
            # recon_speech = speech_enhance(noisy_path, graph_name)
            recon_speech = se.enhance(noisy_path)
            noisy_speech = utils.identity_trans(utils.read_raw(noisy_path))
            clean_speech = utils.identity_trans(utils.read_raw(clean_path))
            noisy_measure = utils.se_eval(clean_speech, noisy_speech, float(config.fs), eng)
            enhanced_measure = utils.se_eval(clean_speech, recon_speech, float(config.fs), eng)
            # Record every metric at its [file, snr, noise] slot.
            for m in _METRICS:
                noisy_result['noisy_' + m][file_num, snr_num, noise_num] = noisy_measure[m]
                enhance_result['enhanced_' + m][file_num, snr_num, noise_num] = enhanced_measure[m]
        # Average over the file axis, leaving (snr, noise) tables.
        for key in noisy_result:
            noisy_result[key] = np.mean(noisy_result[key], axis=0)
        for key in enhance_result:
            enhance_result[key] = np.mean(enhance_result[key], axis=0)
        scipy.io.savemat('./test_result/noisy_result.mat', noisy_result)
        scipy.io.savemat('./test_result/enhanced_result.mat', enhance_result)
    finally:
        # Always shut the MATLAB engine down, even if scoring fails mid-loop
        # (the original leaked the engine on any exception).
        eng.exit()
def _show_db_spectrogram(signal):
    """Draw one dB-scaled, linear-frequency spectrogram on the current axes."""
    S = librosa.amplitude_to_db(librosa.stft(signal,
                                             hop_length=config.win_step,
                                             win_length=config.win_size,
                                             n_fft=config.nfft),
                                ref=np.max)
    ld.specshow(S, y_axis='linear', hop_length=config.win_step, sr=config.fs)


def se_test(wav_dir, noise_dir, snr, noise_type=1):
    """Enhance one noisy .raw file with the newest frozen graph and report results.

    Prints PESQ/LSD/STOI/SSNR before and after enhancement and shows the
    clean / noisy / enhanced spectrograms.

    Args:
        wav_dir: path of the clean reference .raw file.
        noise_dir: path of the noisy .raw file to enhance.
        snr: unused — noise mixing is disabled; kept for interface compatibility.
        noise_type: unused — see ``snr``.

    Returns:
        The reconstructed (enhanced) time-domain signal.
    """
    clean_speech = utils.read_raw(wav_dir)
    # NOTE(review): the MATLAB engine is started but never used in this
    # function — candidate for removal; kept so side effects are unchanged.
    eng = matlab.engine.start_matlab()
    noisy_speech = utils.read_raw(noise_dir)

    # Stage the noisy signal where DataReader expects to find it.
    temp_dir = './data/test/temp/temp.npy'
    np.save(temp_dir, noisy_speech)
    graph_name = sorted(glob.glob('./saved_model/*.pb'))[-1]
    graph = gt.load_graph(graph_name)
    norm_path = os.path.abspath('./data/train/norm')
    test_dr = dr.DataReader(temp_dir, '', norm_path, dist_num=config.dist_num,
                            is_training=False, is_shuffle=False)

    node_inputs = graph.get_tensor_by_name('prefix/model_1/inputs:0')
    node_labels = graph.get_tensor_by_name('prefix/model_1/labels:0')
    node_keep_prob = graph.get_tensor_by_name('prefix/model_1/keep_prob:0')
    node_prediction = graph.get_tensor_by_name('prefix/model_1/pred:0')

    pred = []
    lab = []
    # Fix: build the session config once and keep ONE session open for the
    # whole file — the original constructed a new tf.Session (and ConfigProto)
    # on every batch iteration, which is expensive and needless for a frozen
    # graph; the computed results are identical.
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess_config.gpu_options.allow_growth = True
    with tf.Session(graph=graph, config=sess_config) as sess:
        while True:
            test_inputs, test_labels = test_dr.next_batch(config.test_batch_size)
            feed_dict = {
                node_inputs: test_inputs,
                node_labels: test_labels,
                node_keep_prob: 1.0
            }
            pred_temp, lab_temp = sess.run([node_prediction, node_labels],
                                           feed_dict=feed_dict)
            pred.append(pred_temp)
            lab.append(lab_temp)
            if test_dr.file_change_checker():
                phase = test_dr.phase[0]
                # Trim the batched predictions to the true frame count, then
                # de-normalise back to the log-power-spectrum domain.
                lpsd = np.expand_dims(np.reshape(np.concatenate(
                    pred, axis=0), [-1, config.freq_size])[0:phase.shape[0], :],
                                      axis=2)
                mean, std = test_dr.norm_process(norm_path + '/norm_noisy.mat')
                lpsd = np.squeeze((lpsd * std) + mean)  # denorm
                recon_speech = utils.get_recon(np.transpose(lpsd, (1, 0)),
                                               np.transpose(phase, (1, 0)),
                                               win_size=config.win_size,
                                               win_step=config.win_step,
                                               fs=config.fs)
                test_dr.reader_initialize()
                break

    # Score over the reconstructed length only (recon may be shorter).
    n = recon_speech.shape[0]
    noisy_measure = utils.se_eval(clean_speech[0:n],
                                  np.squeeze(noisy_speech[0:n]),
                                  float(config.fs))
    enhanced_measure = utils.se_eval(clean_speech[0:n], recon_speech,
                                     float(config.fs))
    for metric in ("pesq", "lsd", "stoi", "ssnr"):
        print("%s: %.4f -> %.4f" %
              (metric, noisy_measure[metric], enhanced_measure[metric]))

    # Stacked spectrograms: clean (top), noisy (middle), enhanced (bottom).
    for row, signal in enumerate((clean_speech[0:n],
                                  np.squeeze(noisy_speech[0:n]),
                                  recon_speech), start=1):
        plt.subplot(3, 1, row)
        _show_db_spectrogram(signal)
    plt.show()

    return recon_speech