def __init__(self, valid_path, logs_dir, name='train'):
    if name == 'train':
        noisy_dir = sorted(glob.glob(valid_path["train_input_path"] + '/*.raw'))
        noisy_dir = noisy_dir[np.random.randint(len(noisy_dir))]
        clean_list = sorted(glob.glob(valid_path["train_output_path"] + '/*.raw'))
    else:
        noisy_dir = sorted(glob.glob(valid_path["valid_input_path"] + '/*.raw'))[config.summary_fnum]
        clean_list = sorted(glob.glob(valid_path["valid_output_path"] + '/*.raw'))

    # pair the noisy file with the clean file sharing the same 'num' suffix;
    # assumes a matching clean file exists
    for clean_name in clean_list:
        if clean_name.split('num')[-1] == noisy_dir.split('num')[-1]:
            clean_dir = clean_name
            break

    self.noisy_dir = noisy_dir
    self.valid_path = valid_path
    self.clean_speech = utils.read_raw(clean_dir)
    self.noisy_speech = utils.read_raw(noisy_dir)
    self.noisy_measure = {}

    # cap utterances at 30,000 samples so summaries stay cheap
    if self.clean_speech.shape[0] > 30000:
        self.clean_speech = self.clean_speech[0:30000]
        self.noisy_speech = self.noisy_speech[0:30000]

    self.temp_dir = './data/' + name + '/temp/temp.npy'
    self.name = name
    self.logs_dir = logs_dir
    np.save(self.temp_dir, self.noisy_speech)
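# A minimal sketch of the `valid_path` dictionary the constructor above
# expects; only the four keys are implied by the code, the directory layout
# shown here is a placeholder.
valid_path_example = {
    "train_input_path": "./data/train/noisy",
    "train_output_path": "./data/train/clean",
    "valid_input_path": "./data/valid/noisy",
    "valid_output_path": "./data/valid/clean",
}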
def _read_output(self):
    for i, fname in enumerate(self._output_file_list):
        fname = fname.split('num')[-1].split('.raw')[0]
        if self._dataindx == fname:
            dataname = os.path.dirname(self._output_file_list[i]) + '/' + \
                os.path.basename(self._output_file_list[i]).split('.')[0] + '.bin'
            if os.path.exists(dataname):
                # cache hit: the .npy holds metadata, the .bin holds the
                # features scaled by their absolute maximum
                feat_spec = np.load(dataname.replace('.bin', '.npy'), allow_pickle=True).item()
                feat_shape = feat_spec['shape']
                feat_max = feat_spec['max']
                feat = utils.read_raw(dataname).reshape(feat_shape) * np.float32(feat_max)
            else:
                data = utils.read_raw(self._output_file_list[i])
                if config.parallel:
                    feat = self.lpsd_dist_p(data, self._dist_num, is_patch=False)
                else:
                    feat = self.lpsd_dist(data, self._dist_num, is_patch=False)
                # feat shape: (num samples, config.freq_size, 1, 1)
                utils.write_bin(feat, np.max(np.abs(feat)), dataname)
                feat_spec = {'shape': feat.shape, 'max': np.max(np.abs(feat))}
                np.save(dataname.replace('.bin', ''), feat_spec)
            break

    if self._is_shuffle:
        feat = feat[self._perm_indx, :]
    return np.squeeze(feat)
def get_stft_std(input_file_list, mean_dic, output):
    item = []
    power_std_list = []
    power_gstd_list = []
    for fname in input_file_list:
        data = utils.read_raw(fname)
        lpsd, _ = utils.get_powerphase(data, config.win_size, config.win_step, config.nfft)
        # note: despite the names, these accumulate squared deviations
        # (variances); no square root is taken here
        power_std = np.mean(
            (lpsd - np.expand_dims(mean_dic['power_mean'], axis=1))**2, axis=1)
        power_gstd = (lpsd - mean_dic['power_gmean'])**2
        power_std_list.append(power_std)
        power_gstd_list.append(power_gstd)
    power_std = np.mean(np.asarray(power_std_list), axis=0)
    power_gstd = np.mean(np.concatenate(power_gstd_list, axis=1))
    item.append(power_std)
    item.append(power_gstd)
    output.put(item)  # hand the partial statistics back through the queue
def get_stft_mean(input_file_list, output):
    item = []
    power_mean_list = []
    power_gmean_list = []
    for fname in input_file_list:
        data = utils.read_raw(fname)
        lpsd, _ = utils.get_powerphase(data, config.win_size, config.win_step, config.nfft)
        power_mean = np.mean(lpsd, axis=1)  # per-frequency mean over frames
        power_gmean = np.mean(lpsd)         # global scalar mean
        power_mean_list.append(power_mean)
        power_gmean_list.append(power_gmean)
    power_mean = np.mean(np.asarray(power_mean_list), axis=0)
    power_gmean = np.mean(np.asarray(power_gmean_list))
    item.append(power_mean)
    item.append(power_gmean)
    output.put(item)
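# A minimal sketch of how the two statistics workers above might be driven;
# their only contract is `output.put([per_freq_stat, global_stat])`. The
# process count and even-shard assumption here are mine, not the original
# code's (averaging shard results is only exact when shards are equal-sized).
import multiprocessing as mp

def compute_norm_stats(file_list, num_proc=4):
    shards = [file_list[i::num_proc] for i in range(num_proc)]
    queue = mp.Queue()

    # first pass: per-frequency and global log-power means
    procs = [mp.Process(target=get_stft_mean, args=(shard, queue)) for shard in shards]
    for p in procs:
        p.start()
    results = [queue.get() for _ in procs]  # drain before join to avoid deadlock
    for p in procs:
        p.join()
    mean_dic = {'power_mean': np.mean([r[0] for r in results], axis=0),
                'power_gmean': np.mean([r[1] for r in results])}

    # second pass: spreads around those means
    procs = [mp.Process(target=get_stft_std, args=(shard, mean_dic, queue)) for shard in shards]
    for p in procs:
        p.start()
    results = [queue.get() for _ in procs]
    for p in procs:
        p.join()
    std_dic = {'power_std': np.mean([r[0] for r in results], axis=0),
               'power_gstd': np.mean([r[1] for r in results])}
    return mean_dic, std_dic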
def enhance(self, wav_dir):
    noisy_speech = utils.read_raw(wav_dir)
    temp_dir = './temp/temp.npy'
    np.save(temp_dir, noisy_speech)

    test_dr = dr.DataReader(temp_dir, '', self.norm_path, dist_num=config.dist_num,
                            is_training=False, is_shuffle=False)
    mean, std = test_dr.norm_process(self.norm_path + '/norm_noisy.mat')

    while True:
        test_inputs, test_labels, test_inphase, test_outphase = test_dr.whole_batch(
            test_dr.num_samples)
        if config.mode not in ('lstm', 'fcn'):
            feed_dict = {self.node_inputs: test_inputs,
                         self.node_labels: test_labels,
                         self.node_keep_prob: 1.0}
        else:
            feed_dict = {self.node_inputs: test_inputs,
                         self.node_labels: test_labels}
        pred = self.sess.run(self.node_prediction, feed_dict=feed_dict)

        if test_dr.file_change_checker():
            print(wav_dir)
            lpsd = np.expand_dims(np.reshape(pred, [-1, config.freq_size]), axis=2)
            lpsd = np.squeeze((lpsd * std * config.global_std) + mean)  # denormalize
            recon_speech = utils.get_recon(np.transpose(lpsd, (1, 0)),
                                           np.transpose(test_inphase, (1, 0)),
                                           win_size=config.win_size,
                                           win_step=config.win_step, fs=config.fs)
            test_dr.reader_initialize()
            break

    file_dir = self.save_dir + '/' + os.path.basename(wav_dir).replace(
        'noisy', 'enhanced').replace('raw', 'wav')
    # note: librosa.output.write_wav requires librosa < 0.8
    librosa.output.write_wav(file_dir, recon_speech, int(config.fs), norm=True)
    return recon_speech
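# Hypothetical usage of `enhance`, following the way the `SE` wrapper is
# instantiated in `test()` below; both paths are placeholders.
# se = SE(graph_name='./saved_model/model.pb', norm_path='./data/train/norm')
# recon_speech = se.enhance('./data/test/noisy1_snr1_num001.raw')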
def speech_enhance(wav_dir, graph_name):
    noisy_speech = utils.read_raw(wav_dir)
    temp_dir = './temp/temp.npy'
    np.save(temp_dir, noisy_speech)

    graph = gt.load_graph(graph_name)
    norm_path = os.path.abspath('./data/train/norm')
    test_dr = dr.DataReader(temp_dir, '', norm_path, dist_num=config.dist_num,
                            is_training=False, is_shuffle=False)

    node_inputs = graph.get_tensor_by_name('prefix/model_1/inputs:0')
    node_labels = graph.get_tensor_by_name('prefix/model_1/labels:0')
    node_keep_prob = graph.get_tensor_by_name('prefix/model_1/keep_prob:0')
    node_prediction = graph.get_tensor_by_name('prefix/model_1/pred:0')

    pred = []
    lab = []
    sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    sess_config.gpu_options.allow_growth = True

    # open the session once, outside the batch loop, rather than per batch
    with tf.Session(graph=graph, config=sess_config) as sess:
        while True:
            test_inputs, test_labels = test_dr.next_batch(config.test_batch_size)
            feed_dict = {node_inputs: test_inputs,
                         node_labels: test_labels,
                         node_keep_prob: 1.0}
            pred_temp, lab_temp = sess.run([node_prediction, node_labels],
                                           feed_dict=feed_dict)
            pred.append(pred_temp)
            lab.append(lab_temp)

            if test_dr.file_change_checker():
                print(wav_dir)
                phase = test_dr.phase[0]
                lpsd = np.expand_dims(
                    np.reshape(np.concatenate(pred, axis=0),
                               [-1, config.freq_size])[0:phase.shape[0], :], axis=2)
                mean, std = test_dr.norm_process(norm_path + '/norm_noisy.mat')
                lpsd = np.squeeze((lpsd * std) + mean)  # denormalize
                recon_speech = utils.get_recon(np.transpose(lpsd, (1, 0)),
                                               np.transpose(phase, (1, 0)),
                                               win_size=config.win_size,
                                               win_step=config.win_step, fs=config.fs)
                test_dr.reader_initialize()
                break
    return recon_speech
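# `gt.load_graph` is defined elsewhere; a minimal sketch consistent with the
# 'prefix/...' tensor names used above (TF1-style frozen-graph loading) would be:
def load_graph_sketch(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        # the import name is what produces the 'prefix/' in the tensor names
        tf.import_graph_def(graph_def, name='prefix')
    return graph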
def test(clean_dir=clean_dir, noisy_dir=noisy_dir, norm_dir=norm_dir):
    model_dir = os.path.abspath('./saved_model')
    # the frozen graph is produced beforehand, e.g.
    # gs.freeze_graph(logs_dir, model_dir, 'model_1/pred,model_1/labels,model_1/cost')
    graph_name = sorted(glob.glob(model_dir + '/*.pb'))[-1]

    noisy_list = sorted(glob.glob(noisy_dir + '/*.raw'))
    clean_list = sorted(glob.glob(clean_dir + '/*.raw'))
    num_data = len(clean_list)

    # results are indexed [file, snr, noise]: 4 SNR levels, 15 noise types
    noisy_result = {'noisy_pesq': np.zeros((num_data, 4, 15)),
                    'noisy_stoi': np.zeros((num_data, 4, 15)),
                    'noisy_ssnr': np.zeros((num_data, 4, 15)),
                    'noisy_lsd': np.zeros((num_data, 4, 15))}
    enhance_result = {'enhanced_pesq': np.zeros((num_data, 4, 15)),
                      'enhanced_stoi': np.zeros((num_data, 4, 15)),
                      'enhanced_ssnr': np.zeros((num_data, 4, 15)),
                      'enhanced_lsd': np.zeros((num_data, 4, 15))}

    se = SE(graph_name=graph_name, norm_path=norm_dir)
    eng = matlab.engine.start_matlab()
    eng.addpath(eng.genpath('.'))

    for noisy_dir in noisy_list:
        # file names are assumed to follow noisy<noise>_snr<snr>_..._num<file>.raw
        file_num = int(os.path.basename(noisy_dir).split("_")[-1].split(".raw")[0].split("num")[-1]) - 1
        snr_num = int(os.path.basename(noisy_dir).split("_")[1].split("snr")[1]) - 1
        noise_num = int(os.path.basename(noisy_dir).split("_")[0].split("noisy")[1]) - 1

        # pair the noisy file with the clean file sharing the same 'num' suffix
        for clean_name in clean_list:
            if clean_name.split('num')[-1] == noisy_dir.split('num')[-1]:
                clean_dir = clean_name
                break
        print(noisy_dir)

        recon_speech = se.enhance(noisy_dir)
        noisy_speech = utils.identity_trans(utils.read_raw(noisy_dir))
        clean_speech = utils.identity_trans(utils.read_raw(clean_dir))

        noisy_measure = utils.se_eval(clean_speech, noisy_speech, float(config.fs), eng)
        enhanced_measure = utils.se_eval(clean_speech, recon_speech, float(config.fs), eng)

        noisy_result['noisy_pesq'][file_num, snr_num, noise_num] = noisy_measure['pesq']
        noisy_result['noisy_stoi'][file_num, snr_num, noise_num] = noisy_measure['stoi']
        noisy_result['noisy_ssnr'][file_num, snr_num, noise_num] = noisy_measure['ssnr']
        noisy_result['noisy_lsd'][file_num, snr_num, noise_num] = noisy_measure['lsd']
        enhance_result['enhanced_pesq'][file_num, snr_num, noise_num] = enhanced_measure['pesq']
        enhance_result['enhanced_stoi'][file_num, snr_num, noise_num] = enhanced_measure['stoi']
        enhance_result['enhanced_ssnr'][file_num, snr_num, noise_num] = enhanced_measure['ssnr']
        enhance_result['enhanced_lsd'][file_num, snr_num, noise_num] = enhanced_measure['lsd']

    # average over files, leaving a 4 x 15 (snr x noise) table per metric
    for key in noisy_result:
        noisy_result[key] = np.mean(noisy_result[key], axis=0)
    for key in enhance_result:
        enhance_result[key] = np.mean(enhance_result[key], axis=0)

    scipy.io.savemat('./test_result/noisy_result.mat', noisy_result)
    scipy.io.savemat('./test_result/enhanced_result.mat', enhance_result)
    eng.exit()
def _read_input(self, input_file_dir):
    if self._is_training:
        self._dataindx = input_file_dir.split("num")[1].split('.')[0]
        dataname = os.path.dirname(input_file_dir) + '/' + \
            os.path.basename(input_file_dir).split('.')[0] + '.bin'
        if os.path.exists(dataname):
            # cache hit: the .npy holds metadata, the .bin holds the flattened
            # feature and phase arrays scaled by their absolute maximum
            feat_spec = np.load(dataname.replace('.bin', '.npy'), allow_pickle=True).item()
            feat_size = feat_spec['feat_size']
            feat_shape = feat_spec['feat_shape']
            phase_shape = feat_spec['phase_shape']
            feat_max = feat_spec['max']
            feat_phase = utils.read_raw(dataname) * np.float32(feat_max)
            feat = feat_phase[0:feat_size].reshape(feat_shape)
            phase = feat_phase[feat_size:].reshape(phase_shape)
        else:
            print("num_file: %.2d feature extraction..." % self._num_file)
            data = utils.read_raw(input_file_dir)
            if config.parallel:
                feat, phase = self.lpsd_dist_p(data, self._dist_num, is_patch=True)
            else:
                feat, phase = self.lpsd_dist(data, self._dist_num, is_patch=True)
            # cache the result: concatenate feat and phase, and store the
            # shapes and scale factor so the pair can be reconstructed later
            feat_size = feat.size
            feat_phase = np.concatenate((feat.reshape(-1), phase.reshape(-1)))
            utils.write_bin(feat_phase, np.max(np.abs(feat_phase)), dataname)
            feat_spec = {'feat_size': feat_size,
                         'phase_shape': phase.shape,
                         'feat_shape': feat.shape,
                         'max': np.max(np.abs(feat_phase))}
            np.save(dataname.replace('.bin', ''), feat_spec)
            print("num_file: %.2d feature extraction... done." % self._num_file)
    else:
        data = np.load(input_file_dir)
        if config.parallel:
            feat, phase = self.lpsd_dist_p(data, self._dist_num)
        else:
            feat, phase = self.lpsd_dist(data, self._dist_num)

    # feat shape: (num samples, config.time_width, config.freq_size, 1)
    if self._is_shuffle:
        self._perm_indx = perm_indx = np.random.permutation(feat.shape[0])
        feat = feat[perm_indx, :]

    self.num_samples = np.shape(feat)[0]
    if self._is_val:
        phase = np.zeros(feat.shape)
    return feat, phase
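# The cache written above pairs a raw float dump (.bin) with a .npy metadata
# dict. A standalone reader for that format, mirroring the cache-hit branch
# of `_read_input` (the function name is illustrative):
def load_cached_features(dataname):
    spec = np.load(dataname.replace('.bin', '.npy'), allow_pickle=True).item()
    flat = utils.read_raw(dataname) * np.float32(spec['max'])  # undo the scaling
    feat = flat[:spec['feat_size']].reshape(spec['feat_shape'])
    phase = flat[spec['feat_size']:].reshape(spec['phase_shape'])
    return feat, phase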
import os
import numpy as np
from utils import read_raw
from hsi_utils import read_hyper
import scipy.ndimage.measurements as spm

# -- get file names for broadband and hsi
droot = os.environ["REBOUND_DATA"]
bbname = os.path.join(droot, "bb", "2017", "07", "06", "stack_bb_0037777.raw")
hsname = os.path.join(droot, "slow_hsi_scans", "night_052617_00007.raw")

# -- read in the images (skip if already loaded in the interactive session)
try:
    img_hs
except NameError:
    img_bb = read_raw(bbname)
    img_hs = read_hyper(hsname).data[350].copy()

# -- source positions in the broadband image
rr_bb = np.array([821.88822972243372, 1200.35, 1686.51, 1212.2626788969724])
cc_bb = np.array([3313.3156852608604, 541.344, 940.936, 3543.3720854930834])

# -- source positions in the hyperspectral image
rr_hsi = np.array([852.906, 939.741, 1036.47, 931.829])
cc_hsi = np.array([2065.47, 1508.36, 1589.24, 2112.66])

# -- pairwise distances between broadband sources
dist_bb = np.sqrt((rr_bb[:, np.newaxis] - rr_bb)**2 +
                  (cc_bb[:, np.newaxis] - cc_bb)**2)
bb1 = dist_bb[0, 1]
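# -- the analogous pairwise distances for the hyperspectral sources; the
#    ratio bb1 / hsi1 would give the relative scale between the two images
#    for this source pair (an extension sketch, not in the original script)
dist_hsi = np.sqrt((rr_hsi[:, np.newaxis] - rr_hsi)**2 +
                   (cc_hsi[:, np.newaxis] - cc_hsi)**2)
hsi1 = dist_hsi[0, 1]
scale_01 = bb1 / hsi1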
def se_test(wav_dir, noise_dir, snr, noise_type=1):
    clean_speech = utils.read_raw(wav_dir)
    eng = matlab.engine.start_matlab()
    # optional MATLAB mixing step; with pre-mixed files, read them directly
    # noisy_speech = np.array(eng.noise_add(wav_dir, noise_dir, noise_type, snr, nargout=1))
    noisy_speech = utils.read_raw(noise_dir)

    temp_dir = './data/test/temp/temp.npy'
    np.save(temp_dir, noisy_speech)

    graph_name = sorted(glob.glob('./saved_model/*.pb'))[-1]
    graph = gt.load_graph(graph_name)
    norm_path = os.path.abspath('./data/train/norm')
    test_dr = dr.DataReader(temp_dir, '', norm_path, dist_num=config.dist_num,
                            is_training=False, is_shuffle=False)

    node_inputs = graph.get_tensor_by_name('prefix/model_1/inputs:0')
    node_labels = graph.get_tensor_by_name('prefix/model_1/labels:0')
    node_keep_prob = graph.get_tensor_by_name('prefix/model_1/keep_prob:0')
    node_prediction = graph.get_tensor_by_name('prefix/model_1/pred:0')

    pred = []
    lab = []
    sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    sess_config.gpu_options.allow_growth = True

    # open the session once, outside the batch loop, rather than per batch
    with tf.Session(graph=graph, config=sess_config) as sess:
        while True:
            test_inputs, test_labels = test_dr.next_batch(config.test_batch_size)
            feed_dict = {node_inputs: test_inputs,
                         node_labels: test_labels,
                         node_keep_prob: 1.0}
            pred_temp, lab_temp = sess.run([node_prediction, node_labels],
                                           feed_dict=feed_dict)
            pred.append(pred_temp)
            lab.append(lab_temp)

            if test_dr.file_change_checker():
                phase = test_dr.phase[0]
                lpsd = np.expand_dims(
                    np.reshape(np.concatenate(pred, axis=0),
                               [-1, config.freq_size])[0:phase.shape[0], :], axis=2)
                mean, std = test_dr.norm_process(norm_path + '/norm_noisy.mat')
                lpsd = np.squeeze((lpsd * std) + mean)  # denormalize
                recon_speech = utils.get_recon(np.transpose(lpsd, (1, 0)),
                                               np.transpose(phase, (1, 0)),
                                               win_size=config.win_size,
                                               win_step=config.win_step, fs=config.fs)
                test_dr.reader_initialize()
                break

    noisy_measure = utils.se_eval(clean_speech[0:recon_speech.shape[0]],
                                  np.squeeze(noisy_speech[0:recon_speech.shape[0]]),
                                  float(config.fs))
    enhanced_measure = utils.se_eval(clean_speech[0:recon_speech.shape[0]],
                                     recon_speech, float(config.fs))
    print("pesq: %.4f -> %.4f" % (noisy_measure["pesq"], enhanced_measure["pesq"]))
    print("lsd: %.4f -> %.4f" % (noisy_measure["lsd"], enhanced_measure["lsd"]))
    print("stoi: %.4f -> %.4f" % (noisy_measure["stoi"], enhanced_measure["stoi"]))
    print("ssnr: %.4f -> %.4f" % (noisy_measure["ssnr"], enhanced_measure["ssnr"]))

    # spectrograms: clean (top), noisy (middle), enhanced (bottom)
    plt.subplot(3, 1, 1)
    S = librosa.amplitude_to_db(librosa.stft(clean_speech[0:recon_speech.shape[0]],
                                             hop_length=config.win_step,
                                             win_length=config.win_size,
                                             n_fft=config.nfft), ref=np.max)
    ld.specshow(S, y_axis='linear', hop_length=config.win_step, sr=config.fs)
    plt.subplot(3, 1, 2)
    S = librosa.amplitude_to_db(librosa.stft(np.squeeze(noisy_speech[0:recon_speech.shape[0]]),
                                             hop_length=config.win_step,
                                             win_length=config.win_size,
                                             n_fft=config.nfft), ref=np.max)
    ld.specshow(S, y_axis='linear', hop_length=config.win_step, sr=config.fs)
    plt.subplot(3, 1, 3)
    S = librosa.amplitude_to_db(librosa.stft(recon_speech,
                                             hop_length=config.win_step,
                                             win_length=config.win_size,
                                             n_fft=config.nfft), ref=np.max)
    ld.specshow(S, y_axis='linear', hop_length=config.win_step, sr=config.fs)
    plt.show()
    return recon_speech
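# Hypothetical invocation; `snr` and `noise_type` only matter if the MATLAB
# mixing step is re-enabled, so with pre-mixed files they are effectively inert:
# recon = se_test('./data/test/clean_num001.raw',
#                 './data/test/noisy1_snr1_num001.raw', snr=5)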