Beispiel #1
0
def test(clean_dir=clean_dir, noisy_dir=noisy_dir, norm_dir=norm_dir):
    """Score noisy and enhanced speech for every ``.raw`` file in ``noisy_dir``.

    The lexicographically last ``.pb`` file under ``./saved_model`` is handed
    to ``SE`` as the graph. Each noisy file is enhanced with ``se.enhance``,
    paired with the clean file whose path has the same text after its last
    ``'num'``, and both the noisy and the enhanced signal are scored against
    that clean signal with ``utils.se_eval``. The ``pesq``, ``stoi``, ``ssnr``
    and ``lsd`` entries of each score are stored in ``(num_data, 4, 15)``
    arrays indexed ``[file_num, snr_num, noise_num]``, averaged over the file
    axis, and written to ``./test_result/noisy_result.mat`` and
    ``./test_result/enhanced_result.mat``.

    Args:
        clean_dir: Directory searched for clean ``*.raw`` files.
        noisy_dir: Directory searched for noisy ``*.raw`` files. Each file
            name is split on ``_``; the first field must contain
            ``noisy<int>``, the second ``snr<int>`` and the last
            ``num<int>.raw``.
        norm_dir: Passed to ``SE`` as ``norm_path``.

    Raises:
        ValueError: If a noisy file has no clean file with a matching suffix.
    """
    model_dir = os.path.abspath('./saved_model')
    graph_name = sorted(glob.glob(model_dir + '/*.pb'))[-1]

    noisy_list = sorted(glob.glob(noisy_dir + '/*.raw'))
    clean_list = sorted(glob.glob(clean_dir + '/*.raw'))
    num_data = len(clean_list)

    # Index the clean files once by the text after their last 'num'.
    # setdefault keeps the first path in sorted order, which is the one a
    # linear scan over clean_list would have picked.
    clean_by_suffix = {}
    for clean_name in clean_list:
        clean_by_suffix.setdefault(clean_name.split('num')[-1], clean_name)

    metrics = ('pesq', 'stoi', 'ssnr', 'lsd')
    noisy_result = {
        'noisy_' + metric: np.zeros((num_data, 4, 15)) for metric in metrics
    }
    enhance_result = {
        'enhanced_' + metric: np.zeros((num_data, 4, 15)) for metric in metrics
    }

    se = SE(graph_name=graph_name, norm_path=norm_dir)

    eng = matlab.engine.start_matlab()
    try:
        eng.addpath(eng.genpath('.'))
        fs = float(config.fs)

        for noisy_path in noisy_list:
            # The parsed numbers are decremented by one before being used as
            # array indices.
            name_fields = os.path.basename(noisy_path).split("_")
            file_num = int(
                name_fields[-1].split(".raw")[0].split("num")[-1]) - 1
            snr_num = int(name_fields[1].split("snr")[1]) - 1
            noise_num = int(name_fields[0].split("noisy")[1]) - 1

            suffix = noisy_path.split('num')[-1]
            if suffix not in clean_by_suffix:
                # Without this check the previous iteration's clean file (or
                # the directory argument itself) would be used as reference.
                raise ValueError(
                    'no clean file matches noisy file: ' + noisy_path)
            clean_path = clean_by_suffix[suffix]
            print(noisy_path)

            recon_speech = se.enhance(noisy_path)
            noisy_speech = utils.identity_trans(utils.read_raw(noisy_path))
            clean_speech = utils.identity_trans(utils.read_raw(clean_path))

            noisy_measure = utils.se_eval(clean_speech, noisy_speech, fs, eng)
            enhanced_measure = utils.se_eval(clean_speech, recon_speech, fs,
                                             eng)

            for metric in metrics:
                noisy_result['noisy_' + metric][
                    file_num, snr_num, noise_num] = noisy_measure[metric]
                enhance_result['enhanced_' + metric][
                    file_num, snr_num, noise_num] = enhanced_measure[metric]
    finally:
        # Shut the MATLAB engine down even when a file fails to evaluate.
        eng.exit()

    # NOTE: cells that were never written stay 0 and take part in the mean.
    for key in list(noisy_result):
        noisy_result[key] = np.mean(noisy_result[key], axis=0)
    for key in list(enhance_result):
        enhance_result[key] = np.mean(enhance_result[key], axis=0)

    # savemat does not create missing directories.
    if not os.path.isdir('./test_result'):
        os.makedirs('./test_result')
    scipy.io.savemat('./test_result/noisy_result.mat', noisy_result)
    scipy.io.savemat('./test_result/enhanced_result.mat', enhance_result)
Beispiel #2
0
    def do_summary(self, m_summary, sess, itr):
        """Enhance the summary utterance and log audio, images and scores.

        Batches are read from ``self.temp_dir`` through a ``dr.DataReader``
        and run through ``m_summary.pred`` until the reader reports a file
        change. The collected predictions are then de-normalised and turned
        back into a waveform with ``utils.get_recon``. The enhanced waveform,
        its spectrogram and its ``utils.se_eval`` scores are written with a
        ``SummaryWriter`` under ``<logs_dir>/summary``.

        When ``itr == config.summary_step`` the method additionally stores
        the scores of the noisy input in ``self.noisy_measure``, writes text
        summaries (file name, and for ``self.name == 'train'`` the
        configuration plus an excerpt of ``./lib/trnmodel.py``) under
        ``<logs_dir>/summary/text``, and logs the clean and noisy reference
        audio and spectrograms.

        NOTE: ``self.noisy_measure`` is only assigned in that branch but is
        read on every call, so a call with ``itr == config.summary_step``
        has to come first.

        Args:
            m_summary: Model object exposing ``inputs``, ``labels``,
                ``keep_prob`` and ``pred``.
            sess: Session whose ``run`` evaluates ``m_summary.pred``
                (presumably a ``tf.Session`` -- confirm against the caller).
            itr: Step value attached to every logged item and compared with
                ``config.summary_step``.
        """

        valid_path = self.valid_path
        clean_speech = utils.identity_trans(self.clean_speech)
        noisy_speech = utils.identity_trans(self.noisy_speech)

        temp_dir = self.temp_dir
        name = self.name
        logs_dir = self.logs_dir

        writer = SummaryWriter(log_dir=logs_dir + '/summary')

        summary_dr = dr.DataReader(temp_dir, '', valid_path["norm_path"], dist_num=config.dist_num, is_training=False,
                                   is_shuffle=False)
        pred = []

        # Keep predicting batch by batch until the reader signals a file
        # change, then rebuild the waveform from everything collected.
        while True:

            summary_inputs, summary_labels = summary_dr.next_batch(config.batch_size)

            feed_dict = {m_summary.inputs: summary_inputs, m_summary.labels: summary_labels, m_summary.keep_prob: 1.0}

            pred_temp = sess.run(m_summary.pred, feed_dict=feed_dict)

            pred.append(pred_temp)

            if summary_dr.file_change_checker():
                phase = summary_dr.phase[0]

                # Trim the stacked predictions to as many rows as the phase.
                lpsd = np.expand_dims(
                    np.reshape(np.concatenate(pred, axis=0), [-1, config.freq_size])[0:phase.shape[0], :],
                    axis=2)

                mean, std = summary_dr.norm_process(valid_path["norm_path"] + '/norm_noisy.mat')

                # NOTE(review): the 1.18 factor is not explained anywhere in
                # this block -- confirm its origin.
                lpsd = np.squeeze((lpsd * std * 1.18) + mean)  # denorm

                recon_speech = utils.get_recon(np.transpose(lpsd, (1, 0)), np.transpose(phase, (1, 0)),
                                               win_size=config.win_size, win_step=config.win_step, fs=config.fs)

                summary_dr.reader_initialize()
                break

        # write summary

        if itr == config.summary_step:
            # The writer is closed here and re-opened once the text summaries
            # have been written.
            writer.close()
            self.noisy_measure = utils.se_eval(clean_speech, np.squeeze(noisy_speech), float(config.fs))
            summary_fname = tf.summary.text(name + '_filename', tf.convert_to_tensor(self.noisy_dir))

            if name == 'train':

                config_str = "<br>sampling frequency: %d</br>" \
                             "<br>window step: %d ms</br>" \
                             "<br>window size: %d ms</br>" \
                             "<br>fft size: %d</br>" \
                             "<br>learning rate: %f</br><br>learning rate decay: %.4f</br><br>learning" \
                             " rate decay frequency: %.4d</br>" \
                             "<br>dropout rate: %.4f</br><br>max epoch:" \
                             " %.4e</br><br>batch size: %d</br><br>model type: %s</br>"\
                             % (config.fs, (config.win_step/config.fs*1000), (config.win_size/config.fs*1000),
                                config.nfft, config.lr, config.lrDecayRate, config.lrDecayFreq, config.keep_prob,
                                config.max_epoch, config.batch_size, config.mode)

                summary_config = tf.summary.text(name + '_configuration', tf.convert_to_tensor(config_str))

                code_list = []
                read_flag = False

                # Copy the lines from "def inference(self, inputs):" up to
                # and including the first line containing "return fm".
                with open('./lib/trnmodel.py', 'r') as f:
                    # Iterating the file ends at EOF; a readline() loop would
                    # spin forever on '' if "return fm" were missing.
                    for line in f:
                        if "def inference(self, inputs):" in line:
                            read_flag = True

                        if "return fm" in line:
                            code_list.append('<br>' + line.replace('\n', '') + '</br>')
                            break

                        if read_flag:
                            code_list.append('<br>' + line.replace('\n', '') + '</br>')

                code_list = "<pre>" + "".join(code_list) + "</pre>"

                summary_model = tf.summary.text('train_model', tf.convert_to_tensor(code_list))

                summary_op = tf.summary.merge([summary_fname, summary_config, summary_model])
            else:
                summary_op = tf.summary.merge([summary_fname])

            # A separate session evaluates the text summaries; it gets its
            # own name so the ``sess`` argument is not shadowed.
            with tf.Session() as text_sess:
                summary_writer = tf.summary.FileWriter(logs_dir + '/summary/text')
                text = text_sess.run(summary_op)
                summary_writer.add_summary(text, 1)
            summary_writer.close()

            writer = SummaryWriter(log_dir=logs_dir + '/summary')

            # Audio is divided by its peak absolute value before logging.
            writer.add_audio(name + '_audio_ref' + '/clean', clean_speech
                             /np.max(np.abs(clean_speech)), itr,
                             sample_rate=config.fs)
            writer.add_audio(name + '_audio_ref' + '/noisy', noisy_speech
                             /np.max(np.abs(noisy_speech)), itr,
                             sample_rate=config.fs)
            clean_S = get_spectrogram(clean_speech)
            noisy_S = get_spectrogram(noisy_speech)

            writer.add_image(name + '_spectrogram_ref' + '/clean', clean_S, itr)  # image_shape = (C, H, W)
            writer.add_image(name + '_spectrogram_ref' + '/noisy', noisy_S, itr)  # image_shape = (C, H, W)

        # Each chart plots the enhanced score next to the stored noisy score.
        enhanced_measure = utils.se_eval(clean_speech, recon_speech, float(config.fs))
        writer.add_scalars(name + '_speech_quality' + '/pesq', {'enhanced': enhanced_measure['pesq'],
                                                                'ref': self.noisy_measure['pesq']}, itr)
        writer.add_scalars(name + '_speech_quality' + '/stoi', {'enhanced': enhanced_measure['stoi'],
                                                                'ref': self.noisy_measure['stoi']}, itr)
        writer.add_scalars(name + '_speech_quality' + '/lsd', {'enhanced': enhanced_measure['lsd'],
                                                               'ref': self.noisy_measure['lsd']}, itr)
        writer.add_scalars(name + '_speech_quality' + '/ssnr', {'enhanced': enhanced_measure['ssnr'],
                                                                'ref': self.noisy_measure['ssnr']}, itr)

        writer.add_audio(name + '_audio_enhanced' + '/enhanced', recon_speech/np.max(np.abs(recon_speech)),
                         itr, sample_rate=config.fs)
        enhanced_S = get_spectrogram(recon_speech)
        writer.add_image(name + '_spectrogram_enhanced' + '/enhanced', enhanced_S, itr)  # image_shape = (C, H, W)
        writer.close()
Beispiel #3
0
    # For every noisy file: parse its indices, find the clean reference,
    # enhance it, and store the scores of the noisy and enhanced signals.
    for noisy_dir in noisy_list:

        # The file name is split on "_"; the numbers after "noisy", "snr" and
        # "num" are decremented by one before being used as array indices.
        name_fields = os.path.basename(noisy_dir).split("_")
        file_num = int(name_fields[-1].split(".raw")[0].split("num")[-1]) - 1
        snr_num = int(name_fields[1].split("snr")[1]) - 1
        noise_num = int(name_fields[0].split("noisy")[1]) - 1

        # The clean reference is the first clean path with the same text
        # after its last 'num'.
        for clean_name in clean_list:
            if clean_name.split('num')[-1] == noisy_dir.split('num')[-1]:
                clean_dir = clean_name
                break
        else:
            # Without this branch a stale clean_dir from the previous
            # iteration (or an unbound one) would be used as reference.
            raise ValueError('no clean file matches noisy file: ' + noisy_dir)
        print(noisy_dir)

        recon_speech = se.enhance(noisy_dir)
        noisy_speech = utils.identity_trans(utils.read_raw(noisy_dir))
        clean_speech = utils.identity_trans(utils.read_raw(clean_dir))

        noisy_measure = utils.se_eval(clean_speech, noisy_speech, float(config.fs), eng)
        enhanced_measure = utils.se_eval(clean_speech, recon_speech, float(config.fs), eng)

        # Result arrays are indexed [file_num, snr_num, noise_num].
        for metric in ('pesq', 'stoi', 'ssnr', 'lsd'):
            noisy_result['noisy_' + metric][file_num, snr_num, noise_num] = noisy_measure[metric]
            enhance_result['enhanced_' + metric][file_num, snr_num, noise_num] = enhanced_measure[metric]