# Example 1
    def do_summary(self, m_summary, sess, itr):
        """Run inference on the validation clip and write TensorBoard summaries.

        Feeds the validation data through ``m_summary`` batch by batch,
        reconstructs the enhanced waveform, and logs speech-quality scores
        (PESQ/STOI/LSD/SSNR), audio, and spectrograms.  On the first summary
        step (``itr == config.summary_step``) it additionally logs the
        reference (clean/noisy) audio and spectrograms plus static text
        summaries (filename, configuration, model source code).

        Args:
            m_summary: model exposing ``inputs``/``labels``/``keep_prob``
                placeholders and a ``pred`` output tensor.
            sess: active tf.Session used to evaluate ``m_summary.pred``.
            itr: current training iteration; used as the summary global step.
        """

        valid_path = self.valid_path
        clean_speech = self.clean_speech
        clean_speech = utils.identity_trans(clean_speech)

        noisy_speech = self.noisy_speech
        noisy_speech = utils.identity_trans(noisy_speech)

        temp_dir = self.temp_dir
        name = self.name
        logs_dir = self.logs_dir

        writer = SummaryWriter(log_dir=self.logs_dir + '/summary')

        # Fresh, non-shuffled reader over the validation temp data so the
        # prediction order lines up with the stored phase frames.
        summary_dr = dr.DataReader(temp_dir, '', valid_path["norm_path"], dist_num=config.dist_num, is_training=False,
                                   is_shuffle=False)
        pred = []

        while True:

            summary_inputs, summary_labels = summary_dr.next_batch(config.batch_size)

            # keep_prob = 1.0: no dropout at inference time.
            feed_dict = {m_summary.inputs: summary_inputs, m_summary.labels: summary_labels, m_summary.keep_prob: 1.0}

            pred_temp = sess.run(m_summary.pred, feed_dict=feed_dict)

            pred.append(pred_temp)

            # file_change_checker() returning True means the whole file has
            # been consumed; reconstruct the waveform and stop.
            if summary_dr.file_change_checker():
                phase = summary_dr.phase[0]

                # Stack batch predictions into (frames, freq), trim any
                # batch padding past the stored phase length, and add a
                # trailing singleton axis for broadcasting with mean/std.
                lpsd = np.expand_dims(
                    np.reshape(np.concatenate(pred, axis=0), [-1, config.freq_size])[0:phase.shape[0], :],
                    axis=2)

                mean, std = summary_dr.norm_process(valid_path["norm_path"] + '/norm_noisy.mat')

                # NOTE(review): 1.18 appears to be an empirical extra
                # denormalization gain — its origin is not visible here;
                # confirm before changing.
                lpsd = np.squeeze((lpsd * std * 1.18) + mean)  # denorm

                # Inverse transform back to the time domain using the
                # (transposed) log-power spectrum and stored phase.
                recon_speech = utils.get_recon(np.transpose(lpsd, (1, 0)), np.transpose(phase, (1, 0)),
                                               win_size=config.win_size, win_step=config.win_step, fs=config.fs)

                # plt.plot(recon_speech)
                # plt.show()
                # lab = np.reshape(np.asarray(lab), [-1, 1])
                summary_dr.reader_initialize()
                break

        # write summary

        # One-time work on the very first summary step only.
        if itr == config.summary_step:
            writer.close()
            # Cached on self so later calls can plot the noisy baseline
            # ('ref') without recomputing it.
            self.noisy_measure = utils.se_eval(clean_speech,
                                          np.squeeze(noisy_speech), float(config.fs))
            summary_fname = tf.summary.text(name + '_filename', tf.convert_to_tensor(self.noisy_dir))

            if name == 'train':

                # HTML-formatted hyperparameter dump for the text summary tab.
                config_str = "<br>sampling frequency: %d</br>" \
                             "<br>window step: %d ms</br>" \
                             "<br>window size: %d ms</br>" \
                             "<br>fft size: %d</br>" \
                             "<br>learning rate: %f</br><br>learning rate decay: %.4f</br><br>learning" \
                             " rate decay frequency: %.4d</br>" \
                             "<br>dropout rate: %.4f</br><br>max epoch:" \
                             " %.4e</br><br>batch size: %d</br><br>model type: %s</br>"\
                             % (config.fs, (config.win_step/config.fs*1000), (config.win_size/config.fs*1000),
                                config.nfft, config.lr, config.lrDecayRate, config.lrDecayFreq, config.keep_prob,
                                config.max_epoch, config.batch_size, config.mode)

                summary_config = tf.summary.text(name + '_configuration', tf.convert_to_tensor(config_str))

                # Capture the model's inference() source from trnmodel.py,
                # from its `def inference` line through `return fm`, so the
                # exact architecture is archived alongside this run.
                code_list = []
                read_flag = False

                with open('./lib/trnmodel.py', 'r') as f:
                    while True:
                        line = f.readline()
                        if "def inference(self, inputs):" in line:
                            read_flag = True

                        if "return fm" in line:
                            code_list.append('<br>' + line.replace('\n', '') + '</br>')
                            break

                        if read_flag:
                            code_list.append('<br>' + line.replace('\n', '') + '</br>')

                code_list = "<pre>" + "".join(code_list) + "</pre>"

                summary_model = tf.summary.text('train_model', tf.convert_to_tensor(code_list))

                summary_op = tf.summary.merge([summary_fname, summary_config, summary_model])
            else:
                summary_op = tf.summary.merge([summary_fname])

            # NOTE(review): this local `sess` shadows the method's `sess`
            # parameter; it exists only to evaluate the text summary ops.
            with tf.Session() as sess:
                summary_writer = tf.summary.FileWriter(logs_dir + '/summary/text')
                text = sess.run(summary_op)
                summary_writer.add_summary(text, 1)
            summary_writer.close()

            # Re-open the tensorboardX writer (closed above) for the
            # reference audio/image summaries.
            writer = SummaryWriter(log_dir=logs_dir + '/summary')

            # Reference audio, peak-normalized to [-1, 1].
            writer.add_audio(name + '_audio_ref' + '/clean', clean_speech
                             /np.max(np.abs(clean_speech)), itr,
                             sample_rate=config.fs)
            writer.add_audio(name + '_audio_ref' + '/noisy', noisy_speech
                             /np.max(np.abs(noisy_speech)), itr,
                             sample_rate=config.fs)
            clean_S = get_spectrogram(clean_speech)
            noisy_S = get_spectrogram(noisy_speech)

            writer.add_image(name + '_spectrogram_ref' + '/clean', clean_S, itr)  # image_shape = (C, H, W)
            writer.add_image(name + '_spectrogram_ref' + '/noisy', noisy_S, itr)  # image_shape = (C, H, W)

        # Recurring per-call summaries: enhanced-vs-noisy quality curves.
        # NOTE(review): `self.noisy_measure` is only assigned in the
        # `itr == config.summary_step` branch above — this assumes
        # do_summary was first called at exactly that iteration; verify.
        enhanced_measure = utils.se_eval(clean_speech, recon_speech, float(config.fs))
        writer.add_scalars(name + '_speech_quality' + '/pesq', {'enhanced': enhanced_measure['pesq'],
                                                                'ref': self.noisy_measure['pesq']}, itr)
        writer.add_scalars(name + '_speech_quality' + '/stoi', {'enhanced': enhanced_measure['stoi'],
                                                                'ref': self.noisy_measure['stoi']}, itr)
        writer.add_scalars(name + '_speech_quality' + '/lsd', {'enhanced': enhanced_measure['lsd'],
                                                               'ref': self.noisy_measure['lsd']}, itr)
        writer.add_scalars(name + '_speech_quality' + '/ssnr', {'enhanced': enhanced_measure['ssnr'],
                                                                'ref': self.noisy_measure['ssnr']}, itr)

        writer.add_audio(name + '_audio_enhanced' + '/enhanced', recon_speech/np.max(np.abs(recon_speech)),
                         itr, sample_rate=config.fs)
        enhanced_S = get_spectrogram(recon_speech)
        writer.add_image(name + '_spectrogram_enhanced' + '/enhanced', enhanced_S, itr)  # image_shape = (C, H, W)
        writer.close()
# Example 2
        # Decode zero-based (file, snr, noise) indices from the noisy
        # filename, expected to look like 'noisy<T>_snr<S>_..._num<F>.raw'
        # (the embedded fields are 1-based, hence the -1).
        file_num = int(os.path.basename(noisy_dir).split("_")[-1].split(".raw")[0].split("num")[-1]) - 1
        snr_num = int(os.path.basename(noisy_dir).split("_")[1].split("snr")[1]) - 1
        noise_num = int(os.path.basename(noisy_dir).split("_")[0].split("noisy")[1]) - 1

        # Find the clean reference sharing the same 'num' suffix.
        # NOTE(review): if no match is found, clean_dir silently keeps its
        # previous value — confirm every noisy file has a clean partner.
        for clean_name in clean_list:
            if clean_name.split('num')[-1] == noisy_dir.split('num')[-1]:
                clean_dir = clean_name
                break
        print(noisy_dir)

        # recon_speech = speech_enhance(noisy_dir, graph_name)
        recon_speech = se.enhance(noisy_dir)
        noisy_speech = utils.identity_trans(utils.read_raw(noisy_dir))
        clean_speech = utils.identity_trans(utils.read_raw(clean_dir))

        # Objective quality scores for noisy and enhanced speech against
        # the clean reference (computed via the matlab engine `eng`).
        noisy_measure = utils.se_eval(clean_speech, noisy_speech, float(config.fs), eng)
        enhanced_measure = utils.se_eval(clean_speech, recon_speech, float(config.fs), eng)

        # Store per-(file, snr, noise) scores.
        noisy_result['noisy_pesq'][file_num, snr_num, noise_num] = noisy_measure['pesq']
        noisy_result['noisy_stoi'][file_num, snr_num, noise_num] = noisy_measure['stoi']
        noisy_result['noisy_ssnr'][file_num, snr_num, noise_num] = noisy_measure['ssnr']
        noisy_result['noisy_lsd'][file_num, snr_num, noise_num] = noisy_measure['lsd']

        enhance_result['enhanced_pesq'][file_num, snr_num, noise_num] = enhanced_measure['pesq']
        enhance_result['enhanced_stoi'][file_num, snr_num, noise_num] = enhanced_measure['stoi']
        enhance_result['enhanced_ssnr'][file_num, snr_num, noise_num] = enhanced_measure['ssnr']
        enhance_result['enhanced_lsd'][file_num, snr_num, noise_num] = enhanced_measure['lsd']

    # Average over the file axis, leaving (snr, noise) score matrices.
    noisy_result['noisy_pesq'] = np.mean(noisy_result['noisy_pesq'], axis=0)
    noisy_result['noisy_stoi'] = np.mean(noisy_result['noisy_stoi'], axis=0)
    noisy_result['noisy_ssnr'] = np.mean(noisy_result['noisy_ssnr'], axis=0)
# Example 3
def _parse_noisy_indices(noisy_path):
    """Decode zero-based (file, snr, noise) indices from a noisy filename.

    Filenames are expected to look like 'noisy<T>_snr<S>_..._num<F>.raw',
    where the embedded fields are 1-based.
    """
    base = os.path.basename(noisy_path)
    file_num = int(base.split("_")[-1].split(".raw")[0].split("num")[-1]) - 1
    snr_num = int(base.split("_")[1].split("snr")[1]) - 1
    noise_num = int(base.split("_")[0].split("noisy")[1]) - 1
    return file_num, snr_num, noise_num


def test(clean_dir=clean_dir, noisy_dir=noisy_dir, norm_dir=norm_dir):
    """Evaluate the newest frozen enhancement graph over the noisy test set.

    For every noisy .raw file, enhance it, score both the noisy and the
    enhanced signal against the matching clean reference (PESQ/STOI/SSNR/LSD
    via the matlab engine), average the scores over files, and save the
    results to ./test_result/*.mat.

    Args:
        clean_dir: directory of clean reference .raw files.
        noisy_dir: directory of noisy .raw files.
        norm_dir: directory holding the normalization statistics for SE.
    """
    model_dir = os.path.abspath('./saved_model')

    # Newest frozen graph = lexicographically last .pb in saved_model.
    graph_name = sorted(glob.glob(model_dir + '/*.pb'))[-1]

    noisy_list = sorted(glob.glob(noisy_dir + '/*.raw'))
    clean_list = sorted(glob.glob(clean_dir + '/*.raw'))
    num_data = len(clean_list)

    metrics = ('pesq', 'stoi', 'ssnr', 'lsd')
    # Score tensors indexed (file, snr, noise): 4 SNR levels x 15 noise types.
    noisy_result = {'noisy_' + m: np.zeros((num_data, 4, 15)) for m in metrics}
    enhance_result = {'enhanced_' + m: np.zeros((num_data, 4, 15))
                      for m in metrics}

    se = SE(graph_name=graph_name, norm_path=norm_dir)

    eng = matlab.engine.start_matlab()
    eng.addpath(eng.genpath('.'))

    try:
        # Renamed loop variable so it no longer shadows the `noisy_dir`
        # parameter (as the original did).
        for noisy_path in noisy_list:

            file_num, snr_num, noise_num = _parse_noisy_indices(noisy_path)

            # The clean reference shares the same 'num' suffix.
            for clean_name in clean_list:
                if clean_name.split('num')[-1] == noisy_path.split('num')[-1]:
                    clean_path = clean_name
                    break
            else:
                # The original silently reused the previous clean file when
                # no partner matched, corrupting the scores; fail loudly.
                raise FileNotFoundError('no clean reference for ' + noisy_path)
            print(noisy_path)

            # recon_speech = speech_enhance(noisy_path, graph_name)
            recon_speech = se.enhance(noisy_path)
            noisy_speech = utils.identity_trans(utils.read_raw(noisy_path))
            clean_speech = utils.identity_trans(utils.read_raw(clean_path))

            noisy_measure = utils.se_eval(clean_speech, noisy_speech,
                                          float(config.fs), eng)
            enhanced_measure = utils.se_eval(clean_speech, recon_speech,
                                             float(config.fs), eng)

            # Store per-(file, snr, noise) scores for every metric.
            idx = (file_num, snr_num, noise_num)
            for m in metrics:
                noisy_result['noisy_' + m][idx] = noisy_measure[m]
                enhance_result['enhanced_' + m][idx] = enhanced_measure[m]

        # Average over the file axis, leaving (snr, noise) score matrices.
        for key in noisy_result:
            noisy_result[key] = np.mean(noisy_result[key], axis=0)
        for key in enhance_result:
            enhance_result[key] = np.mean(enhance_result[key], axis=0)

        scipy.io.savemat('./test_result/noisy_result.mat', noisy_result)
        scipy.io.savemat('./test_result/enhanced_result.mat', enhance_result)
    finally:
        # Always shut the matlab engine down, even if scoring fails
        # (the original leaked it on any exception).
        eng.exit()
# Example 4
def se_test(wav_dir, noise_dir, snr, noise_type=1):
    """Enhance one noisy raw file with the newest frozen graph and report
    objective quality scores against the clean reference.

    Prints PESQ/LSD/STOI/SSNR for noisy vs. enhanced speech and shows
    clean/noisy/enhanced spectrograms.

    Args:
        wav_dir: path to the clean reference .raw file.
        noise_dir: path to the noisy .raw file to enhance.
        snr: unused; kept for backward compatibility with the disabled
            matlab noise-mixing path (commented out below).
        noise_type: unused; see `snr`.

    Returns:
        The reconstructed (enhanced) time-domain signal.
    """
    # clean_speech, clean_fs = librosa.load(wav_dir, config.fs)
    clean_speech = utils.read_raw(wav_dir)

    # NOTE(review): the original started a matlab engine here
    # (matlab.engine.start_matlab()) but never used nor closed it;
    # removed to avoid the resource leak.

    # noisy_speech = np.array(eng.noise_add(wav_dir, noise_dir, noise_type, snr, nargout=1))
    # noisy_speech, noisy_fs = librosa.load(noise_dir, config.fs)
    noisy_speech = utils.read_raw(noise_dir)

    # noisy_measure = se_eval(clean_speech, np.squeeze(noisy_speech), float(config.fs))

    # Dump the noisy signal where the DataReader expects its input.
    temp_dir = './data/test/temp/temp.npy'
    np.save(temp_dir, noisy_speech)

    graph_name = sorted(glob.glob('./saved_model/*.pb'))[-1]  # newest .pb
    graph = gt.load_graph(graph_name)
    norm_path = os.path.abspath('./data/train/norm')

    test_dr = dr.DataReader(temp_dir,
                            '',
                            norm_path,
                            dist_num=config.dist_num,
                            is_training=False,
                            is_shuffle=False)

    node_inputs = graph.get_tensor_by_name('prefix/model_1/inputs:0')
    node_labels = graph.get_tensor_by_name('prefix/model_1/labels:0')
    node_keep_prob = graph.get_tensor_by_name('prefix/model_1/keep_prob:0')
    node_prediction = graph.get_tensor_by_name('prefix/model_1/pred:0')

    pred = []

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess_config.gpu_options.allow_growth = True

    # One session for the whole file: the original re-created a tf.Session
    # for every batch.  The frozen graph holds no mutable state, so hoisting
    # the session out of the loop leaves the predictions unchanged.
    with tf.Session(graph=graph, config=sess_config) as sess:
        while True:

            test_inputs, test_labels = test_dr.next_batch(
                config.test_batch_size)

            feed_dict = {
                node_inputs: test_inputs,
                node_labels: test_labels,
                node_keep_prob: 1.0  # no dropout at inference time
            }

            # Only the prediction is needed; the original also fetched the
            # labels into a list that was never used afterwards.
            pred.append(sess.run(node_prediction, feed_dict=feed_dict))

            # True once the whole file has been consumed.
            if test_dr.file_change_checker():
                phase = test_dr.phase[0]

                # Stack predictions into (frames, freq), trim padding past
                # the stored phase length, and add a trailing axis.
                lpsd = np.expand_dims(np.reshape(np.concatenate(
                    pred,
                    axis=0), [-1, config.freq_size])[0:phase.shape[0], :],
                                      axis=2)

                mean, std = test_dr.norm_process(norm_path + '/norm_noisy.mat')

                lpsd = np.squeeze((lpsd * std) + mean)  # denorm

                # Back to the time domain using the stored noisy phase.
                recon_speech = utils.get_recon(np.transpose(lpsd, (1, 0)),
                                               np.transpose(phase, (1, 0)),
                                               win_size=config.win_size,
                                               win_step=config.win_step,
                                               fs=config.fs)

                test_dr.reader_initialize()
                break

    # os.popen('rm -rf ' + temp_dir)

    # Score noisy and enhanced speech against the clean reference, both
    # trimmed to the reconstructed length.
    noisy_measure = utils.se_eval(
        clean_speech[0:recon_speech.shape[0]],
        np.squeeze(noisy_speech[0:recon_speech.shape[0]]), float(config.fs))
    enhanced_measure = utils.se_eval(clean_speech[0:recon_speech.shape[0]],
                                     recon_speech, float(config.fs))

    for metric in ('pesq', 'lsd', 'stoi', 'ssnr'):
        print("%s: %.4f -> %.4f" %
              (metric, noisy_measure[metric], enhanced_measure[metric]))

    # Spectrograms: clean / noisy / enhanced, top to bottom.
    signals = (clean_speech[0:recon_speech.shape[0]],
               np.squeeze(noisy_speech[0:recon_speech.shape[0]]),
               recon_speech)
    for row, signal in enumerate(signals, start=1):
        plt.subplot(3, 1, row)
        S = librosa.amplitude_to_db(librosa.stft(signal,
                                                 hop_length=config.win_step,
                                                 win_length=config.win_size,
                                                 n_fft=config.nfft),
                                    ref=np.max)
        ld.specshow(S, y_axis='linear', hop_length=config.win_step,
                    sr=config.fs)

    plt.show()

    return recon_speech