Beispiel #1
0
def calc_pesq(ref_sig, deg_sig, samplerate, is_file=False):
    """Score speech quality by running the external PESQ command.

    The command ``PESQ_PATH +<samplerate> <reference> <degraded>`` is run
    through the shell and the value following ``Prediction : PESQ_MOS = `` in
    its output is returned. A higher score means better quality.

    Args:
        ref_sig: Reference signal. A path to an audio file when ``is_file``
            is true; otherwise sample data that ``audio_tool.write_audio``
            can write to a wav file.
        deg_sig: Degraded signal, in the same form as ``ref_sig``.
        samplerate: Sampling rate; passed to the command as ``+<rate>`` and
            used when writing the temporary wav files.
        is_file: Whether ``ref_sig`` and ``deg_sig`` are file paths.

    Returns:
        The parsed score as a float. ``0`` is returned on Windows, where the
        computation is not supported yet, and ``2.0`` is returned when no
        score could be found in the command output.
    """
    if 'Windows' in platform.system():
        # PESQ computation is not supported on Windows yet.
        return 0

    # NOTE(review): the command is assembled as a shell string, so paths that
    # contain spaces or shell metacharacters will not be passed through
    # intact. Callers should only hand in trusted, simple paths.
    command = '%s +%d %s %s'

    if is_file:
        # The ``with`` block closes the pipe even if reading fails.
        with os.popen(command %
                      (PESQ_PATH, samplerate, ref_sig, deg_sig)) as output:
            msg = output.read()
    else:
        # Context managers guarantee that both temporary files are removed
        # even when writing the audio or running the command raises.
        with tempfile.NamedTemporaryFile(suffix='.wav',
                                         delete=True) as tmp_ref, \
                tempfile.NamedTemporaryFile(suffix='.wav',
                                            delete=True) as tmp_deg:
            audio_tool.write_audio(tmp_ref.name, ref_sig, samplerate)
            audio_tool.write_audio(tmp_deg.name, deg_sig, samplerate)
            with os.popen(command % (PESQ_PATH, samplerate, tmp_ref.name,
                                     tmp_deg.name)) as output:
                msg = output.read()

    score = msg.split('Prediction : PESQ_MOS = ')
    # Take the first whitespace-delimited token after the marker instead of
    # blindly chopping the last character, which corrupted the value whenever
    # the output did not end in exactly one newline.
    tokens = score[1].split() if len(score) > 1 else []
    if not tokens:
        print('calculate error.')
        return 2.0
    return float(tokens[0])
def _addnoise_and_decoder_one_batch(i_p, speaker_id, sub_process_speaker_num,
                                    waves_dir, noise_dir, sess, model):
    """Add noise to every wav in ``waves_dir`` and enhance it, file by file.

    For each file in ``waves_dir`` a randomly chosen file from ``noise_dir``
    is mixed in via ``audio_tool._mix_wav_by_randomSNR``. The noisy mixture is
    written to disk, its magnitude and phase spectra are fed to
    ``model.y_mag_estimation``, and the estimated magnitude is combined with
    the mixture phase and inverted back to a waveform that is written to
    disk as well. Output paths are derived from the input path by replacing
    the first ``'wav'`` with ``addnoise_dir_name`` / ``enhanced_dir_name`` and
    ``root_dir`` with ``new_root_dir``.

    Args:
        i_p: Worker index; printed as ``i_p + 1`` in the progress report.
        speaker_id: Progress numerator printed in the report.
        sub_process_speaker_num: Progress denominator printed in the report.
        waves_dir: Directory whose entries are the clean wav files.
        noise_dir: Directory whose entries are the noise wav files.
        sess: Session object whose ``run`` evaluates the model.
        model: Model exposing ``x_mag``, ``x_theta``, ``lengths`` and
            ``y_mag_estimation``.

    Raises:
        ValueError: If ``PARAM.RESTORE_PHASE`` is not ``'MIXED'``.
        AssertionError: If the clean and noise sample rates differ or are not
            16000.
    """
    # Fail before any file is written or the model is run: the
    # reconstruction below always uses the phase of the noisy mixture.
    if PARAM.RESTORE_PHASE != 'MIXED':
        raise ValueError('Please set PARAM.RESTORE_PHASE=MIXED.')

    def _write_mirrored(src_path, dir_name, wave, sample_rate):
        # Write ``wave`` to the counterpart of ``src_path`` under ``dir_name``.
        out_path = src_path.replace('wav', dir_name, 1)
        out_path = out_path.replace(root_dir, new_root_dir, 1)
        # exist_ok avoids the check-then-create race between workers.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        audio_tool.write_audio(out_path, wave, sample_rate)

    s_time = time.time()
    noise_dir_list = [
        os.path.join(noise_dir, _dir) for _dir in os.listdir(noise_dir)
    ]
    n_noise = len(noise_dir_list)
    wave_dir_list = [
        os.path.join(waves_dir, _dir) for _dir in os.listdir(waves_dir)
    ]

    batch_size = 0
    # Largest number of frames seen so far; stays 0 for an empty directory.
    max_len = 0
    for wav_dir in wave_dir_list:
        batch_size += 1
        y_wave, sr_y = audio_tool.read_audio(wav_dir)
        if y_wave.ndim != 1:  # aishell has 2 channel wav
            # The two channels are summed into a single one.
            y_wave = y_wave.T[0] + y_wave.T[1]
        noise_id = np.random.randint(n_noise)
        noise_wave, sr_n = audio_tool.read_audio(noise_dir_list[noise_id])

        # Validate the sample rates before spending time on mixing.
        assert sr_y == sr_n and sr_y == 16000, 'sr error sr_y:%d, sr_n %d' % (
            sr_y, sr_n)

        noise_wave = audio_tool.repeat_to_len(noise_wave, len(y_wave))
        x_wave, _ = audio_tool._mix_wav_by_randomSNR(y_wave, noise_wave)
        _write_mirrored(wav_dir, addnoise_dir_name, x_wave, sr_y)

        x_spec_t = spectrum_tool.magnitude_spectrum_librosa_stft(
            x_wave,  # [time, 257]
            PARAM.NFFT,
            PARAM.OVERLAP)
        x_phase_t = spectrum_tool.phase_spectrum_librosa_stft(
            x_wave, PARAM.NFFT, PARAM.OVERLAP)
        n_frames = np.shape(x_spec_t)[0]
        max_len = max(max_len, n_frames)

        # The model is fed one utterance at a time (a batch of size one).
        x_batch = np.array([x_spec_t], dtype=np.float32)
        x_theta_batch = np.array([x_phase_t], dtype=np.float32)
        x_lengths = np.array([n_frames], dtype=np.int32)

        # enhance
        y_mag_est = sess.run(model.y_mag_estimation,
                             feed_dict={
                                 model.x_mag: x_batch,
                                 model.x_theta: x_theta_batch,
                                 model.lengths: x_lengths,
                             })

        # istft && save: reuse the phase of the noisy mixture.
        y_spec_est = y_mag_est * np.exp(1j * x_phase_t)
        reY = spectrum_tool.librosa_istft(y_spec_est, PARAM.NFFT,
                                          PARAM.OVERLAP)
        _write_mirrored(wav_dir, enhanced_dir_name, reY, PARAM.FS)

    e_time = time.time()
    print("\n----------------\n"
          "%d workers\n"
          "%s\n"
          "Worker_id %03d, rate of progress: %d/%d\n"
          "time_step_max_len: %d\n"
          "batch_sie: %d\n"
          'batch_cost_time: %ds\n' %
          (num_process, time.ctime(), i_p + 1, speaker_id,
           sub_process_speaker_num, max_len, batch_size, e_time - s_time),
          flush=True)
def decode_and_getMeature(mixed_file_list, ref_list, sess, model,
                          decode_ans_file, save_audio, ans_file):
    """Enhance each mixed wav and report PESQ, SDR and STOI against references.

    Every file in ``mixed_file_list`` is decoded with ``decode_one_wav`` and
    clipped to the range given by ``MIXED_AISHELL_PARAM.AUDIO_BITS``. For
    each index that also has an entry in ``ref_list`` the raw and enhanced
    PESQ, SDR and STOI are computed, printed and appended to the answer file.
    The averages over all scored files are appended and printed at the end.

    Example call::

        decode_and_getMeature(mixed_dir, ref_dir, sess, model,
                              'decode_nnet_C001_8_2', False, 'xxxans.txt')

    Args:
        mixed_file_list: Paths of the mixed (noisy) wav files.
        ref_list: Paths of the reference wav files, aligned by index with
            ``mixed_file_list``; it may be shorter.
        sess: Session object passed through to ``decode_one_wav``.
        model: Model object passed through to ``decode_one_wav``.
        decode_ans_file: Directory that receives the answer file and, when
            ``save_audio`` is true, the enhanced audio and mask pictures.
        save_audio: Whether to store the enhanced audio and the mask picture.
        ans_file: Name of the answer file inside ``decode_ans_file``; an
            existing file is removed first.
    """
    ans_path = os.path.join(decode_ans_file, ans_file)
    if os.path.exists(ans_path):
        os.remove(ans_path)

    pesq_raw_sum = 0
    pesq_en_sum = 0
    stoi_raw_sum = 0
    stoi_en_sum = 0
    sdr_raw_sum = 0
    sdr_en_sum = 0
    # Number of files that were actually scored; this is the divisor for
    # the averages, not len(ref_list), because the lists may differ in length.
    n_scored = 0
    abs_max = (2**(MIXED_AISHELL_PARAM.AUDIO_BITS - 1) - 1)

    for i, mixed_dir in enumerate(mixed_file_list):
        print('\n', i + 1, mixed_dir)
        waveData, sr = audio_tool.read_audio(mixed_dir)
        reY, mask = decode_one_wav(sess, model, waveData)
        # Clip the enhanced signal to the representable sample range.
        reY = np.where(reY > abs_max, abs_max, reY)
        reY = np.where(reY < -abs_max, -abs_max, reY)

        base_name = os.path.basename(mixed_dir)
        file_name = os.path.splitext(base_name)[0]
        if save_audio:
            prefix = ckpt + '_%03d_' % (i + 1)
            audio_tool.write_audio(
                os.path.join(decode_ans_file, prefix + base_name), reY, sr)
            spectrum_tool.picture_spec(
                mask, os.path.join(decode_ans_file, prefix + file_name))

        if i >= len(ref_list):
            continue

        # NOTE(review): ``sr`` is overwritten with the reference's rate and no
        # check is made that it equals the mixed file's rate -- confirm.
        ref, sr = audio_tool.read_audio(ref_list[i])
        print(' refer: ', ref_list[i])
        # Trim all three signals to the shortest one before comparing.
        len_small = min(len(ref), len(waveData), len(reY))
        ref = np.array(ref[:len_small])
        waveData = np.array(waveData[:len_small])
        reY = np.array(reY[:len_small])

        # sdr: both calls now pass the signals in the same wrapped form.
        sdr_raw = audio_tool.cal_SDR(np.array([ref]), np.array([waveData]))
        sdr_en = audio_tool.cal_SDR(np.array([ref]), np.array([reY]))
        sdr_raw_sum += sdr_raw
        sdr_en_sum += sdr_en
        # pesq
        pesq_raw = pesqexe.calc_pesq(ref, waveData, sr)
        pesq_en = pesqexe.calc_pesq(ref, reY, sr)
        pesq_raw_sum += pesq_raw
        pesq_en_sum += pesq_en
        # stoi
        stoi_raw = stoi.stoi(ref, waveData, sr)
        stoi_en = stoi.stoi(ref, reY, sr)
        stoi_raw_sum += stoi_raw
        stoi_en_sum += stoi_en
        n_scored += 1

        # The same three lines go to stdout and, decorated, to the file.
        detail_lines = [
            "PESQ_raw: %.3f, PESQ_en: %.3f, PESQimp: %.3f. " %
            (pesq_raw, pesq_en, pesq_en - pesq_raw),
            "SDR_raw: %.3f, SDR_en: %.3f, SDRimp: %.3f. " %
            (sdr_raw, sdr_en, sdr_en - sdr_raw),
            "STOI_raw: %.3f, STOI_en: %.3f, STOIimp: %.3f. " %
            (stoi_raw, stoi_en, stoi_en - stoi_raw),
        ]
        print("SR = %d" % sr)
        for line in detail_lines:
            print(line)
        sys.stdout.flush()
        with open(ans_path, 'a+') as f:
            f.write(file_name + '\r\n')
            for line in detail_lines:
                f.write("    |-" + line + "\r\n")

    if n_scored == 0:
        # Nothing to average; dividing by zero here used to crash the run.
        print('No reference file was scored, averages are skipped.')
        sys.stdout.flush()
        return

    summary_lines = [
        fmt % (raw_sum / n_scored, en_sum / n_scored,
               (en_sum - raw_sum) / n_scored)
        for fmt, raw_sum, en_sum in (
            ('PESQ_raw:%.3f, PESQ_en:%.3f, PESQi_avg:%.3f. \r\n',
             pesq_raw_sum, pesq_en_sum),
            ('SDR_raw:%.3f, SDR_en:%.3f, SDRi_avg:%.3f. \r\n',
             sdr_raw_sum, sdr_en_sum),
            ('STOI_raw:%.3f, STOI_en:%.3f, STOIi_avg:%.3f. \r\n',
             stoi_raw_sum, stoi_en_sum),
        )
    ]
    with open(ans_path, 'a+') as f:
        for line in summary_lines:
            f.write(line)
    print('\n\n\n-----------------------------------------')
    for line in summary_lines:
        print(line)
    sys.stdout.flush()
Beispiel #4
0
def addnoise_and_decoder_one_batch(waves_dir, noise_dir, sess, model):
  """Add noise to every wav in ``waves_dir`` and enhance them as one batch.

  Each file in ``waves_dir`` is mixed with a randomly chosen file from
  ``noise_dir`` via ``audio_tool._mix_wav_by_randomSNR`` and the noisy
  mixture is written to disk. The magnitude and phase spectra of all
  mixtures are zero-padded along the time axis to a common length and fed to
  ``model.y_mag_estimation`` in a single ``sess.run`` call. Each estimate is
  cut back to its own length, combined with the mixture phase, inverted to a
  waveform and written to disk. Output paths are the input path with its
  first ``'wav'`` replaced by ``addnoise_dir_name`` / ``enhanced_dir_name``.

  The module-level counter ``speaker_n`` is incremented on every call and
  printed together with ``all_speaker`` as a progress indicator.

  Args:
    waves_dir: Directory whose entries are the clean wav files.
    noise_dir: Directory whose entries are the noise wav files.
    sess: Session object whose ``run`` evaluates the model.
    model: Model exposing ``x_mag``, ``x_theta``, ``lengths`` and
      ``y_mag_estimation``.

  Raises:
    ValueError: If ``PARAM.RESTORE_PHASE`` is not ``'MIXED'``.
    AssertionError: If the clean and noise sample rates differ or are not
      16000.
  """
  s_time = time.time()
  global speaker_n
  speaker_n += 1
  print("\n----------------\n","%d/%d"%(speaker_n,all_speaker))
  sys.stdout.flush()

  # Fail before any file is written or the model is run: the reconstruction
  # below always uses the phase of the noisy mixture.
  if PARAM.RESTORE_PHASE != 'MIXED':
    raise ValueError('Please set PARAM.RESTORE_PHASE=MIXED.')

  noise_dir_list = [os.path.join(noise_dir, _dir) for _dir in os.listdir(noise_dir)]
  n_noise = len(noise_dir_list)
  wave_dir_list = [os.path.join(waves_dir, _dir) for _dir in os.listdir(waves_dir)]

  if not wave_dir_list:
    # An empty directory has no batch to build; np.max([]) used to crash.
    print('no wav file found in %s, skipped.' % waves_dir, flush=True)
    return

  # mix && get input
  spec_list = [] # [n_wav, time, 257]
  theta_list = [] # [n_wav, time, 257]
  length_list = [] # [n_wav]
  for wav_dir in wave_dir_list:
    y_wave, sr_y = audio_tool.read_audio(wav_dir)
    if y_wave.ndim != 1: # aishell has 2 channel wav
      # The two channels are summed into a single one.
      y_wave = y_wave.T[0]+y_wave.T[1]
    noise_id = np.random.randint(n_noise)
    noise_wave, sr_n = audio_tool.read_audio(noise_dir_list[noise_id])

    # Validate the sample rates before spending time on mixing.
    assert sr_y == sr_n and sr_y == 16000, 'sr error sr_y:%d, sr_n %d' % (sr_y, sr_n)

    noise_wave = audio_tool.repeat_to_len(noise_wave, len(y_wave))
    x_wave, _ = audio_tool._mix_wav_by_randomSNR(y_wave, noise_wave)

    x_wav_dir = wav_dir.replace('wav', addnoise_dir_name, 1)
    # exist_ok avoids the check-then-create race on the output directory.
    os.makedirs(os.path.dirname(x_wav_dir), exist_ok=True)
    audio_tool.write_audio(x_wav_dir, x_wave, sr_y)

    x_spec_t = spectrum_tool.magnitude_spectrum_librosa_stft(x_wave, # [time, 257]
                                                             PARAM.NFFT,
                                                             PARAM.OVERLAP)
    x_phase_t = spectrum_tool.phase_spectrum_librosa_stft(x_wave,
                                                          PARAM.NFFT,
                                                          PARAM.OVERLAP)
    spec_list.append(x_spec_t)
    theta_list.append(x_phase_t)
    length_list.append(np.shape(x_spec_t)[0])

  max_len = np.max(length_list)
  print("time_step_max_len:",max_len)
  sys.stdout.flush()

  # Zero-pad every utterance along the time axis up to the longest one.
  x_batch = np.array(
      [np.pad(spec, ((0, max_len-length), (0, 0)), 'constant', constant_values=0)
       for spec, length in zip(spec_list, length_list)],
      dtype=np.float32)
  x_theta_batch = np.array(
      [np.pad(theta, ((0, max_len-length), (0, 0)), 'constant', constant_values=0)
       for theta, length in zip(theta_list, length_list)],
      dtype=np.float32)
  x_lengths = np.array(length_list, dtype=np.int32)

  # enhance
  y_mag_est_batch = sess.run(
      model.y_mag_estimation,
      feed_dict={
          model.x_mag: x_batch,
          model.x_theta: x_theta_batch,
          model.lengths: x_lengths,
      })

  # istft && save
  print(np.shape(y_mag_est_batch), np.shape(x_theta_batch), np.shape(x_lengths))
  sys.stdout.flush()
  for y_mag_est, x_theta, length, wav_dir in zip(y_mag_est_batch, x_theta_batch, x_lengths, wave_dir_list):
    # cut the padding off again
    y_mag_est = y_mag_est[:length,:]
    x_theta = x_theta[:length,:]

    # istft with the phase of the noisy mixture
    y_spec_est = y_mag_est*np.exp(1j*x_theta)
    reY = spectrum_tool.librosa_istft(y_spec_est, PARAM.NFFT, PARAM.OVERLAP)
    y_wav_dir = wav_dir.replace('wav', enhanced_dir_name, 1)
    os.makedirs(os.path.dirname(y_wav_dir), exist_ok=True)
    audio_tool.write_audio(y_wav_dir, reY, PARAM.FS)

  e_time = time.time()
  print('batch_cost_time: %ds' % (e_time-s_time), flush=True)
        if MIXED_AISHELL_PARAM.FS == 8000:
            decode_file_list = decode_file_list_8k
        elif MIXED_AISHELL_PARAM.FS == 16000:
            decode_file_list = decode_file_list_16k
        else:
            print('PARAM.FS error, exit.'), exit(-1)
        for i, mixed_dir in enumerate(decode_file_list):
            print(i + 1, mixed_dir)
            waveData, sr = audio_tool.read_audio(mixed_dir)
            reY, mask = decode_one_wav(sess, model, waveData)
            print(np.max(reY))
            abs_max = (2**(MIXED_AISHELL_PARAM.AUDIO_BITS - 1) - 1)
            reY = np.where(reY > abs_max, abs_max, reY)
            reY = np.where(reY < -abs_max, -abs_max, reY)
            audio_tool.write_audio(
                os.path.join(decode_ans_file, (ckpt + '_%03d_' % (i + 1)) +
                             mixed_dir[mixed_dir.rfind('/') + 1:]), reY, sr)
            file_name = mixed_dir[mixed_dir.rfind('/') +
                                  1:mixed_dir.rfind('.')]
            spectrum_tool.picture_spec(
                mask,
                os.path.join(decode_ans_file,
                             (ckpt + '_%03d_' % (i + 1)) + file_name))
    elif int(sys.argv[1]) == 0:  # decode exp/test_oc
        mixed_dir = 'exp/test_oc/mixed_wav'
        decode_file_list = os.listdir(mixed_dir)
        decode_file_list = [
            os.path.join(mixed_dir, mixed) for mixed in decode_file_list
        ]
        decode_file_list.sort()