Beispiel #1
0
def decode_one_wav(sess, model, wavedata):
    """Enhance one waveform with the model and resynthesize it.

    The magnitude and phase spectra of ``wavedata`` are fed to the model as
    a batch of one. The estimated magnitude is then converted back into a
    waveform, using the phase source selected by ``PARAM.RESTORE_PHASE``:

    * ``'MIXED'``: the phase spectrum of ``wavedata`` itself.
    * ``'GRIFFIN_LIM'``: ``spectrum_tool.griffin_lim`` with
      ``PARAM.GRIFFIN_ITERNUM`` and ``wavedata``.
    * ``'ESTIMATE'``: the phase fetched from ``model.y_theta_estimation``.

    Args:
        sess: Session-like object; ``sess.run(fetches, feed_dict=...)`` is
            used to evaluate the model.
        model: Object exposing the feeds ``x_mag``, ``x_theta``, ``lengths``
            and the fetches ``y_theta_estimation``, ``y_mag_estimation``,
            ``mask`` and ``_x_mag_spec``.
        wavedata: Waveform handed to the ``spectrum_tool`` STFT helpers.

    Returns:
        A tuple ``(reY, mask)``: the resynthesized waveform and element 0 of
        the fetched mask batch.

    Raises:
        SystemExit: With code -1 (after printing a message) when
            ``PARAM.RESTORE_PHASE`` is not one of the values above, or when
            it is ``'ESTIMATE'`` and the fetched phase is ``None``.
    """
    x_spec_t = spectrum_tool.magnitude_spectrum_librosa_stft(
        wavedata, PARAM.NFFT, PARAM.OVERLAP)
    x_phase_t = spectrum_tool.phase_spectrum_librosa_stft(
        wavedata, PARAM.NFFT, PARAM.OVERLAP)

    # Wrap the single utterance as a batch of one for the model feeds.
    x_spec = np.array([x_spec_t], dtype=np.float32)
    x_theta = np.array([x_phase_t], dtype=np.float32)
    lengths = np.array([np.shape(x_spec_t)[0]], dtype=np.int32)

    # `_x_mag` is fetched only for ad-hoc debugging; it is not used below.
    y_theta_est, y_mag_estimation, mask, _x_mag = sess.run(
        [
            model.y_theta_estimation,
            model.y_mag_estimation,
            model.mask,
            model._x_mag_spec,
        ],
        feed_dict={
            model.x_mag: x_spec,
            model.lengths: lengths,
            model.x_theta: x_theta,
        })

    # Drop the batch axis again: keep only the first (and only) entry.
    y_mag_estimation = np.array(y_mag_estimation[0])
    mask = np.array(mask[0])

    if PARAM.RESTORE_PHASE == 'MIXED':
        # Reuse the phase computed above instead of running the STFT again.
        y_mag_estimation = y_mag_estimation * np.exp(1j * x_phase_t)
        reY = spectrum_tool.librosa_istft(y_mag_estimation, PARAM.NFFT,
                                          PARAM.OVERLAP)
    elif PARAM.RESTORE_PHASE == 'GRIFFIN_LIM':
        reY = spectrum_tool.griffin_lim(y_mag_estimation, PARAM.NFFT,
                                        PARAM.OVERLAP, PARAM.GRIFFIN_ITERNUM,
                                        wavedata)
    elif PARAM.RESTORE_PHASE == 'ESTIMATE':
        if y_theta_est is None:
            print('Model cannot estimate y_theta.')
            # `exit` is an interactive helper installed by `site`; raise
            # SystemExit directly so this also works without it.
            raise SystemExit(-1)
        # NOTE(review): unlike y_mag_estimation and mask, y_theta_est is used
        # here without taking element [0] -- confirm the shapes line up.
        y_mag_estimation = y_mag_estimation * np.exp(1j * y_theta_est)
        reY = spectrum_tool.librosa_istft(y_mag_estimation, PARAM.NFFT,
                                          PARAM.OVERLAP)
    else:
        print('RESTORE_PHASE error.')
        raise SystemExit(-1)

    return reY, mask
def decode_one_wav(sess, model, wavedata):
    """Enhance one waveform with the model and resynthesize it.

    The magnitude spectrum of ``wavedata`` is fed to the model as a batch of
    one, and the fetched ``model.cleaned`` spectrum is converted back into a
    waveform according to ``PARAM.RESTORE_PHASE``:

    * ``'MIXED'``: combined with the phase spectrum of ``wavedata``.
    * ``'GRIFFIN_LIM'``: ``spectrum_tool.griffin_lim`` with
      ``PARAM.GRIFFIN_ITERNUM`` and ``wavedata``.

    Args:
        sess: Session-like object; ``sess.run(fetches, feed_dict=...)`` is
            used to evaluate the model.
        model: Object exposing the feeds ``inputs`` and ``lengths`` and the
            fetches ``cleaned``, ``mask``, ``_x_mag_spec``,
            ``_norm_x_mag_spec`` and ``_norm_x_logmag_spec``.
        wavedata: Waveform handed to the ``spectrum_tool`` STFT helpers.

    Returns:
        The resynthesized waveform.

    Raises:
        ValueError: If ``PARAM.RESTORE_PHASE`` is neither ``'MIXED'`` nor
            ``'GRIFFIN_LIM'``.
    """
    x_spec_t = spectrum_tool.magnitude_spectrum_librosa_stft(
        wavedata, PARAM.NFFT, PARAM.OVERLAP)

    # Wrap the single utterance as a batch of one for the model feeds.
    x_spec = np.array([x_spec_t], dtype=np.float32)
    lengths = np.array([np.shape(x_spec_t)[0]], dtype=np.int32)

    # Only `cleaned` is used below; the remaining fetches are kept for
    # ad-hoc debugging of the intermediate spectra.
    cleaned, _mask, _x_mag, _norm_x_mag, _norm_logmag = sess.run(
        [
            model.cleaned,
            model.mask,
            model._x_mag_spec,
            model._norm_x_mag_spec,
            model._norm_x_logmag_spec,
        ],
        feed_dict={
            model.inputs: x_spec,
            model.lengths: lengths,
        })

    cleaned = np.array(cleaned)
    if PARAM.RESTORE_PHASE == 'MIXED':
        mixed_phase = spectrum_tool.phase_spectrum_librosa_stft(
            wavedata, PARAM.NFFT, PARAM.OVERLAP)
        cleaned = cleaned * np.exp(1j * mixed_phase)
        reY = spectrum_tool.librosa_istft(cleaned, PARAM.NFFT, PARAM.OVERLAP)
    elif PARAM.RESTORE_PHASE == 'GRIFFIN_LIM':
        reY = spectrum_tool.griffin_lim(cleaned, PARAM.NFFT, PARAM.OVERLAP,
                                        PARAM.GRIFFIN_ITERNUM, wavedata)
    else:
        # Without this branch `reY` would be unbound and the return below
        # would fail with an unhelpful UnboundLocalError.
        raise ValueError(
            "Unsupported PARAM.RESTORE_PHASE %r; expected 'MIXED' or "
            "'GRIFFIN_LIM'." % (PARAM.RESTORE_PHASE,))

    return reY
def _addnoise_and_decoder_one_batch(i_p, speaker_id, sub_process_speaker_num,
                                    waves_dir, noise_dir, sess, model):
    """Add noise to every wav in ``waves_dir``, enhance it and save both.

    For each file in ``waves_dir`` a randomly chosen file from ``noise_dir``
    is repeated to the speech length and mixed in via
    ``audio_tool._mix_wav_by_randomSNR``. The noisy waveform is written to
    disk, its magnitude/phase spectra are run through the model one file at
    a time, and the enhanced magnitude is combined with the noisy phase and
    written to disk as well.

    Output paths are derived from the input path by replacing the first
    ``'wav'`` with the module-level ``addnoise_dir_name`` (noisy) or
    ``enhanced_dir_name`` (enhanced), then the first ``root_dir`` with
    ``new_root_dir``.

    Args:
        i_p: Worker index; reported as ``i_p + 1`` in the progress message.
        speaker_id: Numerator of the progress fraction that is printed.
        sub_process_speaker_num: Denominator of that progress fraction.
        waves_dir: Directory whose entries are the clean speech files.
        noise_dir: Directory whose entries are the candidate noise files.
        sess: Session-like object; ``sess.run`` evaluates the model.
        model: Object exposing the feeds ``x_mag``, ``x_theta``, ``lengths``
            and the fetch ``y_mag_estimation``.

    Raises:
        ValueError: If ``PARAM.RESTORE_PHASE`` is not ``'MIXED'``.
        AssertionError: If speech and noise sample rates are not both 16000.
    """
    # Only mixed-phase reconstruction is implemented below. The setting does
    # not depend on the file, so reject it before any file is written or the
    # model is run.
    if PARAM.RESTORE_PHASE != 'MIXED':
        raise ValueError('Please set PARAM.RESTORE_PHASE=MIXED.')

    s_time = time.time()
    noise_dir_list = [
        os.path.join(noise_dir, _dir) for _dir in os.listdir(noise_dir)
    ]
    n_noise = len(noise_dir_list)
    wave_dir_list = [
        os.path.join(waves_dir, _dir) for _dir in os.listdir(waves_dir)
    ]

    batch_size = len(wave_dir_list)
    # Track the longest utterance across the whole loop; initialised so the
    # summary below is also well-defined for an empty directory.
    max_len = 0
    for wav_dir in wave_dir_list:
        y_wave, sr_y = audio_tool.read_audio(wav_dir)
        if y_wave.ndim != 1:  # aishell has 2 channel wav
            y_wave = y_wave.T[0] + y_wave.T[1]
        noise_id = np.random.randint(n_noise)
        noise_wave, sr_n = audio_tool.read_audio(noise_dir_list[noise_id])

        # Check the sample rates before mixing signals that may not match.
        assert sr_y == sr_n and sr_y == 16000, 'sr error sr_y:%d, sr_n %d' % (
            sr_y, sr_n)

        noise_wave = audio_tool.repeat_to_len(noise_wave, len(y_wave))
        x_wave, _alpha = audio_tool._mix_wav_by_randomSNR(y_wave, noise_wave)

        # Save the noisy mixture.
        x_wav_dir = wav_dir.replace('wav', addnoise_dir_name, 1)
        x_wav_dir = x_wav_dir.replace(root_dir, new_root_dir, 1)
        # exist_ok avoids the check-then-create race between workers.
        os.makedirs(os.path.dirname(x_wav_dir), exist_ok=True)
        audio_tool.write_audio(x_wav_dir, x_wave, sr_y)

        # Original author's shape note for the spectra: [time, 257].
        x_spec_t = spectrum_tool.magnitude_spectrum_librosa_stft(
            x_wave, PARAM.NFFT, PARAM.OVERLAP)
        x_phase_t = spectrum_tool.phase_spectrum_librosa_stft(
            x_wave, PARAM.NFFT, PARAM.OVERLAP)
        n_frames = np.shape(x_spec_t)[0]
        max_len = max(max_len, n_frames)

        # Enhance: feed the utterance as a batch of one.
        y_mag_est = sess.run(
            model.y_mag_estimation,
            feed_dict={
                model.x_mag: np.array([x_spec_t], dtype=np.float32),
                model.x_theta: np.array([x_phase_t], dtype=np.float32),
                model.lengths: np.array([n_frames], dtype=np.int32),
            })

        # Drop the batch axis before the inverse STFT, as the other decode
        # helpers do, so the iSTFT receives a single spectrogram.
        y_mag_est = y_mag_est[0] * np.exp(1j * x_phase_t)
        reY = spectrum_tool.librosa_istft(y_mag_est, PARAM.NFFT, PARAM.OVERLAP)

        # Save the enhanced waveform.
        y_wav_dir = wav_dir.replace('wav', enhanced_dir_name, 1)
        y_wav_dir = y_wav_dir.replace(root_dir, new_root_dir, 1)
        os.makedirs(os.path.dirname(y_wav_dir), exist_ok=True)
        audio_tool.write_audio(y_wav_dir, reY, PARAM.FS)

    e_time = time.time()
    print("\n----------------\n"
          "%d workers\n"
          "%s\n"
          "Worker_id %03d, rate of progress: %d/%d\n"
          "time_step_max_len: %d\n"
          "batch_sie: %d\n"
          'batch_cost_time: %ds\n' %
          (num_process, time.ctime(), i_p + 1, speaker_id,
           sub_process_speaker_num, max_len, batch_size, e_time - s_time),
          flush=True)
Beispiel #4
0
def _extract_phase(data):
    """Return the STFT phase spectrum of ``data``.

    Thin wrapper around ``spectrum_tool.phase_spectrum_librosa_stft`` that
    fixes the transform settings to ``PARAM.NFFT`` and ``PARAM.OVERLAP``.
    """
    return spectrum_tool.phase_spectrum_librosa_stft(
        data, PARAM.NFFT, PARAM.OVERLAP)
Beispiel #5
0
def addnoise_and_decoder_one_batch(waves_dir, noise_dir, sess, model):
  """Add noise to every wav in ``waves_dir`` and enhance them as one batch.

  For each file in ``waves_dir`` a randomly chosen file from ``noise_dir``
  is repeated to the speech length and mixed in via
  ``audio_tool._mix_wav_by_randomSNR``; the noisy waveform is written to
  disk. All noisy spectra are then zero-padded along the first axis to the
  longest utterance, run through the model in a single ``sess.run`` call,
  trimmed back to their own length, combined with the noisy phase and
  written to disk.

  Output paths are the input path with the first ``'wav'`` replaced by the
  module-level ``addnoise_dir_name`` (noisy) or ``enhanced_dir_name``
  (enhanced). The module-level counter ``speaker_n`` is incremented on
  every call and printed against ``all_speaker``.

  Args:
    waves_dir: Directory whose entries are the clean speech files.
    noise_dir: Directory whose entries are the candidate noise files.
    sess: Session-like object; ``sess.run`` evaluates the model.
    model: Object exposing the feeds ``x_mag``, ``x_theta``, ``lengths``
      and the fetch ``y_mag_estimation``.

  Raises:
    ValueError: If ``PARAM.RESTORE_PHASE`` is not ``'MIXED'``.
    AssertionError: If speech and noise sample rates are not both 16000.
  """
  s_time = time.time()
  global speaker_n
  speaker_n += 1
  print("\n----------------\n", "%d/%d" % (speaker_n, all_speaker), flush=True)

  # Only mixed-phase reconstruction is implemented below. The setting does
  # not depend on the data, so reject it before mixing, writing files or
  # running the model.
  if PARAM.RESTORE_PHASE != 'MIXED':
    raise ValueError('Please set PARAM.RESTORE_PHASE=MIXED.')

  noise_dir_list = [os.path.join(noise_dir, _dir) for _dir in os.listdir(noise_dir)]
  n_noise = len(noise_dir_list)
  wave_dir_list = [os.path.join(waves_dir, _dir) for _dir in os.listdir(waves_dir)]

  if not wave_dir_list:
    # Nothing to process; np.max over an empty list below would raise.
    print('batch_cost_time: %ds' % (time.time() - s_time), flush=True)
    return

  # Mix and collect the model inputs.
  # Original author's shape notes: spectra [n_wav, time, 257], lengths [n_wav].
  x_batch = []
  x_theta_batch = []
  x_lengths = []
  for wav_dir in wave_dir_list:
    y_wave, sr_y = audio_tool.read_audio(wav_dir)
    if y_wave.ndim != 1:  # aishell has 2 channel wav
      y_wave = y_wave.T[0] + y_wave.T[1]
    noise_id = np.random.randint(n_noise)
    noise_wave, sr_n = audio_tool.read_audio(noise_dir_list[noise_id])

    # Check the sample rates before mixing signals that may not match.
    assert sr_y == sr_n and sr_y == 16000, 'sr error sr_y:%d, sr_n %d' % (sr_y, sr_n)

    noise_wave = audio_tool.repeat_to_len(noise_wave, len(y_wave))
    x_wave, _alpha = audio_tool._mix_wav_by_randomSNR(y_wave, noise_wave)

    # Save the noisy mixture; exist_ok avoids a check-then-create race.
    x_wav_dir = wav_dir.replace('wav', addnoise_dir_name, 1)
    os.makedirs(os.path.dirname(x_wav_dir), exist_ok=True)
    audio_tool.write_audio(x_wav_dir, x_wave, sr_y)

    x_spec_t = spectrum_tool.magnitude_spectrum_librosa_stft(
        x_wave, PARAM.NFFT, PARAM.OVERLAP)
    x_phase_t = spectrum_tool.phase_spectrum_librosa_stft(
        x_wave, PARAM.NFFT, PARAM.OVERLAP)
    x_batch.append(x_spec_t)
    x_theta_batch.append(x_phase_t)
    x_lengths.append(np.shape(x_spec_t)[0])

  max_len = np.max(x_lengths)
  print("time_step_max_len:", max_len, flush=True)

  # Zero-pad every utterance along the first axis up to the longest one so
  # they stack into one array (np.pad's 'constant' mode pads with 0).
  x_batch = np.array(
      [np.pad(x_spec, ((0, max_len - length), (0, 0)), 'constant')
       for x_spec, length in zip(x_batch, x_lengths)],
      dtype=np.float32)
  x_theta_batch = np.array(
      [np.pad(x_theta, ((0, max_len - length), (0, 0)), 'constant')
       for x_theta, length in zip(x_theta_batch, x_lengths)],
      dtype=np.float32)
  x_lengths = np.array(x_lengths, dtype=np.int32)

  # Enhance the whole batch in one call.
  y_mag_est_batch = sess.run(
      model.y_mag_estimation,
      feed_dict={
          model.x_mag: x_batch,
          model.x_theta: x_theta_batch,
          model.lengths: x_lengths,
      })

  # Inverse STFT and save.
  print(np.shape(y_mag_est_batch), np.shape(x_theta_batch), np.shape(x_lengths),
        flush=True)
  for y_mag_est, x_theta, length, wav_dir in zip(
      y_mag_est_batch, x_theta_batch, x_lengths, wave_dir_list):
    # Cut the padding off again before reconstructing the waveform.
    y_mag_est = y_mag_est[:length, :]
    x_theta = x_theta[:length, :]

    y_mag_est = y_mag_est * np.exp(1j * x_theta)
    reY = spectrum_tool.librosa_istft(y_mag_est, PARAM.NFFT, PARAM.OVERLAP)

    y_wav_dir = wav_dir.replace('wav', enhanced_dir_name, 1)
    os.makedirs(os.path.dirname(y_wav_dir), exist_ok=True)
    audio_tool.write_audio(y_wav_dir, reY, PARAM.FS)

  e_time = time.time()
  print('batch_cost_time: %ds' % (e_time - s_time), flush=True)