Example #1
0
def convert(predictor, df):
    """Predict on one batch and convert both spectrograms to waveforms.

    One datapoint is pulled from ``df().get_data()`` and unpacked into the
    three inputs of ``predictor``. The two spectrogram batches it returns
    are denormalised, converted from dB to amplitude, raised to the
    configured emphasis power, turned into waveforms with ``spec2wav`` and
    finally passed through ``inv_preemphasis``.

    Args:
        predictor: Callable taking the three batch components and returning
            ``(pred_spec, r_spec)``.
        df: Zero-argument callable whose result exposes ``get_data()``.

    Returns:
        Tuple ``(audio, y_audio)``: waveforms for the predicted and for the
        second returned spectrogram batch, in that order.
    """
    defaults = hp.default

    first, second, third = next(df().get_data())
    pred_spec, r_spec = predictor(first, second, third)

    # Each stage handles the prediction first and the reference second.
    stages = (pred_spec, r_spec)

    # Denormalization
    stages = [denormalize_db(s, defaults.max_db, defaults.min_db)
              for s in stages]

    # dB to amplitude
    stages = [db2amp(s) for s in stages]

    # Emphasize the magnitude
    stages = [np.power(s, hp.convert.emphasis_magnitude) for s in stages]

    def to_wav(spec):
        """Invert a single spectrogram of the batch into a waveform."""
        return spec2wav(spec.T, defaults.n_fft, defaults.win_length,
                        defaults.hop_length, defaults.n_iter)

    # Spectrogram to waveform, one item of the batch at a time
    waves = [np.array([to_wav(spec) for spec in batch]) for batch in stages]

    # Apply inverse pre-emphasis
    audio, y_audio = [inv_preemphasis(w, coeff=defaults.preemphasis)
                      for w in waves]

    # Concatenating the batch into one long wav (hp.convert.one_full_wav)
    # is currently disabled in this variant.
    return audio, y_audio
Example #2
0
def convert(predictor, df):
    """Predict on one batch and convert both spectrograms to waveforms.

    Args:
        predictor: Callable invoked with one datapoint; returns
            ``(pred_spec, y_spec, ppgs)``.
        df: Zero-argument callable whose result exposes ``get_data()``.

    Returns:
        Tuple ``(audio, y_audio, ppgs)``: waveforms for ``pred_spec`` and
        ``y_spec``, plus ``ppgs`` passed through unchanged.
    """
    conf = hp.Default

    def restore(spec_db):
        """Denormalize, convert dB to amplitude, then apply the emphasis power."""
        amplitude = db2amp(denormalize_db(spec_db, conf.max_db, conf.min_db))
        return np.power(amplitude, hp.Convert.emphasis_magnitude)

    def to_waveforms(spec_batch):
        """Run spec2wav over every spectrogram of the batch."""
        return np.array(list(map(
            lambda spec: spec2wav(spec.T, conf.n_fft, conf.win_length,
                                  conf.hop_length, conf.n_iter),
            spec_batch)))

    batch = next(df().get_data())
    pred_spec, y_spec, ppgs = predictor(batch)

    pred_amp = restore(pred_spec)
    y_amp = restore(y_spec)

    # Spectrogram to waveform (prediction first, then the reference)
    audio = to_waveforms(pred_amp)
    y_audio = to_waveforms(y_amp)

    # Apply inverse pre-emphasis
    audio = inv_preemphasis(audio, coeff=conf.preemphasis)
    y_audio = inv_preemphasis(y_audio, coeff=conf.preemphasis)

    # Concatenating the batch into one long wav (hp.Convert.one_full_wav)
    # is currently disabled in this variant.
    return audio, y_audio, ppgs
Example #3
0
File: convert.py Project: v-ilin/vc
def convert(predictor, df):
    """Predict on one batch, dump the first result to disk and return audio.

    Args:
        predictor: Callable invoked with one datapoint; returns
            ``(pred_spec, y_spec, ppgs)``.
        df: Zero-argument callable whose result exposes ``get_data()``.

    Returns:
        Tuple ``(audio, y_audio, ppgs)``: waveforms for ``pred_spec`` and
        ``y_spec`` after inverse pre-emphasis, plus ``ppgs`` unchanged.

    Side effects:
        Prints the shape of the first batch component and writes the first
        predicted waveform to a hard-coded wav path.
    """
    # Fetch the batch once so the printed shape describes the data that is
    # actually fed to the predictor (previously a second batch was pulled).
    batch = next(df().get_data())
    print(batch[0].shape)
    pred_spec, y_spec, ppgs = predictor(batch)

    # Denormalization
    pred_spec = denormalize_db(pred_spec, hp.default.max_db, hp.default.min_db)
    y_spec = denormalize_db(y_spec, hp.default.max_db, hp.default.min_db)

    # dB to amplitude
    pred_spec = db2amp(pred_spec)
    y_spec = db2amp(y_spec)

    # Emphasize the magnitude
    pred_spec = np.power(pred_spec, hp.convert.emphasis_magnitude)
    y_spec = np.power(y_spec, hp.convert.emphasis_magnitude)

    def to_wav(spec):
        """Invert a single spectrogram of the batch into a waveform."""
        return spec2wav(spec.T, hp.default.n_fft, hp.default.win_length,
                        hp.default.hop_length, hp.default.n_iter)

    # Spectrogram to waveform. A list is built explicitly: on Python 3
    # ``np.array(map(...))`` wraps the lazy map object in a 0-d object array
    # instead of producing the batch of waveforms, so ``audio[0]`` fails.
    audio = np.array([to_wav(spec) for spec in pred_spec])

    # NOTE(review): debugging dump to a hard-coded absolute path, written
    # before inverse pre-emphasis is applied. ``librosa.output`` is not
    # available in recent librosa releases -- confirm this is still wanted.
    librosa.output.write_wav(
        '/home/user/vilin/deep-voice-conversion/output/file_trim_8.wav',
        audio[0], hp.default.sr)

    y_audio = np.array([to_wav(spec) for spec in y_spec])

    # Apply inverse pre-emphasis
    audio = inv_preemphasis(audio, coeff=hp.default.preemphasis)
    y_audio = inv_preemphasis(y_audio, coeff=hp.default.preemphasis)

    # Concatenating the batch into one long wav (hp.convert.one_full_wav)
    # is currently disabled in this variant.
    return audio, y_audio, ppgs
Example #4
0
def convert(predictor, data):
    """Run the predictor on a single example and convert it to audio.

    Args:
        predictor: Callable taking ``(x_mfccs, y_spec, y_mel)`` batches and
            returning ``(pred_spec, y_spec, ppgs)``.
        data: Iterable of exactly three array-likes
            ``(x_mfccs, y_spec, y_mel)`` for one example; each gets a
            leading axis of length 1 before prediction.

    Returns:
        Tuple ``(audio, y_audio, ppgs)``. When ``hp.Convert.one_full_wav``
        is truthy, both audio arrays are flattened to shape ``(1, size)``.
    """
    def add_batch_axis(value):
        """Copy ``value`` into an array with a new leading axis of size 1."""
        arr = np.array(value)
        return arr.reshape((1, ) + arr.shape)

    # Unpack once: the duplicated unpack in the previous version was
    # redundant and failed outright for one-shot iterables.
    x_mfccs, y_spec, y_mel = data
    x_mfccs = add_batch_axis(x_mfccs)
    y_spec = add_batch_axis(y_spec)
    y_mel = add_batch_axis(y_mel)
    pred_spec, y_spec, ppgs = predictor(x_mfccs, y_spec, y_mel)

    # Denormalization
    pred_spec = denormalize_db(pred_spec, hp.Default.max_db, hp.Default.min_db)
    y_spec = denormalize_db(y_spec, hp.Default.max_db, hp.Default.min_db)

    # dB to amplitude
    pred_spec = db2amp(pred_spec)
    y_spec = db2amp(y_spec)

    # Emphasize the magnitude
    pred_spec = np.power(pred_spec, hp.Convert.emphasis_magnitude)
    y_spec = np.power(y_spec, hp.Convert.emphasis_magnitude)

    def to_wav(spec):
        """Invert a single spectrogram of the batch into a waveform."""
        return spec2wav(spec.T, hp.Default.n_fft, hp.Default.win_length,
                        hp.Default.hop_length, hp.Default.n_iter)

    # Spectrogram to waveform
    audio = np.array([to_wav(spec) for spec in pred_spec])
    y_audio = np.array([to_wav(spec) for spec in y_spec])

    # Apply inverse pre-emphasis
    audio = inv_preemphasis(audio, coeff=hp.Default.preemphasis)
    y_audio = inv_preemphasis(y_audio, coeff=hp.Default.preemphasis)

    if hp.Convert.one_full_wav:
        # Concatenate to a wav
        y_audio = np.reshape(y_audio, (1, y_audio.size), order='C')
        audio = np.reshape(audio, (1, audio.size), order='C')

    return audio, y_audio, ppgs
Example #5
0
 def predict(self, path_to_wav):
     """Run ``self.predictor`` on one wav file and invert the spectrograms.

     Args:
         path_to_wav: Path handed to ``get_mfccs_and_spectrogram``.

     NOTE(review): no ``return`` statement is visible for this method, so
     the computed ``audio`` / ``y_audio`` are not handed back to the caller
     here -- confirm against the full source.
     """
     x_mfcc, y_spec, mel = get_mfccs_and_spectrogram(path_to_wav)
     pred_spec, y_spec, ppgs = self.predictor(x_mfcc, y_spec, mel)

     # Denormalization
     pred_spec = denormalize_db(pred_spec, hp.default.max_db,
                                hp.default.min_db)
     y_spec = denormalize_db(y_spec, hp.default.max_db, hp.default.min_db)

     # dB to amplitude
     pred_spec = db2amp(pred_spec)
     y_spec = db2amp(y_spec)

     # Emphasize the magnitude
     pred_spec = np.power(pred_spec, hp.convert.emphasis_magnitude)
     y_spec = np.power(y_spec, hp.convert.emphasis_magnitude)

     def to_wav(spec):
         """Invert a single spectrogram of the batch into a waveform."""
         return spec2wav(spec.T, hp.default.n_fft, hp.default.win_length,
                         hp.default.hop_length, hp.default.n_iter)

     # Spectrogram to waveform. A list is built explicitly: on Python 3
     # ``np.array(map(...))`` wraps the lazy map object in a 0-d object array
     # and never calls spec2wav at all.
     audio = np.array([to_wav(spec) for spec in pred_spec])
     y_audio = np.array([to_wav(spec) for spec in y_spec])
Example #6
0
def convert(predictor, df):
    """Predict on one batch and convert both spectrograms to waveforms.

    Args:
        predictor: Callable invoked with one datapoint; returns
            ``(pred_spec, y_spec, ppgs)``.
        df: Zero-argument callable whose result exposes ``get_data()``.

    Returns:
        Tuple ``(audio, y_audio, ppgs)``. When ``hp.convert.one_full_wav``
        is truthy, both audio arrays are flattened to shape ``(1, size)``.
    """
    # TODO need to fix reading in with duration
    pred_spec, y_spec, ppgs = predictor(next(df().get_data()))

    # Denormalization
    pred_spec = denormalize_db(pred_spec, hp.default.max_db, hp.default.min_db)
    y_spec = denormalize_db(y_spec, hp.default.max_db, hp.default.min_db)

    # dB to amplitude
    pred_spec = db2amp(pred_spec)
    y_spec = db2amp(y_spec)

    # Emphasize the magnitude
    pred_spec = np.power(pred_spec, hp.convert.emphasis_magnitude)
    y_spec = np.power(y_spec, hp.convert.emphasis_magnitude)

    def to_wav(spec):
        """Invert a single spectrogram of the batch into a waveform."""
        return spec2wav(spec.T, hp.default.n_fft, hp.default.win_length,
                        hp.default.hop_length, hp.default.n_iter)

    # Spectrogram to waveform. A list is built explicitly: on Python 3
    # ``np.array(map(...))`` wraps the lazy map object in a 0-d object array,
    # which would make the reshape below operate on a single map object.
    audio = np.array([to_wav(spec) for spec in pred_spec])
    y_audio = np.array([to_wav(spec) for spec in y_spec])

    # Apply inverse pre-emphasis
    audio = inv_preemphasis(audio, coeff=hp.default.preemphasis)
    y_audio = inv_preemphasis(y_audio, coeff=hp.default.preemphasis)

    if hp.convert.one_full_wav:
        # Concatenate to a wav
        y_audio = np.reshape(y_audio, (1, y_audio.size), order='C')
        audio = np.reshape(audio, (1, audio.size), order='C')

    return audio, y_audio, ppgs
Example #7
0
def sumimage(mel, mel_name):
    """Emit TensorBoard image and audio summaries for a mel-spectrogram batch.

    The batch is logged as images, then every item is denormalised with
    ``denormalize_0_1``, converted from dB to amplitude, projected back to a
    linear-frequency magnitude spectrogram through the pseudo-inverse of the
    mel filter bank, and synthesised to audio with ``spec2wav``.

    Args:
        mel: 3-D array of normalised mel spectrograms (it is transposed with
            axes ``(0, 2, 1)`` for the image summary).
        mel_name: Tag used for both the image and the audio summary; also
            printed with the per-item debug statistics.

    Returns:
        None. The function only prints statistics and records summaries.
    """
    mel_image = mel.transpose(0, 2, 1)
    heatmap = np.expand_dims(mel_image, 3)
    tf.summary.image(mel_name, heatmap, max_outputs=mel_image.shape[0])

    # Keyword arguments work across librosa versions; newer releases no
    # longer accept these parameters positionally.
    mel_basis = librosa.filters.mel(sr=hp.sr, n_fft=hp.n_fft,
                                    n_mels=hp.n_mels)
    # ``np.mat(...).I`` was used here before; ``np.mat`` no longer exists in
    # NumPy 2.0. The filter bank is generally not square, in which case
    # ``.I`` already computed the Moore-Penrose pseudo-inverse.
    mel_basis_inv = np.linalg.pinv(mel_basis)

    mel_spec = []
    for mel_item_norm in mel:
        # Debug statistics of the normalised input.
        print(mel_name)
        print(np.max(mel_item_norm))
        print(np.min(mel_item_norm))
        print(np.mean(mel_item_norm))

        mel_db_item = np.transpose(mel_item_norm)
        mel_db_item = denormalize_0_1(mel_db_item, hp.max_db, hp.min_db)

        print(np.max(mel_db_item))
        print(np.mean(mel_db_item))

        mel_item = db2amp(mel_db_item)
        print(np.max(mel_item))

        mag_item = np.dot(mel_basis_inv, mel_item)
        # NOTE(review): this prints the mel maximum a second time; the
        # magnitude maximum may have been intended -- confirm.
        print(np.max(mel_item))
        # The pseudo-inverse can produce negative magnitudes; clip them.
        mag_item = np.maximum(mag_item, 0)
        mel_spec.append(np.transpose(mag_item))

    mel_spec = np.power(mel_spec, hp.emphasis_magnitude)
    mel_audio = np.array([
        spec2wav(spec.T, hp.n_fft, hp.win_length, hp.hop_length, hp.n_iter)
        for spec in mel_spec
    ])

    mel_audio = inv_preemphasis(mel_audio, coeff=hp.preemphasis)
    tf.summary.audio(mel_name, mel_audio, hp.sr, max_outputs=hp.batch_size)
Example #8
0
def sumspecimage(spec, spec_name):
    """Emit TensorBoard image and audio summaries for a spectrogram batch.

    Args:
        spec: 3-D array of normalised dB spectrograms (it is transposed with
            axes ``(0, 2, 1)`` for the image summary).
        spec_name: Tag used for both the image and the audio summary.

    Returns:
        None. The function only records summaries.
    """
    amplitude = db2amp(denormalize_db(spec, hp.max_db, hp.min_db))

    # Image summary: swap the last two axes and append an axis at index 3.
    image_batch = amplitude.transpose(0, 2, 1)
    tf.summary.image(spec_name, np.expand_dims(image_batch, 3),
                     max_outputs=image_batch.shape[0])

    # Clip negatives. The exponent is fixed to 1 here, so the configured
    # hp.emphasis_magnitude is not applied.
    clipped = np.power(np.maximum(amplitude, 0), 1)

    waveforms = np.array([
        spec2wav(item.T, hp.n_fft, hp.win_length, hp.hop_length, hp.n_iter)
        for item in clipped
    ])
    waveforms = inv_preemphasis(waveforms, coeff=hp.preemphasis)

    tf.summary.audio(spec_name, waveforms, hp.sr, max_outputs=hp.batch_size)
Example #9
0
def convert(predictor, tensor):
    """Run the predictor on ``tensor`` and convert the prediction to audio.

    Only the predicted spectrogram is converted; the second value returned
    by the predictor is ignored in this variant.

    Args:
        predictor: Callable invoked with ``tensor``; returns
            ``(pred_spec, y_spec, ppgs)``.
        tensor: Input handed to ``predictor`` unchanged.

    Returns:
        Tuple ``(audio, ppgs)``: waveforms for ``pred_spec`` after inverse
        pre-emphasis, plus ``ppgs`` unchanged.
    """
    pred_spec, _y_spec, ppgs = predictor(tensor)

    # Denormalization
    pred_spec = denormalize_db(pred_spec, hp.default.max_db, hp.default.min_db)

    # dB to amplitude
    pred_spec = db2amp(pred_spec)

    # Emphasize the magnitude
    pred_spec = np.power(pred_spec, hp.convert.emphasis_magnitude)

    # Spectrogram to waveform. A list is built explicitly: on Python 3
    # ``np.array(map(...))`` wraps the lazy map object in a 0-d object array
    # instead of producing the batch of waveforms.
    audio = np.array([
        spec2wav(spec.T, hp.default.n_fft, hp.default.win_length,
                 hp.default.hop_length, hp.default.n_iter)
        for spec in pred_spec
    ])

    # Apply inverse pre-emphasis
    audio = inv_preemphasis(audio, coeff=hp.default.preemphasis)

    return audio, ppgs
Example #10
0
import numpy as np
import matplotlib.pyplot as plt

from audio import spec2wav, wav2spec, read_wav, write_wav


if __name__ == '__main__':
    # Round-trip demo: read a wav, compute its spectrogram, synthesise a
    # waveform back from it, then save both the audio and a spectrogram plot.

    # Analysis settings
    sample_rate = 22050
    fft_size = 512
    window = 400
    hop = 80
    seconds = 2  # length of audio to read, in seconds

    source_wav = read_wav("H:\\cs230\\wav_x\\1_1.wav", sample_rate, seconds)
    # wav2spec returns a pair; only the first element is used here.
    spectrogram = wav2spec(source_wav, fft_size, window, hop, False)[0]

    # The last argument is presumably the iteration count of the inversion
    # (other callers pass hp.default.n_iter in that position).
    restored_wav = spec2wav(spectrogram, fft_size, window, hop, 600)
    write_wav(restored_wav, sample_rate, 'a.wav')

    # Save the spectrogram as an image.
    plt.pcolormesh(spectrogram)
    plt.ylabel('Frequency')
    plt.xlabel('Time')
    plt.savefig("a.png")