コード例 #1
0
def get_feature(wav_path, preprocessing=False, getsize=False):
    fs, x = wavfile.read(wav_path)
    x = x.astype(np.float64)
    if audio_world_config.use_harvest:
        f0, timeaxis = pyworld.harvest(
            x,
            fs,
            frame_period=audio_world_config.frame_period,
            f0_floor=audio_world_config.f0_floor,
            f0_ceil=audio_world_config.f0_ceil)
    else:
        f0, timeaxis = pyworld.dio(
            x,
            fs,
            frame_period=audio_world_config.frame_period,
            f0_floor=audio_world_config.f0_floor,
            f0_ceil=audio_world_config.f0_ceil)
        f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    bap = pyworld.code_aperiodicity(aperiodicity, fs)

    alpha = pysptk.util.mcepalpha(fs)
    mgc = pysptk.sp2mc(spectrogram,
                       order=audio_world_config.mgc_dim,
                       alpha=alpha)
    f0 = f0[:, None]
    lf0 = f0.copy()
    nonzero_indices = np.nonzero(f0)
    lf0[nonzero_indices] = np.log(f0[nonzero_indices])
    if audio_world_config.use_harvest:
        # https://github.com/mmorise/World/issues/35#issuecomment-306521887
        vuv = (aperiodicity[:, 0] < 0.5).astype(np.float32)[:, None]
    else:
        vuv = (lf0 != 0).astype(np.float32)
    lf0 = P.interp1d(lf0, kind=audio_world_config.f0_interpolation_kind)

    # Parameter trajectory smoothing
    if audio_world_config.mod_spec_smoothing:
        hop_length = int(fs * (audio_world_config.frame_period * 0.001))
        modfs = fs / hop_length
        mgc = P.modspec_smoothing(
            mgc, modfs, cutoff=audio_world_config.mod_spec_smoothing_cutoff)

    mgc = P.delta_features(mgc, audio_world_config.windows)
    lf0 = P.delta_features(lf0, audio_world_config.windows)
    bap = P.delta_features(bap, audio_world_config.windows)

    features = np.hstack((mgc, lf0, vuv, bap))
    if preprocessing:
        out_path = wav_path.replace(".wav", "").replace("wav", "world")
        np.save(out_path, features)
    elif getsize:
        feature, mgc.shape[0], lf0.shape[0], bap.shape[0]
    else:
        return features
コード例 #2
0
    def collect_features(self, wav_path, label_path):
        fs, x = wavfile.read(wav_path)
        x = x.astype(np.float64)
        if hp_acoustic.use_harvest:
            f0, timeaxis = pyworld.harvest(
                x, fs, frame_period=hp_acoustic.frame_period,
                f0_floor=hp_acoustic.f0_floor, f0_ceil=hp_acoustic.f0_ceil)
        else:
            f0, timeaxis = pyworld.dio(
                x, fs, frame_period=hp_acoustic.frame_period,
                f0_floor=hp_acoustic.f0_floor, f0_ceil=hp_acoustic.f0_ceil)
            f0 = pyworld.stonemask(x, f0, timeaxis, fs)
        spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
        aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

        bap = pyworld.code_aperiodicity(aperiodicity, fs)
        if self.alpha is None:
            self.alpha = pysptk.util.mcepalpha(fs)
        mgc = pysptk.sp2mc(spectrogram, order=hp_acoustic.order, alpha=self.alpha)
        f0 = f0[:, None]
        lf0 = f0.copy()
        nonzero_indices = np.nonzero(f0)
        lf0[nonzero_indices] = np.log(f0[nonzero_indices])
        if hp_acoustic.use_harvest:
            # https://github.com/mmorise/World/issues/35#issuecomment-306521887
            vuv = (aperiodicity[:, 0] < 0.5).astype(np.float32)[:, None]
        else:
            vuv = (lf0 != 0).astype(np.float32)
        lf0 = P.interp1d(lf0, kind=hp_acoustic.f0_interpolation_kind)

        # Parameter trajectory smoothing
        if hp_acoustic.mod_spec_smoothing:
            hop_length = int(fs * (hp_acoustic.frame_period * 0.001))
            modfs = fs / hop_length
            mgc = P.modspec_smoothing(
                mgc, modfs, cutoff=hp_acoustic.mod_spec_smoothing_cutoff)

        mgc = P.delta_features(mgc, hp_acoustic.windows)
        lf0 = P.delta_features(lf0, hp_acoustic.windows)
        bap = P.delta_features(bap, hp_acoustic.windows)

        features = np.hstack((mgc, lf0, vuv, bap))

        # Cut silence frames by HTS alignment
        labels = hts.load(label_path)
        features = features[:labels.num_frames()]
        indices = labels.silence_frame_indices()
        features = np.delete(features, indices, axis=0)

        return features.astype(np.float32)
コード例 #3
0
ファイル: predict.py プロジェクト: tkm2261/dnn-voice-changer
def generate_changed_voice(model, input_path):

    fs, x = wavfile.read(input_path)
    x = x.astype(np.float64)
    if len(x.shape) > 1:
        x = x.mean(axis=1)

    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    alpha = pysptk.util.mcepalpha(fs)
    mc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]

    mc = P.modspec_smoothing(mc, FS / HOP_LENGHT, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)

    gen_data = model.predict(mc)

    gen_data = np.hstack([c0.reshape((-1, 1)), gen_data])

    fftlen = pyworld.get_cheaptrick_fft_size(fs)
    spectrogram = pysptk.mc2sp(
        gen_data.astype(np.float64), alpha=alpha, fftlen=fftlen)
    waveform = pyworld.synthesize(
        f0, spectrogram, aperiodicity, fs, hp.frame_period)

    return waveform
コード例 #4
0
def test_delta_features():
    T = 5
    static_dim = 2
    x = np.random.rand(T, static_dim)
    for windows in _get_windows_set():
        y = delta_features(x, windows)
        assert y.shape == (T, static_dim * len(windows))
コード例 #5
0
ファイル: delta.py プロジェクト: Iselix/kwiiyatta
    def function(self, feature, raw, key):
        if self.frame_period is None:
            self.frame_period = raw.frame_period
        elif self.frame_period != raw.frame_period:
            raise ValueError(f'frame_period of "{key}" is {raw.frame_period!r}'
                             f' but others are {self.frame_period!r}')

        return delta_features(feature, DELTA_WINDOWS)
コード例 #6
0
def test_vc_from_path(model, path, data_mean, data_std, diffvc=True):
    model.eval()

    fs, x = wavfile.read(path)
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    alpha = pysptk.util.mcepalpha(fs)
    mc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    static_dim = mc.shape[-1]
    mc = P.modspec_smoothing(mc, fs / hop_length, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)

    T = mc.shape[0]

    inputs = mc[:, :static_dim].copy()

    # Normalization
    mc_scaled = P.scale(mc, data_mean, data_std)

    # Apply model
    mc_scaled = Variable(torch.from_numpy(mc_scaled))
    R = unit_variance_mlpg_matrix(hp.windows, T)
    R = torch.from_numpy(R)
    y_hat, y_hat_static = model(mc_scaled, R)
    mc_static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    mc_static_pred = P.inv_scale(mc_static_pred, data_mean[:static_dim],
                                 data_std[:static_dim])

    outputs = mc_static_pred.copy()

    if diffvc:
        mc_static_pred = mc_static_pred - mc[:, :static_dim]

    mc = np.hstack((c0[:, None], mc_static_pred))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs,
                                      hp.frame_period)

    return waveform, inputs, outputs
コード例 #7
0
ファイル: test_dataset.py プロジェクト: Iselix/kwiiyatta
def test_delta_dataset():
    base = {
        'fp5': feature.get_analyzer(dataset.CLB_WAV),
        'fp5_2': feature.get_analyzer(dataset.CLB_WAV2),
        'fp3': feature.get_analyzer(dataset.CLB_WAV, frame_period=3)
    }

    mcep_dataset = MelCepstrumDataset(base)
    delta_dataset = DeltaFeatureDataset(mcep_dataset)

    assert (delta_dataset['fp5'] == delta_features(mcep_dataset['fp5'],
                                                   DELTA_WINDOWS)).all()

    assert (delta_dataset['fp5_2'] == delta_features(mcep_dataset['fp5_2'],
                                                     DELTA_WINDOWS)).all()

    with pytest.raises(ValueError) as e:
        delta_dataset['fp3']

    assert 'frame_period of "fp3" is 3 but others are 5' == str(e.value)
コード例 #8
0
    def collect_features(self, wav_path, label_path):
        fs, x = wavfile.read(wav_path)
        x = x.astype(np.float64)
        f0, timeaxis = pyworld.dio(x,
                                   fs,
                                   frame_period=hp_acoustic.frame_period)
        f0 = pyworld.stonemask(x, f0, timeaxis, fs)
        spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
        aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

        bap = pyworld.code_aperiodicity(aperiodicity, fs)
        if self.alpha is None:
            self.alpha = pysptk.util.mcepalpha(fs)
        mgc = pysptk.sp2mc(spectrogram,
                           order=hp_acoustic.order,
                           alpha=self.alpha)
        f0 = f0[:, None]
        lf0 = f0.copy()
        nonzero_indices = np.nonzero(f0)
        lf0[nonzero_indices] = np.log(f0[nonzero_indices])
        vuv = (lf0 != 0).astype(np.float32)
        lf0 = P.interp1d(lf0, kind=hp_acoustic.f0_interpolation_kind)

        # 50hz parameter trajectory smoothing
        hop_length = int(fs * (hp_acoustic.frame_period * 0.001))
        modfs = fs / hop_length
        mgc = P.modspec_smoothing(mgc, modfs, cutoff=50)

        mgc = P.delta_features(mgc, hp_acoustic.windows)
        lf0 = P.delta_features(lf0, hp_acoustic.windows)
        bap = P.delta_features(bap, hp_acoustic.windows)

        features = np.hstack((mgc, lf0, vuv, bap))

        # Cut silence frames by HTS alignment
        labels = hts.load(label_path)
        features = features[:labels.num_frames()]
        indices = labels.silence_frame_indices()
        features = np.delete(features, indices, axis=0)

        return features.astype(np.float32)
コード例 #9
0
ファイル: delta.py プロジェクト: Iselix/kwiiyatta
    def convert(self, feature, raw, **kwargs):
        if self.frame_period != raw.frame_period:
            raise ValueError(f'frame_period is expected to'
                             f' {self.frame_period!s}'
                             f' but {raw.frame_period!s}')

        dim = feature.shape[-1]
        result = super().convert(delta_features(feature, DELTA_WINDOWS),
                                 **kwargs)
        if result.shape[-1] > dim:
            result = result[:, :dim]
        return result
コード例 #10
0
ファイル: multistream.py プロジェクト: shamanez/gantts
def recompute_delta_features(Y, Y_data_mean, Y_data_std,
                             windows,
                             stream_sizes=[180, 3, 1, 3],
                             has_dynamic_features=[True, True, False, True]):
    start_indices = np.hstack(([0], np.cumsum(stream_sizes)[:-1]))
    end_indices = np.cumsum(stream_sizes)
    static_stream_sizes = get_static_stream_sizes(
        stream_sizes, has_dynamic_features, len(windows))

    for start_idx, end_idx, static_size, has_dynamic in zip(
            start_indices, end_indices, static_stream_sizes, has_dynamic_features):
        if has_dynamic:
            y_static = Y[:, start_idx:start_idx + static_size]
            Y[:, start_idx:end_idx] = P.delta_features(y_static, windows)

    return Y
コード例 #11
0
 def collect_features(self, wav_path):
     fs, x = wavfile.read(wav_path)
     x = x.astype(np.float64)
     f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
     f0 = pyworld.stonemask(x, f0, timeaxis, fs)
     spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
     spectrogram = P.trim_zeros_frames(spectrogram)
     if self.alpha is None:
         self.alpha = pysptk.util.mcepalpha(fs)
     mgc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=self.alpha)
     # Drop 0-th coefficient
     mgc = mgc[:, 1:]
     # 50Hz cut-off MS smoothing
     hop_length = int(fs * (hp.frame_period * 0.001))
     modfs = fs / hop_length
     mgc = P.modspec_smoothing(mgc, modfs, cutoff=50)
     # Add delta
     mgc = P.delta_features(mgc, hp.windows)
     return mgc.astype(np.float32)
コード例 #12
0
 def collect_features(self, wav_path):
     fs, x = wavfile.read(wav_path)
     x = x.astype(np.float64)
     f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
     f0 = pyworld.stonemask(x, f0, timeaxis, fs)
     spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
     spectrogram = P.trim_zeros_frames(spectrogram)
     if self.alpha is None:
         self.alpha = pysptk.util.mcepalpha(fs)
     mgc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=self.alpha)
     # Drop 0-th coefficient
     mgc = mgc[:, 1:]
     # 50Hz cut-off MS smoothing
     hop_length = int(fs * (hp.frame_period * 0.001))
     modfs = fs / hop_length
     mgc = P.modspec_smoothing(mgc, modfs, cutoff=50)
     # Add delta
     mgc = P.delta_features(mgc, hp.windows)
     return mgc.astype(np.float32)
コード例 #13
0
ファイル: multistream.py プロジェクト: zhyoung24/gantts
def recompute_delta_features(Y,
                             Y_data_mean,
                             Y_data_std,
                             windows,
                             stream_sizes=[180, 3, 1, 3],
                             has_dynamic_features=[True, True, False, True]):
    start_indices = np.hstack(([0], np.cumsum(stream_sizes)[:-1]))
    end_indices = np.cumsum(stream_sizes)
    static_stream_sizes = get_static_stream_sizes(stream_sizes,
                                                  has_dynamic_features,
                                                  len(windows))

    for start_idx, end_idx, static_size, has_dynamic in zip(
            start_indices, end_indices, static_stream_sizes,
            has_dynamic_features):
        if has_dynamic:
            y_static = Y[:, start_idx:start_idx + static_size]
            Y[:, start_idx:end_idx] = P.delta_features(y_static, windows)

    return Y
コード例 #14
0
def test_one_utt(src_path, tgt_path, disable_mlpg=False, diffvc=True):
    # GMM-based parameter generation is provided by the library in `baseline` module
    if disable_mlpg:
        # Force disable MLPG
        paramgen = MLPG(gmm, windows=[(0, 0, np.array([1.0]))], diff=diffvc)
    else:
        paramgen = MLPG(gmm, windows=windows, diff=diffvc)

    fs, x = wavfile.read(src_path)
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    pdb.set_trace()

    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    if use_delta:
        mc = delta_features(mc, windows)
    mc = paramgen.transform(mc)
    if disable_mlpg and mc.shape[-1] != static_dim:
        mc = mc[:, :static_dim]
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs,
                                      frame_period)

    return waveform
コード例 #15
0
ファイル: gmm.py プロジェクト: Emanuele93/INMCA
def test_one_utt(path_src, path_tgt, disable_mlpg=False, diffvc=True):
    if disable_mlpg:
        paramgen = MLPG(gmm, windows=[(0, 0, np.array([1.0]))], diff=diffvc)
    else:
        paramgen = MLPG(gmm, windows=windows, diff=diffvc)

    x, fs_ = sf.read(path_src)
    x = x.astype(np.float64)
    f0, time_axis = pyworld.dio(x, fs_, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, time_axis, fs_)
    spectrogram = pyworld.cheaptrick(x, f0, time_axis, fs_)
    aperiodicity = pyworld.d4c(x, f0, time_axis, fs_)

    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    if use_delta:
        mc = delta_features(mc, windows)
    mc = paramgen.transform(mc)
    if disable_mlpg and mc.shape[-1] != static_dim:
        mc = mc[:, :static_dim]
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))
    if diffvc:
        mc[:, 0] = 0
        engine = Synthesizer(MLSADF(order=order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs_,
                                      frame_period)

    return waveform
コード例 #16
0
def test_vc_from_path(model, x, fs, data_mean, data_std, diffvc=True):
    model.eval()

    hop_length = int(fs * (hp.frame_period * 0.001))
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    alpha = pysptk.util.mcepalpha(fs)
    mc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    static_dim = mc.shape[-1]
    mc = P.modspec_smoothing(mc, fs / hop_length, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)

    T = mc.shape[0]

    inputs = mc[:, :static_dim].copy()

    # Normalization
    mc_scaled = P.scale(mc, data_mean, data_std)

    mc_scaled = Variable(torch.from_numpy(mc_scaled))
    lengths = [len(mc_scaled)]

    # Add batch axis
    mc_scaled = mc_scaled.view(1, -1, mc_scaled.size(-1))

    # For MLPG
    R = unit_variance_mlpg_matrix(hp.windows, T)
    R = torch.from_numpy(R)

    # Apply model
    if model.include_parameter_generation():
        # Case: models include parameter generation in itself
        # Mulistream features cannot be used in this case
        y_hat, y_hat_static = model(mc_scaled, R, lengths=lengths)
    else:
        # Case: generic models (can be sequence model)
        assert hp.has_dynamic_features is not None
        y_hat = model(mc_scaled, lengths=lengths)
        y_hat_static = multi_stream_mlpg(y_hat, R, hp.stream_sizes,
                                         hp.has_dynamic_features)

    mc_static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    mc_static_pred = P.inv_scale(mc_static_pred, data_mean[:static_dim],
                                 data_std[:static_dim])

    outputs = mc_static_pred.copy()

    if diffvc:
        mc_static_pred = mc_static_pred - mc[:, :static_dim]

    mc = np.hstack((c0[:, None], mc_static_pred))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs,
                                      hp.frame_period)

    return waveform, inputs, outputs
コード例 #17
0
# Parameter generation
paramgen = MLPG(gmm, windows=windows, diff=True)

# Waveform generation for test set
for idx, path in enumerate(source.test_paths):
    fs, x = wavfile.read(path)
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    # aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]

    mc = delta_features(mc, windows)
    since = time.time()
    mc = paramgen.transform(mc)
    print("{}, Elapsed time in conversion: {}s".format(idx,
                                                       time.time() - since))
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))

    mc[:, 0] = 0
    engine = Synthesizer(MLSADF(order=order, alpha=alpha), hopsize=80)
    b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
    waveform = engine.synthesis(x, b)
    if not exists('resultsVC'):
        os.makedirs('resultsVC')
    wavfile.write(
        "resultsVC/{}_{}.wav".format(splitext(basename(path))[0], 'mlpg'), fs,
コード例 #18
0
    F=5 # Fold No
    '''
    for i in np.arange(0,460):
        E_t,M_t,TT=Get_Wav_EMA_PerFile(EMAfiles[i],Wavfiles[i],i)
        #W_t=W_t#[np.newaxis,:,:,:]
        E_t=E_t#[np.newaxis,:,:]
        M_t=M_t#[np.newaxis,:,:]
        Youttrain.append(E_t)
        X_trainseq.append(M_t)
        TT_Train.append(TT)
    '''
    for i in np.arange(0,460):
        if  (((i+F)%10)==0):# Test
            E_t,M_t,TT=Get_Wav_EMA_PerFile(EMAfiles[i],Wavfiles[i],i)
            E_t= delta_features(E_t, windows)
            M_t= delta_features(M_t, windows)
            #W_t=W_t[np.newaxis,:,:,np.newaxis]
            E_t=E_t[np.newaxis,:,:]
            M_t=M_t[np.newaxis,:,:]
            Youttest.append(E_t)
            X_testseq.append(M_t)
            TT_Test.append(TT)
            #print('Test '+str(i))
        elif (((i+F+1)%10)==0):# Validation
            E_t,M_t,TT=Get_Wav_EMA_PerFile(EMAfiles[i],Wavfiles[i],i)
            E_t= delta_features(E_t, windows)
            M_t= delta_features(M_t, windows)
            #W_t=W_t#[np.newaxis,:,:,:]
            E_t=E_t#[np.newaxis,:,:]
            M_t=M_t #forward_process(M_t[:,:n_mcep])
コード例 #19
0
ファイル: evaluation_vc.py プロジェクト: shamanez/gantts
def test_vc_from_path(model, x, fs, data_mean, data_std, diffvc=True):
    model.eval()

    hop_length = int(fs * (hp.frame_period * 0.001))
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    alpha = pysptk.util.mcepalpha(fs)
    mc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    static_dim = mc.shape[-1]
    mc = P.modspec_smoothing(mc, fs / hop_length, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)

    T = mc.shape[0]

    inputs = mc[:, :static_dim].copy()

    # Normalization
    mc_scaled = P.scale(mc, data_mean, data_std)

    mc_scaled = Variable(torch.from_numpy(mc_scaled))
    lengths = [len(mc_scaled)]

    # Add batch axis
    mc_scaled = mc_scaled.view(1, -1, mc_scaled.size(-1))

    # For MLPG
    R = unit_variance_mlpg_matrix(hp.windows, T)
    R = torch.from_numpy(R)

    # Apply model
    if model.include_parameter_generation():
        # Case: models include parameter generation in itself
        # Mulistream features cannot be used in this case
        y_hat, y_hat_static = model(mc_scaled, R, lengths=lengths)
    else:
        # Case: generic models (can be sequence model)
        assert hp.has_dynamic_features is not None
        y_hat = model(mc_scaled, lengths=lengths)
        y_hat_static = multi_stream_mlpg(
            y_hat, R, hp.stream_sizes, hp.has_dynamic_features)

    mc_static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    mc_static_pred = P.inv_scale(
        mc_static_pred, data_mean[:static_dim], data_std[:static_dim])

    outputs = mc_static_pred.copy()

    if diffvc:
        mc_static_pred = mc_static_pred - mc[:, :static_dim]

    mc = np.hstack((c0[:, None], mc_static_pred))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        spectrogram = pysptk.mc2sp(
            mc.astype(np.float64), alpha=alpha, fftlen=fftlen)
        waveform = pyworld.synthesize(
            f0, spectrogram, aperiodicity, fs, hp.frame_period)

    return waveform, inputs, outputs