def _pml_to_wav(pml_features,
                cfg,
                shift=0.005,
                dftlen=4096,
                nm_cont=False,
                verbose_level=0,
                mean_norm=None,
                std_norm=None,
                spec_type='mcep',
                pp_mcep=False,
                find_endpoint=False,
                threshold_db=0):
    """Synthesise a waveform from a frame-by-feature PML matrix.

    The columns of the 'lf0', 'mgc' and 'bap' streams are located through
    ``cfg.acoustic_start_index`` and ``cfg.acoustic_in_dimension_dict``,
    converted into the inputs of ``synthesize`` (F0 track, spectrum, noise
    mask) and rendered at the sampling rate ``cfg.wav_sr``.

    Args:
        pml_features: 2-D array with one row per frame.
        cfg: configuration object providing ``acoustic_start_index``,
            ``acoustic_in_dimension_dict`` and ``wav_sr``.
        shift: spacing between consecutive frame time stamps (presumably
            seconds -- confirm against ``synthesize``).
        dftlen: DFT length handed to the spectral conversions.
        nm_cont: forwarded unchanged to ``synthesize``.
        verbose_level: forwarded to ``synthesize`` as ``verbose``.
        mean_norm: per-feature mean; used only together with ``std_norm``.
        std_norm: per-feature standard deviation. When both are given the
            features are denormalised as ``x * std_norm + mean_norm``.
        spec_type: ``'mcep'`` (the 'mgc' stream is passed to
            ``sp.mcep2spec``) or ``'fwbnd'`` (the 'mgc' stream is passed to
            ``sp.fwbnd2linbnd`` and exponentiated).
        pp_mcep: if true, run ``generate_pp.mcep_postproc_sptk`` on the
            cepstral representation of the spectrum.
        find_endpoint: if true, truncate the result at the index returned
            by ``audio.find_endpoint``.
        threshold_db: forwarded to ``audio.find_endpoint``.

    Returns:
        The value returned by ``synthesize``, truncated when
        ``find_endpoint`` is set.

    Raises:
        ValueError: if ``spec_type`` is neither ``'mcep'`` nor ``'fwbnd'``.
    """
    # Fail early and explicitly: an unknown type used to surface much later
    # as an unbound-local error on `spec`.
    if spec_type not in ('mcep', 'fwbnd'):
        raise ValueError(
            "Unsupported spec_type {!r}; expected 'mcep' or 'fwbnd'".format(
                spec_type))

    # Denormalise. Broadcasting applies the per-feature statistics to every
    # frame without materialising tiled copies of them.
    if mean_norm is not None and std_norm is not None:
        pml_features = (pml_features * np.asarray(std_norm) +
                        np.asarray(mean_norm))

    def _stream(name):
        # Columns belonging to one acoustic stream, for all frames.
        start = cfg.acoustic_start_index[name]
        stop = start + cfg.acoustic_in_dimension_dict[name]
        return pml_features[:, start:stop]

    # F0 comes from the log-F0 stream. Entries > 0 are exponentiated, the
    # others are passed through untouched.
    # * atleast_1d keeps a single-frame input usable with len().
    # * copy() matters: without denormalisation the slice is a view of the
    #   caller's array and the in-place exp below would corrupt it.
    f0 = np.atleast_1d(np.squeeze(_stream('lf0'))).copy()
    positive = f0 > 0
    f0[positive] = np.exp(f0[positive])
    ts = shift * np.arange(len(f0))
    f0s = np.vstack((ts, f0)).T

    # Spectrum comes from the 'mgc' stream, interpreted per spec_type.
    spec_features = _stream('mgc')
    if spec_type == 'mcep':
        mcep = spec_features
        if pp_mcep:
            from lib.merlin import generate_pp
            mcep = generate_pp.mcep_postproc_sptk(mcep,
                                                  cfg.wav_sr,
                                                  dftlen=dftlen)
        spec = sp.mcep2spec(mcep, sp.bark_alpha(cfg.wav_sr), dftlen)
    else:  # 'fwbnd'
        spec = np.exp(sp.fwbnd2linbnd(spec_features, cfg.wav_sr, dftlen))
        if pp_mcep:
            from lib.merlin import generate_pp
            # Round trip through a cepstral representation so the same
            # post-processing can be applied. The spectrum is scaled by the
            # sampling rate on the way in and scaled back on the way out.
            # NOTE(review): 256 is a hard-coded third argument of
            # sp.spec2mcep (presumably the cepstral order) -- confirm.
            alpha = sp.bark_alpha(cfg.wav_sr)
            mcep = sp.spec2mcep(spec * cfg.wav_sr, alpha, 256)
            mcep_pp = generate_pp.mcep_postproc_sptk(mcep,
                                                     cfg.wav_sr,
                                                     dftlen=dftlen)
            spec = sp.mcep2spec(mcep_pp, alpha, dftlen=dftlen) / cfg.wav_sr

    # Noise mask comes from the 'bap' stream.
    nm = sp.fwbnd2linbnd(_stream('bap'), cfg.wav_sr, dftlen)

    # Use the standard PML vocoder.
    wav = synthesize(cfg.wav_sr,
                     f0s,
                     spec,
                     NM=nm,
                     nm_cont=nm_cont,
                     verbose=verbose_level)

    # Clip the waveform at the detected endpoint if required.
    if find_endpoint:
        wav = wav[:audio.find_endpoint(wav, threshold_db=threshold_db)]

    # Return the raw waveform data.
    return wav
Esempio n. 2
0
def _read_frames(path, nbframes):
    '''
    Read a raw float32 file and reshape it to nbframes rows.
    '''
    return np.fromfile(path, dtype=np.float32).reshape((nbframes, -1))


def _pdd_to_noise_mask(PDD, thresh):
    '''
    Threshold a PDD matrix into a noise mask: values below thresh become 0,
    values above become 1. Values exactly equal to thresh are left as is.
    '''
    NM = PDD.copy()
    NM[PDD < thresh] = 0.0
    NM[PDD > thresh] = 1.0
    return NM


def synthesizef(fs,
                shift=0.005,
                dftlen=4096,
                ff0=None,
                flf0=None,
                fspec=None,
                flspec=None,
                ffwlspec=None,
                ffwcep=None,
                fmcep=None,
                fpdd=None,
                fmpdd=None,
                fnm=None,
                ffwnm=None,
                nm_cont=False,
                fsyn=None,
                verbose=1):
    '''
    Call the synthesis from python using file inputs and outputs

    All input files are read as raw float32 data. Every matrix is reshaped
    to one row per F0 value.

    F0 source (at least one is required; flf0 wins if both are given):
      ff0      : F0 values
      flf0     : log F0 values (entries > 0 are exponentiated)

    Spectral source (at least one is required; when several are given, the
    last one in this list wins):
      fspec    : spectrum, used as is
      flspec   : log spectrum (exponentiated)
      ffwlspec : frequency warped log spectrum (sp.fwbnd2linbnd, then exp)
      ffwcep   : frequency warped cepstrum (sp.fwcep2loghspec, then exp)
      fmcep    : mel-cepstrum (sp.mcep2spec)

    Noise mask source (optional, NM=None if none is given; when several are
    given, the last one in this list wins):
      fpdd     : PDD, thresholded at 0.75
      fmpdd    : mel-cepstral PDD (sp.mcep2spec), thresholded at 0.75
      fnm      : noise mask, used as is
      ffwnm    : frequency warped noise mask (sp.fwbnd2linbnd)

    fsyn, if given, is the path where the result is written with
    sp.wavwrite.

    Returns what synthesize(.) returns.
    Raises ValueError if no F0 source or no spectral source is given.
    '''
    # Check the mandatory inputs before touching any file. Without this,
    # a missing input only showed up later as an unbound local variable.
    if not (ff0 or flf0):
        raise ValueError('An F0 input is required: provide ff0 or flf0')
    if not (fspec or flspec or ffwlspec or ffwcep or fmcep):
        raise ValueError('A spectral input is required: provide one of '
                         'fspec, flspec, ffwlspec, ffwcep or fmcep')

    if ff0:
        f0 = np.fromfile(ff0, dtype=np.float32)
    if flf0:
        f0 = np.fromfile(flf0, dtype=np.float32)
        voiced = f0 > 0
        f0[voiced] = np.exp(f0[voiced])
    nbframes = len(f0)
    ts = shift * np.arange(nbframes)
    f0s = np.vstack((ts, f0)).T

    # The sources are deliberately processed one after the other (no elif),
    # so that a later source overrides an earlier one, as before.
    if fspec:
        SPEC = _read_frames(fspec, nbframes)
    if flspec:
        SPEC = np.exp(_read_frames(flspec, nbframes))
    if ffwlspec:
        FWLSPEC = _read_frames(ffwlspec, nbframes)
        SPEC = np.exp(sp.fwbnd2linbnd(FWLSPEC, fs, dftlen, smooth=True))
    if ffwcep:
        FWCEP = _read_frames(ffwcep, nbframes)
        SPEC = np.exp(sp.fwcep2loghspec(FWCEP, fs, dftlen))
    if fmcep:  # pragma: no cover
        # Cannot test this because it needs SPTK
        MCEP = _read_frames(fmcep, nbframes)
        SPEC = sp.mcep2spec(MCEP, sp.bark_alpha(fs), dftlen)

    NM = None
    pdd_thresh = 0.75  # For this value, see:
    # G. Degottex and D. Erro, "A uniform phase representation for the harmonic model in speech synthesis applications," EURASIP, Journal on Audio, Speech, and Music Processing - Special Issue: Models of Speech - In Search of Better Representations, vol. 2014, iss. 1, p. 38, 2014.
    if fpdd:
        PDD = _read_frames(fpdd, nbframes)
        NM = _pdd_to_noise_mask(PDD, pdd_thresh)
    if fmpdd:  # pragma: no cover
        # Cannot test this because it needs SPTK
        MPDD = _read_frames(fmpdd, nbframes)
        PDD = sp.mcep2spec(MPDD, sp.bark_alpha(fs), dftlen)
        NM = _pdd_to_noise_mask(PDD, pdd_thresh)

    if fnm:
        NM = _read_frames(fnm, nbframes)
    if ffwnm:
        FWNM = _read_frames(ffwnm, nbframes)
        NM = sp.fwbnd2linbnd(FWNM, fs, dftlen)

    syn = synthesize(fs, f0s, SPEC, NM=NM, nm_cont=nm_cont, verbose=verbose)
    if fsyn:
        sp.wavwrite(fsyn, syn, fs, norm_max_ifneeded=True, verbose=verbose)

    return syn
Esempio n. 3
0
sys.path.append('/home/degottex/Research/CUED/Code')
from lib import sigproc as sp

if __name__ == "__main__":
    # Command-line tool: convert a frequency warped band spectrum file into
    # a spectrum file of dftlen/2+1 bins per frame.
    # Both files are raw float32. The input holds log values, since the
    # result of sp.fwbnd2linbnd is exponentiated before being written.

    argpar = argparse.ArgumentParser()
    argpar.add_argument("bndspecfile",
                        help="Input frequency warped spectrum file")
    # Required: the band count is the only way to recover the frame
    # structure of the input file. Omitting it used to crash in reshape().
    argpar.add_argument(
        "--nbbands",
        type=int,
        required=True,
        help="Number of bands in the warped spectral representation")
    argpar.add_argument("--dftlen",
                        default=4096,
                        type=int,
                        help="DFT size for the output spectrum")
    argpar.add_argument("--fs",
                        default=16000,
                        type=int,
                        help="Sampling frequency[Hz]")
    argpar.add_argument("specfile",
                        help="Output spectrum file")
    # Unrecognised arguments are tolerated and ignored, as before.
    args, unknown = argpar.parse_known_args()

    BNDSPEC = np.fromfile(args.bndspecfile, dtype=np.float32)
    BNDSPEC = BNDSPEC.reshape((-1, args.nbbands))  # one row per frame
    SPEC = np.exp(sp.fwbnd2linbnd(BNDSPEC, args.fs, args.dftlen))
    SPEC.astype('float32').tofile(args.specfile)
Esempio n. 4
0
sys.path.append('/home/degottex/Research/CUED/Code')
from lib import sigproc as sp

if __name__ == "__main__":
    # Command-line tool: compress a spectrum file (dftlen/2+1 bins per
    # frame, raw float32) into a frequency warped band representation of
    # its log values, written as raw float32.

    argpar = argparse.ArgumentParser()
    argpar.add_argument("specfile",
                        default=None,
                        help="Input spectrum file")
    argpar.add_argument("--dftlen",
                        default=4096,
                        type=int,
                        help="DFT size for the input spectrum")
    argpar.add_argument("--fs",
                        default=16000,
                        type=int,
                        help="Sampling frequency[Hz]")
    argpar.add_argument(
        "--nbbands",
        type=int,
        help="Number of bands in the warped spectral representation")
    argpar.add_argument("bndfwspecfile",
                        default=None,
                        help="Output frequency warped spectrum file")
    # Unrecognised arguments are tolerated and ignored.
    args, unknown = argpar.parse_known_args()

    # One row per frame, one column per bin of the half spectrum.
    nbbins = int(args.dftlen / 2) + 1
    SPEC = np.fromfile(args.specfile, dtype=np.float32).reshape((-1, nbbins))

    FWSPEC = sp.linbnd2fwbnd(np.log(SPEC), args.fs, args.dftlen, args.nbbands)
    FWSPEC.astype('float32').tofile(args.bndfwspecfile)

    # Developer-only visual check, disabled by default: plot the original
    # spectrum above the one rebuilt from the bands, then open a debugger.
    debug_plot = False
    if debug_plot:
        shift = 0.005
        SPECR = np.exp(sp.fwbnd2linbnd(FWSPEC, args.fs, args.dftlen))
        import matplotlib.pyplot as plt
        plt.ion()
        ts = shift * np.arange(SPEC.shape[0])
        for position, spectrum in ((211, SPEC), (212, SPECR)):
            plt.subplot(position)
            plt.imshow(sp.mag2db(spectrum).T,
                       origin='lower',
                       aspect='auto',
                       interpolation='none',
                       cmap='jet',
                       extent=[0.0, ts[-1], 0.0, args.fs / 2])
        from IPython.core.debugger import Pdb
        Pdb().set_trace()