def _pml_to_wav(pml_features, cfg, shift=0.005, dftlen=4096, nm_cont=False,
                verbose_level=0, mean_norm=None, std_norm=None,
                spec_type='mcep', pp_mcep=False, find_endpoint=False,
                threshold_db=0):
    """Synthesise a waveform from a matrix of PML acoustic features.

    The feature matrix holds one frame per row. The columns of each stream
    ('lf0', 'mgc', 'bap') are located through ``cfg.acoustic_start_index``
    and ``cfg.acoustic_in_dimension_dict``.

    Args:
        pml_features: 2-D array of acoustic features, one row per frame.
            It is never modified by this function.
        cfg: Configuration object providing ``acoustic_start_index``,
            ``acoustic_in_dimension_dict`` and ``wav_sr``.
        shift: Step between consecutive frames, used to build the time
            stamps attached to f0 (presumably seconds -- confirm against
            ``synthesize``).
        dftlen: DFT length handed to the spectral conversion routines.
        nm_cont: Forwarded to ``synthesize`` as ``nm_cont``.
        verbose_level: Forwarded to ``synthesize`` as ``verbose``.
        mean_norm: Per-dimension mean used to de-normalise the features.
        std_norm: Per-dimension standard deviation used to de-normalise.
            De-normalisation only happens when both statistics are given.
        spec_type: ``'mcep'`` or ``'fwbnd'``; selects how the 'mgc' stream is
            turned into a spectrum.
        pp_mcep: If true, apply ``generate_pp.mcep_postproc_sptk`` to the
            cepstral representation before building the spectrum.
        find_endpoint: If true, truncate the waveform at the index returned
            by ``audio.find_endpoint``.
        threshold_db: Threshold forwarded to ``audio.find_endpoint``.

    Returns:
        The waveform returned by ``synthesize``, optionally truncated.

    Raises:
        ValueError: If ``spec_type`` is not supported, or if the 'lf0'
            stream is not exactly one column wide.
    """
    # De-normalise. Broadcasting applies the per-dimension statistics to
    # every frame and yields a new array, leaving the caller's data intact.
    if mean_norm is not None and std_norm is not None:
        pml_features = pml_features * std_norm + mean_norm

    # f0 comes from the log-f0 stream. Squeeze only the feature axis so that
    # a single-frame input stays 1-D, and copy before exponentiating in
    # place: the slice is a view of the caller's array.
    f0 = np.array(np.squeeze(_pml_stream(pml_features, cfg, 'lf0'), axis=1))
    voiced = f0 > 0
    f0[voiced] = np.exp(f0[voiced])
    ts = shift * np.arange(len(f0))
    f0s = np.vstack((ts, f0)).T

    # The spectrum comes from the 'mgc' stream, interpreted per spec_type.
    if spec_type == 'mcep':
        mcep = _pml_stream(pml_features, cfg, 'mgc')
        if pp_mcep:
            # Imported lazily: only needed when post-processing is requested.
            from lib.merlin import generate_pp
            mcep = generate_pp.mcep_postproc_sptk(mcep, cfg.wav_sr,
                                                  dftlen=dftlen)
        spec = sp.mcep2spec(mcep, sp.bark_alpha(cfg.wav_sr), dftlen)
    elif spec_type == 'fwbnd':
        compspec = _pml_stream(pml_features, cfg, 'mgc')
        spec = np.exp(sp.fwbnd2linbnd(compspec, cfg.wav_sr, dftlen))
        if pp_mcep:
            from lib.merlin import generate_pp
            # Round-trip through mel-cepstrum so the same post-filter can be
            # used; the spectrum is scaled by wav_sr on the way in and
            # unscaled on the way out.
            mcep = sp.spec2mcep(spec * cfg.wav_sr,
                                sp.bark_alpha(cfg.wav_sr), 256)
            mcep_pp = generate_pp.mcep_postproc_sptk(mcep, cfg.wav_sr,
                                                     dftlen=dftlen)
            spec = sp.mcep2spec(mcep_pp, sp.bark_alpha(cfg.wav_sr),
                                dftlen=dftlen) / cfg.wav_sr
    else:
        # Previously `spec` was simply left unbound and the call to
        # synthesize failed with an unrelated-looking error.
        raise ValueError(
            "unsupported spec_type %r (expected 'mcep' or 'fwbnd')"
            % (spec_type,))

    # The noise mask (NM) comes from the 'bap' stream.
    fwnm = _pml_stream(pml_features, cfg, 'bap')
    nm = sp.fwbnd2linbnd(fwnm, cfg.wav_sr, dftlen)

    # Use the standard PML vocoder.
    wav = synthesize(cfg.wav_sr, f0s, spec, NM=nm, nm_cont=nm_cont,
                     verbose=verbose_level)

    # Clip the waveform to the detected endpoint if required.
    if find_endpoint:
        wav = wav[:audio.find_endpoint(wav, threshold_db=threshold_db)]

    # Return the raw waveform data.
    return wav


def _pml_stream(features, cfg, name):
    """Return the columns of `features` belonging to the stream `name`."""
    start = cfg.acoustic_start_index[name]
    return features[:, start:start + cfg.acoustic_in_dimension_dict[name]]
def synthesizef(fs, shift=0.005, dftlen=4096, ff0=None, flf0=None,
                fspec=None, flspec=None, ffwlspec=None, ffwcep=None,
                fmcep=None, fpdd=None, fmpdd=None, fnm=None, ffwnm=None,
                nm_cont=False, fsyn=None, verbose=1):
    """Call the synthesis from python using file inputs and outputs.

    Every input file is read as raw float32. Frame-wise files are reshaped
    to one row per f0 value. When several alternative files are given for
    the same quantity, each one is read and the last one in the order
    below wins.

    Args:
        fs: Sampling frequency passed to the conversion routines, to
            ``synthesize`` and to the wav writer.
        shift: Step between consecutive frames, used for the f0 time stamps.
        dftlen: DFT length passed to the spectral conversion routines.
        ff0: File with f0 values.
        flf0: File with log-f0 values; strictly positive entries are
            exponentiated.
        fspec: File with the spectrum.
        flspec: File with the log spectrum.
        ffwlspec: File with values converted through ``sp.fwbnd2linbnd``
            (with ``smooth=True``) and exponentiated.
        ffwcep: File with values converted through ``sp.fwcep2loghspec``
            and exponentiated.
        fmcep: File with mel-cepstral coefficients (needs SPTK).
        fpdd: File with PDD values, thresholded into a noise mask.
        fmpdd: File with a mel-cepstral representation of PDD (needs SPTK).
        fnm: File with the noise mask.
        ffwnm: File with values converted through ``sp.fwbnd2linbnd`` to
            obtain the noise mask.
        nm_cont: Forwarded to ``synthesize``.
        fsyn: If given, the synthesised waveform is written to this file.
        verbose: Verbosity forwarded to ``synthesize`` and the wav writer.

    Returns:
        The waveform returned by ``synthesize``.

    Raises:
        ValueError: If no f0 input or no spectral input is given.
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if not (ff0 or flf0):
        raise ValueError('synthesizef needs an f0 input: ff0 or flf0')
    if not (fspec or flspec or ffwlspec or ffwcep or fmcep):
        raise ValueError('synthesizef needs a spectral input: fspec, '
                         'flspec, ffwlspec, ffwcep or fmcep')

    if ff0:
        f0 = np.fromfile(ff0, dtype=np.float32)
    if flf0:
        f0 = np.fromfile(flf0, dtype=np.float32)
        voiced = f0 > 0
        f0[voiced] = np.exp(f0[voiced])
    nframes = len(f0)
    ts = shift * np.arange(nframes)
    f0s = np.vstack((ts, f0)).T

    if fspec:
        SPEC = _read_frames(fspec, nframes)
    if flspec:
        SPEC = np.exp(_read_frames(flspec, nframes))
    if ffwlspec:
        FWLSPEC = _read_frames(ffwlspec, nframes)
        SPEC = np.exp(sp.fwbnd2linbnd(FWLSPEC, fs, dftlen, smooth=True))
    if ffwcep:
        FWCEP = _read_frames(ffwcep, nframes)
        SPEC = np.exp(sp.fwcep2loghspec(FWCEP, fs, dftlen))
    if fmcep:  # pragma: no cover
        # Cannot test this because it needs SPTK
        MCEP = _read_frames(fmcep, nframes)
        SPEC = sp.mcep2spec(MCEP, sp.bark_alpha(fs), dftlen)

    NM = None
    pdd_thresh = 0.75
    # For this value, see:
    # G. Degottex and D. Erro, "A uniform phase representation for the
    # harmonic model in speech synthesis applications," EURASIP, Journal on
    # Audio, Speech, and Music Processing - Special Issue: Models of Speech -
    # In Search of Better Representations, vol. 2014, iss. 1, p. 38, 2014.
    if fpdd:
        NM = _pdd_to_noise_mask(_read_frames(fpdd, nframes), pdd_thresh)
    if fmpdd:  # pragma: no cover
        # Cannot test this because it needs SPTK
        MPDD = _read_frames(fmpdd, nframes)
        PDD = sp.mcep2spec(MPDD, sp.bark_alpha(fs), dftlen)
        NM = _pdd_to_noise_mask(PDD, pdd_thresh)
    if fnm:
        NM = _read_frames(fnm, nframes)
    if ffwnm:
        FWNM = _read_frames(ffwnm, nframes)
        NM = sp.fwbnd2linbnd(FWNM, fs, dftlen)

    syn = synthesize(fs, f0s, SPEC, NM=NM, nm_cont=nm_cont, verbose=verbose)

    if fsyn:
        sp.wavwrite(fsyn, syn, fs, norm_max_ifneeded=True, verbose=verbose)

    return syn


def _read_frames(path, nframes):
    """Read a raw float32 file and reshape it to `nframes` rows."""
    return np.fromfile(path, dtype=np.float32).reshape((nframes, -1))


def _pdd_to_noise_mask(pdd, thresh):
    """Binarise `pdd` into a 0/1 mask: 0 below `thresh`, 1 otherwise."""
    mask = pdd.copy()
    mask[pdd < thresh] = 0.0
    # '>=' so that values exactly equal to the threshold are binarised too;
    # with a strict '>' they kept their raw value. NaNs are left untouched.
    mask[pdd >= thresh] = 1.0
    return mask
# NOTE(review): hard-coded developer path; kept so existing setups still
# resolve `lib`, but it should be replaced by a proper package install.
sys.path.append('/home/degottex/Research/CUED/Code')
from lib import sigproc as sp

if __name__ == "__main__":
    # Command-line tool: read a raw float32 file holding a warped band
    # spectrum (one frame per row, `--nbbands` values per frame), convert it
    # with sp.fwbnd2linbnd, exponentiate, and write the result as raw float32.
    argpar = argparse.ArgumentParser()
    argpar.add_argument("bndspecfile", default=None,
                        help="Input warped band spectrum file")
    # Required: the input cannot be reshaped into frames without it (it used
    # to be optional, and omitting it crashed in reshape with a TypeError).
    argpar.add_argument(
        "--nbbands", type=int, required=True,
        help="Number of bands in the warped spectral representation")
    argpar.add_argument("--dftlen", default=4096, type=int,
                        help="DFT size for the output spectrum")
    argpar.add_argument("--fs", default=16000, type=int,
                        help="Sampling frequency[Hz]")
    argpar.add_argument("specfile", default=None,
                        help="Output spectrum file")
    # Unknown extra arguments are tolerated and ignored.
    args, unknown = argpar.parse_known_args()

    BNDSPEC = np.fromfile(args.bndspecfile, dtype=np.float32)
    BNDSPEC = BNDSPEC.reshape((-1, args.nbbands))

    SPEC = np.exp(sp.fwbnd2linbnd(BNDSPEC, args.fs, args.dftlen))

    SPEC.astype('float32').tofile(args.specfile)
sys.path.append('/home/degottex/Research/CUED/Code')
from lib import sigproc as sp

if __name__ == "__main__":
    # Command-line tool: read a raw float32 spectrum (one frame per row,
    # dftlen/2+1 values per frame), convert its logarithm with
    # sp.linbnd2fwbnd, and write the result as raw float32.
    parser = argparse.ArgumentParser()
    parser.add_argument("specfile", default=None,
                        help="Input spectrum file")
    parser.add_argument("--dftlen", default=4096, type=int,
                        help="DFT size for the input spectrum")
    parser.add_argument("--fs", default=16000, type=int,
                        help="Sampling frequency[Hz]")
    parser.add_argument(
        "--nbbands", type=int,
        help="Number of bands in the warped spectral representation")
    parser.add_argument("bndfwspecfile", default=None,
                        help="Output frequency warped spectrum file")
    # Unknown extra arguments are tolerated and ignored.
    args, _ignored = parser.parse_known_args()

    bins_per_frame = int(args.dftlen / 2) + 1
    lin_spec = np.fromfile(args.specfile, dtype=np.float32).reshape(
        (-1, bins_per_frame))

    warped_spec = sp.linbnd2fwbnd(np.log(lin_spec), args.fs, args.dftlen,
                                  args.nbbands)
    warped_spec.astype('float32').tofile(args.bndfwspecfile)

    if 0:
        # Disabled debugging aid: plot the input spectrum above its
        # reconstruction from the warped bands, then drop into a debugger.
        import matplotlib.pyplot as plt

        shift = 0.005
        resynth_spec = np.exp(
            sp.fwbnd2linbnd(warped_spec, args.fs, args.dftlen))
        plt.ion()
        ts = shift * np.arange(lin_spec.shape[0])
        for panel, magnitudes in ((211, lin_spec), (212, resynth_spec)):
            plt.subplot(panel)
            plt.imshow(sp.mag2db(magnitudes).T, origin='lower',
                       aspect='auto', interpolation='none', cmap='jet',
                       extent=[0.0, ts[-1], 0.0, args.fs/2])
        from IPython.core.debugger import Pdb
        Pdb().set_trace()