def analysis_spec(wav, fs, f0s,
                  shift=0.005,   # Usually 5ms
                  dftlen=4096,   # You can adapt this one according to your pipeline
                  verbose=1):
    '''
    Estimate the amplitude spectral envelope of a waveform.

    Three estimators are tried, in decreasing order of preference:
      1. STRAIGHT's envelope (sigproc.pystraight), when its analysis is
         available (the one used in PML's publications);
      2. WORLD's cheaptrick (pyworld), after emitting a RuntimeWarning;
      3. a simplistic linear interpolation of sinusoidal parameters,
         after emitting a RuntimeWarning.

    Args:
        wav: Waveform samples.
        fs: Sampling frequency of wav.
        f0s: 2-D array; column 0 is used as the analysis time instants and
            column 1 as the f0 values (this is how they are handed to
            pyworld.stonemask in the WORLD path).
        shift: Time step in seconds. Only forwarded to the STRAIGHT path.
        dftlen: DFT length used for the envelope.
        verbose: Verbosity level; (verbose - 1) is forwarded to the
            sinusoidal estimator of the fallback path.

    Returns:
        The amplitude spectral envelope produced by the selected estimator.
        NOTE(review): the exact shape depends on the estimator used;
        presumably one row per entry of f0s - confirm against callers.
    '''
    # Preferred path: STRAIGHT's envelope (as in PML's publications).
    if sp.pystraight.isanalysiseavailable():
        return sigproc.pystraight.analysis_spec(
            wav, fs, f0s, shift, dftlen, keeplen=True)

    # Second choice: WORLD vocoder.
    if sigproc.interfaces.worldvocoder_is_available():
        warnings.warn(
            '\n\nWARNING: straight_mcep is not available, '
            'but WORLD vocoder has been detected and will be used instead. '
            'Note that PML-related publications present results using STRAIGHT vocoder. '
            'The results might be thus different. ',
            RuntimeWarning)

        # Imported lazily so that pyworld stays an optional dependency.
        import pyworld

        # The f0 curve supplied by the caller is used instead of pyworld's
        # own raw pitch extractor (pyworld.dio); the original note says
        # REAPER is used instead.
        times = np.ascontiguousarray(f0s[:, 0])
        raw_f0 = np.ascontiguousarray(f0s[:, 1])
        refined_f0 = pyworld.stonemask(wav, raw_f0, times, fs)  # pitch refinement

        # Smoothed spectrogram from cheaptrick.
        smoothed = pyworld.cheaptrick(
            wav, refined_f0, times, fs, fft_size=dftlen)

        # TODO Best gain correction the original author could find. Hard to
        # find the good one between PML and WORLD different syntheses.
        return 10.0 * np.sqrt(smoothed)

    # Last resort: estimate the sinusoidal parameters at regular intervals
    # and interpolate them linearly to build the envelope.
    sins, _ = sp.sinusoidal.estimate_sinusoidal_params(
        wav, fs, f0s, nbper=3, quadraticfit=True, verbose=verbose - 1)

    warnings.warn(
        "\n\nWARNING: Neither straight_mcep nor WORLD's cheaptrick spectral "
        "envelope estimators are available. Thus, a SIMPLISTIC Linear "
        "interpolation will be used for the spectral envelope. "
        "Do _NOT_ use this envelope for speech synthesis! "
        "Please use a better one (e.g. STRAIGHT's). "
        "If you use this simplistic envelope, the TTS quality will be lower "
        "than that in the results reported. ",
        RuntimeWarning)

    # The interpolated values are exponentiated below, so they are
    # presumably log-amplitudes - confirm against sp.multi_linear.
    interpolated = sp.multi_linear(sins, fs, dftlen)
    return np.exp(interpolated) * np.sqrt(float(dftlen))
def analysis_spec(wav, fs, f0s,
                  shift=0.005,   # Usually 5ms
                  dftlen=4096,   # You can adapt this one according to your pipeline
                  verbose=1):
    '''
    Estimate the amplitude spectral envelope of a waveform.

    STRAIGHT's envelope (sigproc.pystraight) is used when its analysis is
    available. Otherwise, a simplistic linear interpolation of sinusoidal
    parameters is used and a notice about the expected quality loss is
    printed on standard output.

    Args:
        wav: Waveform samples.
        fs: Sampling frequency of wav.
        f0s: f0 curve, forwarded as-is to the selected estimator.
        shift: Time step (0.005 is "usually 5ms" per the signature note).
            Only forwarded to the STRAIGHT path.
        dftlen: DFT length used for the envelope.
        verbose: Verbosity level; (verbose - 1) is forwarded to the
            sinusoidal estimator of the fallback path.

    Returns:
        The amplitude spectral envelope produced by the selected estimator.
    '''
    # The STRAIGHT path used to be guarded by `0 and ...`, which made it
    # unreachable: the fallback below always ran and reported STRAIGHT as
    # "unavailable" even when it was installed.
    if sigproc.pystraight.isanalysiseavailable():
        return sigproc.pystraight.analysis_spec(
            wav, fs, f0s, shift, dftlen, keeplen=True)

    # Estimate the sinusoidal parameters at regular intervals in order
    # to build the amplitude spectral envelope. The second returned value
    # is not needed here.
    sinsreg, _ = sp.sinusoidal.estimate_sinusoidal_params(
        wav, fs, f0s, nbper=3, quadraticfit=True, verbose=verbose - 1)

    # Tell the user that a low-quality envelope is about to be used.
    print(" WARNING: straight_mcep is unavailable.")
    print(" A SIMPLISTIC Linear interpolation is used for the amplitude envelope.")
    print(" Please replace it with a better one (e.g. STRAIGHT's).")
    print(" If you use this simplistic envelope, the TTS quality is")
    print(" very likely to be lower than that in the results reported.")

    # The interpolated values are exponentiated below, so they are
    # presumably log-amplitudes - confirm against sp.multi_linear.
    SPEC = sp.multi_linear(sinsreg, fs, dftlen)
    return np.exp(SPEC) * np.sqrt(dftlen)