Beispiel #1
0
def analysisf(
    fwav,
    shift=0.005,
    dftlen=4096,
    inf0txt_file=None,
    f0_min=60,
    f0_max=600,
    f0_file=None,
    f0_log=False,
    inf0bin_file=None,  # input f0 file in binary
    spec_file=None,
    spec_order=None,  # Mel-cepstral order for compressing the 
    # spectrum (typically 59; None: no compression)
    pdd_file=None,
    pdd_order=None,  # Mel-cepstral order for compressing PDD
    # spectrum (typically 59; None: no compression)
    nm_file=None,
    nm_nbbnds=None,  # Number of mel-bands in the compressed mask
    # (None: no compression)
    verbose=1):
    """Analyse a waveform file and write the requested features to disk.

    Every output is written as raw float32 with ``ndarray.tofile``; an
    output is only computed when its file name is given.

    Args:
        fwav: Path of the waveform, read with ``sp.wavread``.
        shift: Forwarded to ``analysis_f0postproc`` and ``analysis_spec``
            (reported in seconds by the progress message).
        dftlen: DFT length forwarded to ``analysis_spec``/``analysis_pdd``
            and used when compressing the noise mask.
        inf0txt_file: Optional f0 input, loaded with ``np.loadtxt``.
        f0_min, f0_max: f0 bounds forwarded to ``analysis_f0postproc``.
        f0_file: Output file for column 1 of the post-processed f0 array.
        f0_log: If true, the natural log of the f0 values is written.
        inf0bin_file: Optional f0 input read as flat float32 values; it
            overrides ``inf0txt_file`` when both are given.
        spec_file: Output file for the spectrogram.
        spec_order: If not None, the spectrogram is compressed with
            ``sp.spec2mcep`` using this order before being written.
        pdd_file: Output file for the PDD.
        pdd_order: If not None, the PDD is floored at 0.001 and compressed
            with ``sp.spec2mcep`` using this order before being written.
        nm_file: Output file for the noise mask.
        nm_nbbnds: If truthy, the noise mask is compressed with
            ``sp.linbnd2fwbnd`` into this number of bands.
        verbose: > 0 prints progress, > 2 additionally calls
            ``plot_features``.

    Returns:
        None. Results are only written to the given files.
    """
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print(
            'PM Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
            .format(len(wav) / float(fs), fs, f0_min, f0_max, shift, dftlen))

    f0s = None
    if inf0txt_file:
        f0s = np.loadtxt(inf0txt_file)

    # read input f0 file in float32 (ljuvela)
    if inf0bin_file:
        f0s = np.fromfile(inf0bin_file, dtype=np.float32)

    f0s = analysis_f0postproc(wav,
                              fs,
                              f0s,
                              f0_min=f0_min,
                              f0_max=f0_max,
                              shift=shift,
                              verbose=verbose)

    if f0_file:
        f0_values = f0s[:, 1]
        if verbose > 0:
            print('    Output F0 {} in: {}'.format(f0_values.shape, f0_file))
        if f0_log:
            f0_values = np.log(f0_values)
        f0_values.astype(np.float32).tofile(f0_file)

    SPEC = None
    if spec_file:
        SPEC = analysis_spec(wav,
                             fs,
                             f0s,
                             shift=shift,
                             dftlen=dftlen,
                             verbose=verbose)
        if spec_order is not None:
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_order)
        if verbose > 0:
            print('    Output Spectrogram size={} in: {}'.format(
                SPEC.shape, spec_file))
        SPEC.astype(np.float32).tofile(spec_file)

    PDD = None
    # The noise mask must be derived from the PDD as estimated, not from
    # the (optionally) compressed version that is written to pdd_file.
    pdd_for_nm = None
    if pdd_file or nm_file:
        PDD = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)
        pdd_for_nm = PDD

    if pdd_file:
        if pdd_order is not None:
            # If asked, compress PDD.
            # The 0.001 floor (from COVAREP) is applied on a copy so that
            # the array kept for the noise mask is left untouched.
            PDD = sp.spec2mcep(np.maximum(PDD, 0.001), sp.bark_alpha(fs),
                               pdd_order)
        if verbose > 0:
            print('    Output PDD size={} in: {}'.format(PDD.shape, pdd_file))
        PDD.astype(np.float32).tofile(pdd_file)

    NM = None
    if nm_file:
        NM = analysis_nm(wav, fs, f0s, pdd_for_nm, verbose=verbose)
        # If asked, compress NM
        if nm_nbbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbbnds)
        if verbose > 0:
            print('    Output Noise Mask size={} in: {}'.format(
                NM.shape, nm_file))
        NM.astype(np.float32).tofile(nm_file)

    if verbose > 2:
        plot_features(wav=wav, fs=fs, f0s=f0s, SPEC=SPEC, PDD=PDD, NM=NM)
Beispiel #2
0
def _ensure_parent_dir(fpath):
    """Create the directory that will contain `fpath` if it is missing."""
    dirpath = os.path.dirname(fpath)
    if dirpath != '' and not os.path.isdir(dirpath):
        # makedirs (unlike mkdir) also creates missing intermediate levels.
        os.makedirs(dirpath)


def analysisf(
        fwav,
        shift=0.005,
        dftlen=4096,
        finf0txt=None,
        f0estimator='REAPER',
        f0_min=60,
        f0_max=600,
        ff0=None,
        f0_log=False,
        finf0bin=None,  # input f0 file in binary
        fspec=None,
        spec_mceporder=None,  # Mel-cepstral order for compressing the spectrogram (typically 59; None: no compression)
        spec_fwceporder=None,  # Frequency warped cepstral order (very similar to above, just faster and less precise) (typically 59; None: no compression)
        spec_nbfwbnds=None,  # Number of mel-bands in the compressed half log spectrogram (None: no compression)
        spec_nblinlogbnds=None,  # Number of linear-bands in the compressed half log spectrogram (None: no compression)
        fpdd=None,
        pdd_mceporder=None,  # Mel-cepstral order for compressing PDD spectrogram (typically 59; None: no compression)
        fnm=None,
        nm_nbfwbnds=None,  # Number of mel-bands in the compressed noise mask (None: no compression)
        preproc_fs=None,  # Resample the waveform
        preproc_hp=None,  # Cut-off of high-pass filter (e.g. 20Hz)
        verbose=1):
    """Analyse a waveform file and write the requested features to disk.

    Every output is written as raw float32 with ``ndarray.tofile``; an
    output is only computed when its file name is given. Missing output
    directories are created.

    Args:
        fwav: Path of the waveform, read with ``sp.wavread``.
        shift: Forwarded to ``analysis_f0postproc`` and ``analysis_spec``
            (reported in seconds by the progress message).
        dftlen: DFT length forwarded to ``analysis_spec``/``analysis_pdd``
            and used by the band compressions.
        finf0txt: Optional f0 input, loaded with ``np.loadtxt``.
        f0estimator: Forwarded to ``analysis_f0postproc``.
        f0_min, f0_max: f0 bounds forwarded to ``analysis_f0postproc``.
        ff0: Output file for column 1 of the post-processed f0 array.
        f0_log: If true, the natural log of the f0 values is written.
        finf0bin: Optional f0 input read as flat float32 values; it
            overrides ``finf0txt`` when both are given.
        fspec: Output file for the spectrogram.
        spec_mceporder: If not None, compress with ``sp.spec2mcep``.
        spec_fwceporder: If not None, compress the log magnitude with
            ``sp.loghspec2fwcep``.
        spec_nbfwbnds: If not None, compress the log magnitude with
            ``sp.linbnd2fwbnd`` into this number of bands.
        spec_nblinlogbnds: If not None, the log magnitude is written.
            NOTE(review): the value itself is not used here - confirm
            whether a band reduction is missing.
        fpdd: Output file for the PDD.
        pdd_mceporder: If not None, the PDD is floored at 0.001 and
            compressed with ``sp.spec2mcep`` before being written.
        fnm: Output file for the noise mask.
        nm_nbfwbnds: If truthy, the noise mask is compressed with
            ``sp.linbnd2fwbnd`` into this number of bands.
        preproc_fs: If not None and different from the file's sampling
            rate, the waveform is resampled to it with ``sp.resample``.
        preproc_hp: If not None, cut-off in Hz of a 4th-order Butterworth
            high-pass applied forward and backward (``filtfilt``).
        verbose: > 0 prints progress, > 2 additionally calls
            ``plot_features`` with the uncompressed features.

    Returns:
        None. Results are only written to the given files.
    """
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print(
            'PML Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'
            .format(len(wav) / float(fs), fs, f0_min, f0_max, shift, dftlen))

    if (preproc_fs is not None) and (preproc_fs != fs):
        if verbose > 0:
            print(
                '    Resampling the waveform (new fs={}Hz)'.format(preproc_fs))
        wav = sp.resample(wav, fs, preproc_fs, method=2, deterministic=True)
        fs = preproc_fs

    if preproc_hp is not None:
        if verbose > 0:
            print('    High-pass filter the waveform (cutt-off={}Hz)'.format(
                preproc_hp))
        # butter() expects the cut-off normalised by the Nyquist frequency
        # (fs/2). The former `fs / 0.5` divided by 2*fs instead, placing
        # the cut-off four times lower than requested.
        b, a = sig.butter(4, preproc_hp / (0.5 * fs), btype='high')
        wav = sig.filtfilt(b, a, wav)

    f0s = None
    if finf0txt:
        f0s = np.loadtxt(finf0txt)

    # read input f0 file in float32 (ljuvela)
    if finf0bin:
        f0s = np.fromfile(finf0bin, dtype=np.float32)

    f0s = analysis_f0postproc(wav,
                              fs,
                              f0s,
                              f0_min=f0_min,
                              f0_max=f0_max,
                              shift=shift,
                              f0estimator=f0estimator,
                              verbose=verbose)

    # Pristine copies are only needed for plotting. They are always bound
    # (to None by default) so that the final plot_features call cannot hit
    # an undefined name when an output was not requested.
    keep_originals = verbose > 2
    f0sori = f0s.copy() if keep_originals else None
    SPECori = None
    PDDori = None
    NMori = None

    if ff0:
        f0_values = f0s[:, 1]
        if verbose > 0:
            print('    Output F0 {} in: {}'.format(f0_values.shape, ff0))
        if f0_log:
            f0_values = np.log(f0_values)
        _ensure_parent_dir(ff0)
        f0_values.astype(np.float32).tofile(ff0)

    SPEC = None
    if fspec:
        SPEC = analysis_spec(wav,
                             fs,
                             f0s,
                             shift=shift,
                             dftlen=dftlen,
                             verbose=verbose)
        if keep_originals:
            SPECori = SPEC.copy()
        if spec_mceporder is not None:  # pragma: no cover
            # Cannot test this because it needs SPTK
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_mceporder)
        if spec_fwceporder is not None:
            SPEC = sp.loghspec2fwcep(np.log(abs(SPEC)),
                                     fs,
                                     order=spec_fwceporder)
        if spec_nbfwbnds is not None:
            SPEC = sp.linbnd2fwbnd(np.log(abs(SPEC)), fs, dftlen,
                                   spec_nbfwbnds)
        if spec_nblinlogbnds is not None:
            SPEC = np.log(abs(SPEC))
        if verbose > 0:
            print('    Output Spectrogram size={} in: {}'.format(
                SPEC.shape, fspec))
        _ensure_parent_dir(fspec)
        SPEC.astype(np.float32).tofile(fspec)

    PDD = None
    # The noise mask must be derived from the PDD as estimated, not from
    # the (optionally) compressed version that is written to fpdd.
    pdd_for_nm = None
    if fpdd or fnm:
        PDD = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)
        pdd_for_nm = PDD
        if keep_originals:
            PDDori = PDD.copy()

    if fpdd:
        if pdd_mceporder is not None:  # pragma: no cover
            # Cannot test this because it needs SPTK
            # If asked, compress PDD.
            # The 0.001 floor (from COVAREP) is applied on a copy so that
            # the array kept for the noise mask is left untouched.
            PDD = sp.spec2mcep(np.maximum(PDD, 0.001), sp.bark_alpha(fs),
                               pdd_mceporder)
        if verbose > 0:
            print('    Output PDD size={} in: {}'.format(PDD.shape, fpdd))
        _ensure_parent_dir(fpdd)
        PDD.astype(np.float32).tofile(fpdd)

    NM = None
    if fnm:
        NM = analysis_nm(wav, fs, f0s, pdd_for_nm, verbose=verbose)
        if keep_originals:
            NMori = NM.copy()
        # If asked, compress NM
        if nm_nbfwbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbfwbnds)
        if verbose > 0:
            print('    Output Noise Mask size={} in: {}'.format(NM.shape, fnm))
        _ensure_parent_dir(fnm)
        NM.astype(np.float32).tofile(fnm)

    if verbose > 2:
        plot_features(wav=wav,
                      fs=fs,
                      f0s=f0sori,
                      SPEC=SPECori,
                      PDD=PDDori,
                      NM=NMori)  # pragma: no cover
Beispiel #3
0
sys.path.append('/home/degottex/Research/CUED/Code')
import sigproc as sp

# Command-line tool: read a float32 spectrum file, compress its log values
# into frequency-warped bands with sp.linbnd2fwbnd and write the result as
# float32.
if __name__ == "__main__":

    argpar = argparse.ArgumentParser()
    argpar.add_argument("specfile", default=None, help="Input spectrum file")
    argpar.add_argument("--dftlen", default=4096, type=int, help="DFT size for the input spectrum")
    argpar.add_argument("--fs", default=16000, type=int, help="Sampling frequency[Hz]")
    # Required: there is no usable default, and omitting it used to forward
    # None to sp.linbnd2fwbnd instead of reporting the missing option.
    argpar.add_argument("--nbbands", type=int, required=True, help="Number of bands in the warped spectral representation")
    argpar.add_argument("bndfwspecfile", default=None, help="Output frequency warped spectrum file")
    # Unrecognised arguments are tolerated and ignored.
    args, _ = argpar.parse_known_args()

    # The input is a flat float32 stream; each row holds dftlen/2+1 bins.
    SPEC = np.fromfile(args.specfile, dtype=np.float32)
    SPEC = SPEC.reshape((-1, int(args.dftlen / 2)+1))
    FWSPEC = sp.linbnd2fwbnd(np.log(SPEC), args.fs, args.dftlen, args.nbbands)
    FWSPEC.astype('float32').tofile(args.bndfwspecfile)

    # Disabled debugging aid: reconstruct the linear spectrum from the
    # warped bands and display both spectrograms, then drop into a debugger.
    if 0:
        shift = 0.005
        SPECR = np.exp(sp.fwbnd2linbnd(FWSPEC, args.fs, args.dftlen))
        import matplotlib.pyplot as plt
        plt.ion()
        ts = shift*np.arange(SPEC.shape[0])
        plt.subplot(211)
        plt.imshow(sp.mag2db(SPEC).T, origin='lower', aspect='auto', interpolation='none', cmap='jet', extent=[0.0, ts[-1], 0.0, args.fs/2])
        plt.subplot(212)
        plt.imshow(sp.mag2db(SPECR).T, origin='lower', aspect='auto', interpolation='none', cmap='jet', extent=[0.0, ts[-1], 0.0, args.fs/2])
        from IPython.core.debugger import  Pdb; Pdb().set_trace()
Beispiel #4
0
def analysisf(
        fwav,
        shift=0.005,
        dftlen=4096,
        inf0txt_file=None,
        f0_min=60,
        f0_max=600,
        f0_file=None,
        spec_file=None,
        spec_order=None,  # Mel-cepstral order for compressing the
        # spectrum (typically 59; None: no compression)
        pdd_file=None,
        pdd_order=None,  # Mel-cepstral order for compressing PDD
        # spectrum (typically 59; None: no compression)
        nm_file=None,
        nm_nbbnds=None,  # Number of mel-bands in the compressed mask
        # (None: no compression)
        verbose=1):
    """Analyse a waveform file and write the requested features to disk.

    Every output is written as raw float32 with ``ndarray.tofile``; an
    output is only computed when its file name is given.

    Args:
        fwav: Path of the waveform, read with ``sp.wavread``.
        shift: Forwarded to ``analysis_f0postproc`` and ``analysis_spec``
            (reported in seconds by the progress message).
        dftlen: DFT length forwarded to ``analysis_spec``/``analysis_pdd``
            and used when compressing the noise mask.
        inf0txt_file: Optional f0 input, loaded with ``np.loadtxt``.
        f0_min, f0_max: f0 bounds forwarded to ``analysis_f0postproc``.
        f0_file: Output file for column 1 of the post-processed f0 array.
        spec_file: Output file for the spectrogram.
        spec_order: If not None, the spectrogram is compressed with
            ``sp.spec2mcep`` using this order before being written.
        pdd_file: Output file for the PDD.
        pdd_order: If not None, the PDD is floored at 0.001 and compressed
            with ``sp.spec2mcep`` using this order before being written.
        nm_file: Output file for the noise mask.
        nm_nbbnds: If truthy, the noise mask is compressed with
            ``sp.linbnd2fwbnd`` into this number of bands and then
            re-binarised around 0.5.
        verbose: > 0 prints progress, > 2 additionally calls
            ``plot_features``.

    Returns:
        None. Results are only written to the given files.
    """
    wav, fs, _ = sp.wavread(fwav)

    if verbose > 0:
        print('PM Analysis (dur={:.3f}s, fs={}Hz, f0 in [{},{}]Hz, shift={}s, dftlen={})'.format(len(wav)/float(fs), fs, f0_min, f0_max, shift, dftlen))

    f0s = None
    if inf0txt_file:
        f0s = np.loadtxt(inf0txt_file)

    f0s = analysis_f0postproc(wav, fs, f0s, f0_min=f0_min, f0_max=f0_max,
                              shift=shift, verbose=verbose)

    if f0_file:
        if verbose > 0:
            print('    Output F0 {} in: {}'.format(f0s[:, 1].shape, f0_file))
        f0s[:, 1].astype(np.float32).tofile(f0_file)

    SPEC = None
    if spec_file:
        SPEC = analysis_spec(wav, fs, f0s, shift=shift, dftlen=dftlen,
                             verbose=verbose)
        if spec_order is not None:
            SPEC = sp.spec2mcep(SPEC, sp.bark_alpha(fs), order=spec_order)
        if verbose > 0:
            print('    Output Spectrogram size={} in: {}'.format(SPEC.shape, spec_file))
        SPEC.astype(np.float32).tofile(spec_file)

    PDD = None
    # The noise mask must be derived from the PDD as estimated, not from
    # the (optionally) compressed version that is written to pdd_file.
    pdd_for_nm = None
    if pdd_file or nm_file:
        PDD = analysis_pdd(wav, fs, f0s, dftlen=dftlen, verbose=verbose)
        pdd_for_nm = PDD

    if pdd_file:
        if pdd_order is not None:
            # If asked, compress PDD.
            # The 0.001 floor (from COVAREP) is applied on a copy so that
            # the array kept for the noise mask is left untouched.
            PDD = sp.spec2mcep(np.maximum(PDD, 0.001), sp.bark_alpha(fs),
                               pdd_order)
        if verbose > 0:
            print('    Output PDD size={} in: {}'.format(PDD.shape, pdd_file))
        PDD.astype(np.float32).tofile(pdd_file)

    NM = None
    if nm_file:
        NM = analysis_nm(wav, fs, f0s, pdd_for_nm, verbose=verbose)
        # If asked, compress NM
        if nm_nbbnds:
            # If asked, compress the noise mask using a number of mel bands
            NM = sp.linbnd2fwbnd(NM, fs, dftlen, nm_nbbnds)
            # Need to force to binary values because we don't use ambiguous values,
            # we use the binary version at synthesis time.
            NM[NM >= 0.5] = 1.0
            NM[NM < 0.5] = 0.0
        if verbose > 0:
            print('    Output Noise Mask size={} in: {}'.format(NM.shape, nm_file))
        NM.astype(np.float32).tofile(nm_file)

    if verbose > 2:
        plot_features(wav=wav, fs=fs, f0s=f0s, SPEC=SPEC, PDD=PDD, NM=NM)