Example #1
0
def sp_to_mcep(m_sp, n_coeffs=60, alpha=0.77, in_type=3, fft_len=0):
    """Convert spectral data to mel-cepstra by running the SPTK ``mcep`` binary.

    The input matrix is written to a temporary binary file, the external
    command ``_sptk_mcep_bin`` is run through the shell with its output
    redirected to a second temporary file, and that file is read back with
    ``n_coeffs`` columns. Both temporary files are removed afterwards, even
    if one of the steps raises.

    Args:
        m_sp: Spectral data. The size of its second axis is used to derive
            the FFT length when ``fft_len`` is 0.
        n_coeffs: Number of coefficients to read back; ``n_coeffs - 1`` is
            passed to the binary as ``-m``.
        alpha: Value passed as ``-a`` (formatted with two decimals).
        in_type: Value passed as ``-q``.
        fft_len: Value passed as ``-l``. 0 means "derive it from ``m_sp``"
            as ``2 * (np.size(m_sp, 1) - 1)``.

    Returns:
        Whatever ``lu.read_binfile`` returns for the generated file.
    """
    # Temp file names come from lu.ins_pid (presumably tagged with the
    # process id so parallel workers do not collide -- confirm in libutils).
    temp_sp = lu.ins_pid('temp.sp')
    temp_mgc = lu.ins_pid('temp.mgc')

    try:
        # Writing input data:
        lu.write_binfile(m_sp, temp_sp)

        # Value comparison, not identity: `is 0` is False for numpy integers
        # and is flagged with a SyntaxWarning by recent Python versions.
        if fft_len == 0:  # case fft automatic
            fft_len = 2 * (np.size(m_sp, 1) - 1)

        # MCEP. Equivalent shell command:
        # $sptk/mcep -a $alpha -m $mcsize -l $nFFT -e 1.0E-8 -j 0 -f 0.0 -q 3 $sp_dir/$sentence.sp > $mgc_dir/$sentence.mgc
        curr_cmd = _sptk_mcep_bin + " -a %1.2f -m %d -l %d -e 1.0E-8 -j 0 -f 0.0 -q %d %s > %s" % (
            alpha, n_coeffs - 1, fft_len, in_type, temp_sp, temp_mgc)
        call(curr_cmd, shell=True)

        # Read MGC File:
        m_mgc = lu.read_binfile(temp_mgc, n_coeffs)
    finally:
        # Deleting temp files (only those that were actually created):
        for temp_file in (temp_sp, temp_mgc):
            if os.path.exists(temp_file):
                os.remove(temp_file)

    return m_mgc
Example #2
0
def sp_to_mcep(m_sp, n_coeffs=60, alpha=0.77, in_type=3, fft_len=0):
    """Compute mel-cepstra from spectral data with the SPTK ``mcep`` binary.

    ``m_sp`` is dumped to a temporary file, ``_sptk_mcep_bin`` is executed
    through the shell with its output redirected to another temporary file,
    and the result is loaded with ``n_coeffs`` columns. The temporary files
    are deleted on every exit path.

    Args:
        m_sp: Spectral data; the size of its second axis determines the FFT
            length when ``fft_len`` is 0.
        n_coeffs: Number of coefficients to read back (``-m`` receives
            ``n_coeffs - 1``).
        alpha: Value for the ``-a`` option, formatted with two decimals.
        in_type: Value for the ``-q`` option.
        fft_len: Value for the ``-l`` option; 0 selects the automatic value
            ``2 * (np.size(m_sp, 1) - 1)``.

    Returns:
        The object returned by ``lu.read_binfile`` for the output file.
    """
    # Pre:
    temp_sp = lu.ins_pid('temp.sp')
    temp_mgc = lu.ins_pid('temp.mgc')

    try:
        # Writing input data:
        lu.write_binfile(m_sp, temp_sp)

        # Compare by value: `fft_len is 0` depends on object identity, so a
        # numpy integer zero would not trigger the automatic case.
        if fft_len == 0:  # case fft automatic
            fft_len = 2 * (np.size(m_sp, 1) - 1)

        # MCEP. Reference command line:
        # $sptk/mcep -a $alpha -m $mcsize -l $nFFT -e 1.0E-8 -j 0 -f 0.0 -q 3 $sp_dir/$sentence.sp > $mgc_dir/$sentence.mgc
        curr_cmd = _sptk_mcep_bin + " -a %1.2f -m %d -l %d -e 1.0E-8 -j 0 -f 0.0 -q %d %s > %s" % (
            alpha, n_coeffs - 1, fft_len, in_type, temp_sp, temp_mgc)
        call(curr_cmd, shell=True)

        # Read MGC File:
        m_mgc = lu.read_binfile(temp_mgc, n_coeffs)
    finally:
        # Deleting temp files; skip any that were never created so that the
        # original error (if any) is the one that propagates.
        for temp_file in (temp_sp, temp_mgc):
            if os.path.exists(temp_file):
                os.remove(temp_file)

    return m_mgc
Example #3
0
def wavgen_magphase(gen_dir, file_id_list, cfg, logger):
    """Generate a waveform with MagPhase for every id in ``file_id_list``.

    For each file id, an optional external post-filter is applied to the
    ``.mag`` file in ``gen_dir`` (overwriting it), and then
    ``magphase.synthesis_from_acoustic_modelling`` is called with ``gen_dir``
    as both its second directory argument and its first.

    Args:
        gen_dir: Directory holding the generated feature files; also passed
            to the synthesis call as the output location.
        file_id_list: Sequence of file name tokens (without extension).
        cfg: Configuration object. Attributes read here: ``magphase_bindir``,
            ``do_post_filtering``, ``use_magphase_pf``, ``mag_dim``,
            ``real_dim``, ``sr``, ``pf_coef``, ``fw_alpha``, ``co_coef``,
            ``fl``.
        logger: Object with an ``info`` method used for progress messages.

    Returns:
        None.
    """
    # Import MagPhase and libraries. The path is added only once so repeated
    # calls do not keep growing sys.path with duplicates.
    if cfg.magphase_bindir not in sys.path:
        sys.path.append(cfg.magphase_bindir)
    import libutils as lu
    import libaudio as la
    import magphase as mp

    nfiles = len(file_id_list)
    for nxf, filename_token in enumerate(file_id_list):
        logger.info('Creating waveform for %4d of %4d: %s' %
                    (nxf + 1, nfiles, filename_token))

        # Post-Filter (external one; skipped when MagPhase's own is used):
        if cfg.do_post_filtering and not cfg.use_magphase_pf:

            mcep_file = os.path.join(gen_dir, filename_token + '.mcep')
            mcep_file_pf = os.path.join(gen_dir, filename_token + '_pf.mcep')
            mag_file = os.path.join(gen_dir, filename_token + '.mag')

            # Mag to Mcep:
            m_mag_mel_log = lu.read_binfile(mag_file, dim=cfg.mag_dim)
            m_mcep = la.rceps(m_mag_mel_log, in_type='log', out_type='compact')
            lu.write_binfile(m_mcep, mcep_file)

            # Apply post-filter:
            post_filter(mcep_file, mcep_file_pf, cfg.mag_dim, cfg.pf_coef,
                        cfg.fw_alpha, cfg.co_coef, cfg.fl, gen_dir, cfg)

            # Mcep to Mag:
            m_mcep_pf = lu.read_binfile(mcep_file_pf, dim=cfg.mag_dim)
            m_mag_mel_log_pf = la.mcep_to_sp_cosmat(m_mcep_pf,
                                                    cfg.mag_dim,
                                                    alpha=0.0,
                                                    out_type='log')

            # Protection against possible nans:
            m_mag_mel_log_pf[np.isnan(m_mag_mel_log_pf)] = la.MAGIC

            # Saving to file (replaces the original .mag file):
            lu.write_binfile(m_mag_mel_log_pf, mag_file)

            # Removing temp files:
            os.remove(mcep_file)
            os.remove(mcep_file_pf)

        # Synthesis:
        mp.synthesis_from_acoustic_modelling(
            gen_dir,
            filename_token,
            gen_dir,
            cfg.mag_dim,
            cfg.real_dim,
            cfg.sr,
            b_postfilter=(cfg.do_post_filtering and cfg.use_magphase_pf))
def feat_extraction(wav_file, out_feats_dir):
    """Extract MagPhase features from one wav file and save them to disk.

    Runs epoch detection (``la.reaper``), then the MagPhase analysis, zeroes
    the phase features on frames treated as unvoiced, and writes the
    ``.mag``, ``.real``, ``.imag``, ``.lf0`` and ``.shift`` files into
    ``out_feats_dir``. The ``.est`` file produced by the epoch detector is
    written to the same directory.

    The process exits with status 1 if the sample rate reported by the
    analysis differs from ``fs_expected`` (a name defined outside this
    function).

    Args:
        wav_file: Path to the input wav file.
        out_feats_dir: Directory that receives the output files.

    Returns:
        None.
    """
    # Constants:
    fft_len = 4096
    mvf = 4500
    nbins_mel = 60
    nbins_phase = 45

    # Parsing path:
    file_name_token = os.path.basename(os.path.splitext(wav_file)[0])

    # Display:
    print("Analysing file: " + file_name_token + '.wav' +
          '................................')

    # Files setup:
    est_file = os.path.join(out_feats_dir, file_name_token + '.est')

    # Epochs detection:
    la.reaper(wav_file, est_file)

    # Feature extraction:
    m_mag_mel_log, m_real_mel, m_imag_mel, v_shift, v_lf0, fs = mp.analysis_with_del_comp__ph_enc__f0_norm__from_files2(
        wav_file,
        est_file,
        fft_len,
        mvf,
        f0_type='lf0',
        mag_mel_nbins=nbins_mel,
        cmplx_ph_mel_nbins=nbins_phase)

    if fs != fs_expected:
        print(
            "The wavefile's sample rate (%dHz) does not match the expected sample rate (%dHz)."
            % (fs, fs_expected))
        sys.exit(1)

    # Zeros for unvoiced segments in phase features:
    v_voi = (np.exp(v_lf0) > 5.0).astype(int)  # 5.0: tolerance (just in case)
    m_real_mel_zeros = m_real_mel * v_voi[:, None]
    m_imag_mel_zeros = m_imag_mel * v_voi[:, None]

    # Saving features. All paths are built with os.path.join, like est_file,
    # so an empty out_feats_dir means the current directory for every file
    # (plain '/' concatenation would have pointed at the filesystem root).
    # The shift (hop length) is an auxiliary feature, useful for posterior
    # modifications of labels in Merlin.
    features = (
        (m_mag_mel_log, '.mag'),
        (m_real_mel_zeros, '.real'),
        (m_imag_mel_zeros, '.imag'),
        (v_lf0, '.lf0'),
        (v_shift, '.shift'),
    )
    for feat, ext in features:
        lu.write_binfile(feat, os.path.join(out_feats_dir, file_name_token + ext))

    return
def feat_extraction(in_wav_dir,
                    file_name_token,
                    out_feats_dir,
                    fft_len,
                    mvf,
                    nbins_mel=60,
                    nbins_phase=45):
    """Analyse ``<in_wav_dir>/<file_name_token>.wav`` and store its features.

    Epoch detection is run first (its ``.est`` file goes to
    ``out_feats_dir``), followed by the MagPhase analysis. The phase
    features are multiplied by a 0/1 voicing mask derived from ``v_lf0``
    before being saved. Output files, all named after ``file_name_token``
    inside ``out_feats_dir``: ``.mag``, ``.real``, ``.imag``, ``.lf0`` and
    ``.shift``.

    Args:
        in_wav_dir: Directory containing the wav file.
        file_name_token: File name without extension.
        out_feats_dir: Directory for the output files.
        fft_len: Passed through to the analysis function.
        mvf: Passed through to the analysis function.
        nbins_mel: Passed as ``mag_mel_nbins``.
        nbins_phase: Passed as ``cmplx_ph_mel_nbins``.

    Returns:
        None.
    """
    # Display:
    print("Analysing file: " + file_name_token + '.wav')

    # Files setup (every output shares the same directory/name prefix):
    out_prefix = out_feats_dir + '/' + file_name_token
    wav_file = in_wav_dir + '/' + file_name_token + '.wav'
    est_file = out_prefix + '.est'

    # Epochs detection:
    la.reaper(wav_file, est_file)

    # Feature extraction (the returned sample rate is not used here):
    analysis_out = mp.analysis_with_del_comp__ph_enc__f0_norm__from_files2(
        wav_file,
        est_file,
        fft_len,
        mvf,
        f0_type='lf0',
        mag_mel_nbins=nbins_mel,
        cmplx_ph_mel_nbins=nbins_phase)
    m_mag_mel_log, m_real_mel, m_imag_mel, v_shift, v_lf0, _fs = analysis_out

    # Zeros for unvoiced segments in phase features
    # (5.0 is a tolerance, just in case):
    voiced_column = (np.exp(v_lf0) > 5.0).astype(int)[:, None]

    # Saving features. The shift (hop length) is auxiliary; it is useful for
    # posterior modifications of labels in Merlin.
    outputs = (
        ('.mag', m_mag_mel_log),
        ('.real', m_real_mel * voiced_column),
        ('.imag', m_imag_mel * voiced_column),
        ('.lf0', v_lf0),
        ('.shift', v_shift),
    )
    for extension, feature in outputs:
        lu.write_binfile(feature, out_prefix + extension)

    return
Example #6
0
    # Load the configuration named on the command line, then pull the
    # project-wide names (directory constants, helpers) into this namespace.
    load_config(a.config)
    from __init__ import *

    # One sentence id per line of the list file.
    # NOTE(review): `if l` is always true for lines yielded by file iteration
    # (each one still carries its newline), so blank lines end up as empty
    # sentence ids -- confirm whether `if l.strip()` was intended.
    with open(a.senlst) as f:
        sentences = [l.rstrip() for l in f if l]

    # Per-sentence label paths for each processing stage.
    # lab3 is only referenced by the commented-out call at the end.
    hts2 = [path.join(HTS2DIR, s + '.lab') for s in sentences]
    lab1 = [path.join(LAB1DIR, s + '.lab') for s in sentences]
    lab2 = [path.join(LAB2DIR, s + '.lab') for s in sentences]
    lab3 = [path.join(LAB3DIR, s + '.lab') for s in sentences]

    # Stage 1: hts2 -> lab1, using the question file RESDIR/600.hed.
    binarizer = HTSLabelNormalisation(
        question_file_name=path.join(RESDIR, '600.hed'))
    binarizer.perform_normalisation(hts2, lab1)

    # Stage 2: lab1 -> lab2 with silence removed; hts2 is passed as the
    # second argument and '*-#+*' is the silence pattern.
    remover = SilenceRemover(n_cmp=binarizer.dimension,
                             silence_pattern=['*-#+*'])
    remover.remove_silence(lab1, hts2, lab2)

    # Stage 3: find the min/max vectors over lab2, print them and store them
    # as LABSDIR/min and LABSDIR/max.
    normalizer = MinMaxNormalisation(feature_dimension=binarizer.dimension,
                                     min_value=0.01,
                                     max_value=0.99)
    normalizer.find_min_max_values(lab2)
    print1(normalizer.min_vector)
    print1(normalizer.max_vector)
    lu.write_binfile(normalizer.min_vector, path.join(LABSDIR, 'min'))
    lu.write_binfile(normalizer.max_vector, path.join(LABSDIR, 'max'))

    # The normalisation itself (lab2 -> lab3) is currently disabled:
    # normalizer.normalise_data(lab2, lab3)
Example #7
0
def magphase_analysis(wav_file, outdir='', fft_len=None, nbins_mel=60, nbins_phase=45, pm_dir='', skip_low=False, cepstra=False):
    '''
    Run MagPhase analysis on one wav file and write the resulting features.

    Combines the lossless (high-dimensional) and compressed (low-dimensional)
    analyses with little redundancy, and stores pitchmark files.

    Files written (all named after the wav file's base name up to its first
    dot):
      * <outdir>/high/{mag,real,imag,f0}/ : lossless features
      * <outdir>/shift/                   : frame shifts
      * <outdir>/low/{mag,real,imag,lf0}/ : compressed features, unless
                                            skip_low is true
      * <outdir>/low/{mag_cc,real_cc,imag_cc}/ : mel-cepstra, if cepstra is
                                                 true
      * <outdir>/pm/                      : pitchmarks, only when pm_dir is
                                            empty (otherwise they are read
                                            from pm_dir)

    Args:
        wav_file: Path of the wav file to analyse.
        outdir: Root output directory.
        fft_len: Passed to mp.analysis_with_del_comp_from_pm.
        nbins_mel: Magnitude dimension for the low-dim features and number
            of magnitude cepstral coefficients.
        nbins_phase: Phase dimension for the low-dim features and number of
            real/imag cepstral coefficients.
        pm_dir: Directory with existing '.pm' files; when empty, epoch
            detection is run and its output stored under <outdir>/pm.
        skip_low: Skip the low-dimensional features when true.
        cepstra: Also compute and write mel-cepstra when true.

    Raises:
        KeyError: if cepstra is true and the file's sample rate is neither
            48000 nor 16000.

    A KeyboardInterrupt raised during the analysis is swallowed, as in the
    original code (presumably so that interrupted worker processes exit
    quietly -- confirm against the caller).
    '''
    # Frequency-warping constant per sample rate. It must be < 1 in
    # magnitude; the 16 kHz entry used to read 58.
    alpha_by_fs = {48000: 0.77, 16000: 0.58}

    try:
        outdir_hi = os.path.join(outdir, 'high')
        outdir_lo = os.path.join(outdir, 'low')

        file_id = os.path.basename(wav_file).split(".")[0]

        # Read file:
        v_sig, fs = sf.read(wav_file)

        if not pm_dir:
            # Epoch detection:
            est_file = os.path.join(outdir, 'pm', file_id + '.pm')
            la.reaper(wav_file, est_file)
        else:
            est_file = os.path.join(pm_dir, file_id + '.pm')
        v_pm_sec, v_voi = la.read_reaper_est_file(est_file, check_len_smpls=len(v_sig), fs=fs)
        v_pm_smpls = v_pm_sec * fs  # pitchmarks: seconds -> samples

        # Spectral analysis:
        m_fft, v_shift = mp.analysis_with_del_comp_from_pm(v_sig, fs, v_pm_smpls, fft_len=fft_len)

        # Getting high-res magphase feats:
        m_mag, m_real, m_imag, v_f0 = mp.compute_lossless_feats(m_fft, v_shift, v_voi, fs)

        ### write high-dimensional data:
        lu.write_binfile(m_mag, os.path.join(outdir_hi, 'mag', file_id + '.mag'))
        lu.write_binfile(m_real, os.path.join(outdir_hi, 'real', file_id + '.real'))
        lu.write_binfile(m_imag, os.path.join(outdir_hi, 'imag', file_id + '.imag'))
        lu.write_binfile(v_f0, os.path.join(outdir_hi, 'f0', file_id + '.f0'))
        lu.write_binfile(v_shift, os.path.join(outdir, 'shift', file_id + '.shift'))

        if not skip_low:
            # Low dimension (Formatting for Acoustic Modelling):
            m_mag_mel_log, m_real_mel, m_imag_mel, v_lf0_smth = mp.format_for_modelling(m_mag, m_real, m_imag, v_f0, fs, mag_dim=nbins_mel, phase_dim=nbins_phase)

            ### write low-dim data:
            lu.write_binfile(m_mag_mel_log, os.path.join(outdir_lo, 'mag', file_id + '.mag'))
            lu.write_binfile(m_real_mel, os.path.join(outdir_lo, 'real', file_id + '.real'))
            lu.write_binfile(m_imag_mel, os.path.join(outdir_lo, 'imag', file_id + '.imag'))
            lu.write_binfile(v_lf0_smth, os.path.join(outdir_lo, 'lf0', file_id + '.lf0'))

        if cepstra:
            alpha = alpha_by_fs[fs]
            m_mag_mcep = la.sp_to_mcep(m_mag, n_coeffs=nbins_mel, alpha=alpha, in_type=3)
            m_real_mcep = la.sp_to_mcep(m_real, n_coeffs=nbins_phase, alpha=alpha, in_type=2)
            m_imag_mcep = la.sp_to_mcep(m_imag, n_coeffs=nbins_phase, alpha=alpha, in_type=2)

            # Cepstra are stored next to the low-dimensional features.
            lu.write_binfile(m_mag_mcep, os.path.join(outdir_lo, 'mag_cc', file_id + '.mag_cc'))
            lu.write_binfile(m_real_mcep, os.path.join(outdir_lo, 'real_cc', file_id + '.real_cc'))
            lu.write_binfile(m_imag_mcep, os.path.join(outdir_lo, 'imag_cc', file_id + '.imag_cc'))
    except KeyboardInterrupt:
        # Deliberately silent; see the docstring.
        pass
Example #8
0
            # Load the four feature streams of sentence `s` from ACO2DIR,
            # each with its own dimension taken from the `ax` module.
            mag = lu.read_binfile(path.join(ACO2DIR, s + '.mag'),
                                  dim=ax.MAG_DIM)
            real = lu.read_binfile(path.join(ACO2DIR, s + '.real'),
                                   dim=ax.REAL_DIM)
            imag = lu.read_binfile(path.join(ACO2DIR, s + '.imag'),
                                   dim=ax.IMAG_DIM)
            # lf0 is forced to a 2-D array with LF0_DIM columns.
            lf0 = lu.read_binfile(path.join(ACO2DIR, s+'.lf0'), dim=ax.LF0_DIM)\
                    .reshape([-1,ax.LF0_DIM])
            # Combine the streams into one feature matrix.
            x = ax.acoustic(mag=mag, real=real, imag=imag, lf0=lf0)
            if ds.EXP_F0:
                # Exponentiate the LF0 column(s) in place; the voicing call
                # then gets 1.0 instead of 0.0 as its second argument
                # (presumably a threshold -- confirm in ax.voicing).
                x[:, ax.LF0] = np.exp(x[:, ax.LF0])
                v = ax.voicing(x, 1.0)
            else:
                v = ax.voicing(x, 0.0)
            # Voicing is computed before the F0 interpolation.
            x = ax.interpolate_f0(x)

            # The voicing columns are always appended last; velocity and
            # acceleration features are inserted before them when enabled.
            if ds.USE_DELTA:
                dx = ax.velocity(x)
                ddx = ax.acceleration(x)
                x = np.concatenate([x, dx, ddx, v], axis=1)
            else:
                x = np.concatenate([x, v], axis=1)
            lu.write_binfile(x, path.join(ACO3DIR, s + '.aco'))
        except (KeyboardInterrupt, SystemExit):
            # Never swallow an interrupt or an explicit exit.
            raise
        except Exception as e:
            # Any other failure is reported via print2 and then ignored.
            print2(e)
            pass
        else:
            # Success: report the sentence id.
            print1(s)
Example #9
0
                # Zero the LF0 entries where v <= mean[-1], then take the log
                # of the whole column; numpy's log of 0.0 is -inf, which
                # matches the explicit -Inf used in the else branch.
                # NOTE(review): the chained indexing only modifies x if
                # x[:,ax.LF0] is a view (ax.LF0 an int or a slice) -- confirm.
                x[:,ax.LF0][v <= mean[-1]] = 0.0
                x[:,ax.LF0] = np.log(x[:,ax.LF0])
            else:
                x[:,ax.LF0][v <= mean[-1]] = float('-Inf')

            # Disabled F0 plotting code, kept for reference:
            # if a.plot_f0:
            #     t = [n * 0.005 for n in range(x.shape[0])]
            #     x2 = lu.read_binfile(path.join(ACO3DIR, sentence+'.aco'), dim=ds.AX_DIM)
            #     x2[:,ax.LF0][x2[:,-1] == 0.0] = 0.0
            #     pyplot.plot(t, x2[:len(t),1])
            #     pyplot.plot(t, x[:,1])#np.exp(x[:,ax.LF0]))
            #     # pyplot.plot(t, [y if y > 0 else 0.0 for y in x2[:len(t),0]])
            #     pyplot.savefig(path.join(outdir, sentence+'_mag.pdf'))
            #     pyplot.close()

            # Split the feature matrix into its streams and write one file
            # per stream into outdir.
            lu.write_binfile(x[:,ax.MAG], path.join(outdir, sentence+'.mag'))
            lu.write_binfile(x[:,ax.REAL], path.join(outdir, sentence+'.real'))
            lu.write_binfile(x[:,ax.IMAG], path.join(outdir, sentence+'.imag'))
            lu.write_binfile(x[:,ax.LF0], path.join(outdir, sentence+'.lf0'))

            # Run SRCDIR/wavify.py on the files just written. The command is
            # formatted as one string and then split on whitespace, so a
            # path or sentence id containing spaces would be broken into
            # several arguments. '-v' is added only when ds.CONST_RATE is
            # false. The value returned by call() is not checked.
            try:
                call('{script} -s {sentence} -o {outdir} -m {magdim} -p {phadim} -f merlin {const_rate}'\
                    .format(script=path.join(SRCDIR, 'wavify.py'),
                            sentence=sentence,
                            #vocdir=outdir,
                            outdir=outdir,
                            magdim=ax.MAG_DIM,
                            phadim=ax.PHASE_DIM,
                            const_rate='' if ds.CONST_RATE else '-v').split())
            except Exception as e:
                # Failing to launch the script is reported, not fatal.
                print2(e)
Example #10
0
from argparse import ArgumentParser

if __name__ == '__main__':
    # Compute the per-dimension mean and standard deviation over all frames
    # of the '.aco' files of the listed sentences, and store them as
    # STTDIR/mean and STTDIR/stddev.
    p = ArgumentParser()
    p.add_argument('-s', '--senlst', dest='senlst', required=True)
    p.add_argument('-c', '--config', dest='config', required=True)
    a = p.parse_args()

    load_config(a.config)
    from __init__ import *
    import acoustic as ax
    import dataset as ds

    # One sentence id per line. Lines read from a file are never empty
    # strings (they keep their newline), so blank lines must be detected on
    # the stripped text; otherwise they turn into empty sentence ids.
    with open(a.senlst) as f:
        sentences = [l.rstrip() for l in f if l.strip()]

    # Running sum and sum of squares, accumulated in float64.
    mean = np.zeros([1, ds.AX_DIM])
    stddev = np.zeros([1, ds.AX_DIM])
    n = 0
    for s in sentences:
        data = lu.read_binfile(path.join(ACO3DIR, s + '.aco'), dim=ds.AX_DIM)
        # Sum over the frame axis in one call instead of a Python-level loop
        # over every frame.
        mean += np.sum(data, axis=0, dtype=np.float64)
        stddev += np.sum(np.multiply(data, data), axis=0, dtype=np.float64)
        n += len(data)

    if n == 0:
        # Dividing by zero would silently write NaN statistics.
        raise SystemExit('No frames found for the sentences listed in ' + a.senlst)

    mean /= n
    # E[x^2] - E[x]^2 can come out slightly negative through rounding for
    # (near-)constant dimensions; clamp it so sqrt does not produce NaN.
    variance = stddev / n - np.multiply(mean, mean)
    stddev = np.sqrt(np.maximum(variance, 0.0))

    lu.write_binfile(mean, path.join(STTDIR, 'mean'))
    lu.write_binfile(stddev, path.join(STTDIR, 'stddev'))