Example #1
0
def wavgen_magphase(gen_dir, file_id_list, cfg, logger):
    """Synthesise waveforms from MagPhase acoustic features predicted by Merlin.

    gen_dir:      directory containing the generated acoustic features.
    file_id_list: utterance tokens (one waveform is synthesised per token).
    cfg:          Merlin config object; must provide magphase_bindir, mag_dim,
                  real_dim, sr, magphase_pf_type (iterable of postfilter names)
                  and magphase_const_rate.
    logger:       logger used for per-file progress messages.
    """

    # Import MagPhase and libraries (located via the configured bin dir):
    sys.path.append(cfg.magphase_bindir)
    import libutils as lu
    import libaudio as la
    import magphase as mp

    nfiles = len(file_id_list)
    # enumerate replaces the Python-2-only xrange(nfiles) index loop.
    for nxf, filename_token in enumerate(file_id_list):
        logger.info('Creating waveform for %4d of %4d: %s' %
                    (nxf + 1, nfiles, filename_token))

        # One output directory per requested postfilter type, next to gen_dir.
        for pf_type in cfg.magphase_pf_type:
            # os.path.join with a single argument was a no-op; plain
            # concatenation expresses the intent (sibling-directory name).
            gen_wav_dir = gen_dir + '_wav_pf_' + pf_type
            lu.mkdir(gen_wav_dir)
            mp.synthesis_from_acoustic_modelling(
                gen_dir,
                filename_token,
                gen_wav_dir,
                cfg.mag_dim,
                cfg.real_dim,
                cfg.sr,
                pf_type=pf_type,
                b_const_rate=cfg.magphase_const_rate)

    return
def convert(file_id_list,
            in_lab_dir,
            in_feats_dir,
            fs,
            out_lab_dir,
            b_prevent_zeros=False):
    '''
    Convert Merlin state-aligned label files to MagPhase's variable frame rate
    using the per-utterance .shift (frame shift) files in in_feats_dir.

    file_id_list:    text file listing utterance tokens ('#' starts a comment).
    in_lab_dir:      directory with the input state-aligned .lab files.
    in_feats_dir:    directory with the extracted .shift files.
    fs:              sample rate in Hz.
    out_lab_dir:     output directory for the converted .lab files.
    b_prevent_zeros: True if you want to ensure that all the phonemes have one frame at least.
    (not recommended, only useful when there are too many utterances crashed)

    Utterances that fail are appended to a per-process crash-list file and
    processing continues with the next token.
    '''

    # Conversion:
    lu.mkdir(out_lab_dir)
    v_filenames = lu.read_text_file2(file_id_list,
                                     dtype='string',
                                     comments='#')

    # Per-process crash-log name so parallel runs do not clobber each other.
    crashlist_file = lu.ins_pid('crash_file_list.scp')
    for filename in v_filenames:

        # Display:
        print('\nConverting lab file: ' + filename +
              '................................')

        # Current i/o files:
        in_lab_file = os.path.join(in_lab_dir, filename + '.lab')
        out_lab_file = os.path.join(out_lab_dir, filename + '.lab')

        in_shift_file = os.path.join(in_feats_dir, filename + '.shift')

        try:
            # Frame shifts -> frames per HMM state -> rewritten label times.
            v_shift = lu.read_binfile(in_shift_file, dim=1)
            v_n_frms = mp.get_num_of_frms_per_state(
                v_shift,
                in_lab_file,
                fs,
                b_prevent_zeros=b_prevent_zeros,
                n_states_x_phone=1)
            la.convert_label_state_align_to_var_frame_rate(
                in_lab_file, v_n_frms, out_lab_file)
        except (KeyboardInterrupt, SystemExit):
            raise
        # Narrowed from a bare "except:" so that non-Exception BaseExceptions
        # (e.g. GeneratorExit) still propagate; any real failure is logged and
        # the batch continues.
        except Exception:
            print("crashlist")
            with open(crashlist_file, "a") as crashlistlog:
                crashlistlog.write(filename + '\n')
    print('Done!')
def convert(file_id_list, in_lab_dir, in_feats_dir, fs, out_lab_dir, b_prevent_zeros=False):

    '''
    Convert Merlin state-aligned label files to MagPhase's variable frame rate
    using the per-utterance .shift (frame shift) files in in_feats_dir.

    b_prevent_zeros: True if you want to ensure that all the phonemes have one frame at least.
    (not recommended, only useful when there are too many utterances crashed)
    '''

    # Conversion:
    lu.mkdir(out_lab_dir)
    v_filenames = lu.read_text_file2(file_id_list, dtype='string', comments='#')

    # Per-process crash-log name so parallel runs do not clobber each other.
    crashlist_file = lu.ins_pid('crash_file_list.scp')
    for filename in v_filenames:

        # Display:
        print('\nConverting lab file: ' + filename + '................................')

        # Current i/o files:
        in_lab_file   = os.path.join(in_lab_dir  , filename + '.lab')
        out_lab_file  = os.path.join(out_lab_dir , filename + '.lab')

        in_shift_file = os.path.join(in_feats_dir, filename + '.shift')


        # Debug:
        '''
        v_shift  = lu.read_binfile(in_shift_file, dim=1)
        v_n_frms = mp.get_num_of_frms_per_state(v_shift, in_lab_file, fs, b_prevent_zeros=b_prevent_zeros)
        la.convert_label_state_align_to_var_frame_rate(in_lab_file, v_n_frms, out_lab_file)
        #'''

        try:
            # Frame shifts -> frames per HMM state -> rewritten label times.
            v_shift  = lu.read_binfile(in_shift_file, dim=1)
            v_n_frms = mp.get_num_of_frms_per_state(v_shift, in_lab_file, fs, b_prevent_zeros=b_prevent_zeros)

            la.convert_label_state_align_to_var_frame_rate(in_lab_file, v_n_frms, out_lab_file)

        except (KeyboardInterrupt, SystemExit):
            raise

        # NOTE(review): bare except — any other failure just records the token
        # in the crash list and the batch continues with the next utterance.
        except:
            with open(crashlist_file, "a") as crashlistlog:
                crashlistlog.write(filename + '\n')

    print('Done!')
Example #4
0
def wavgen_magphase(gen_dir, file_id_list, cfg, logger):
    """Synthesise waveforms from MagPhase acoustic features predicted by Merlin.

    gen_dir:      directory containing the generated acoustic features.
    file_id_list: utterance tokens (one waveform is synthesised per token).
    cfg:          Merlin config object (magphase_bindir, mag_dim, real_dim,
                  sr, magphase_pf_type, magphase_const_rate).
    logger:       logger used for per-file progress messages.
    """

    # Import MagPhase and libraries:
    sys.path.append(cfg.magphase_bindir)
    import libutils as lu
    import libaudio as la
    import magphase as mp

    nfiles = len(file_id_list)
    # NOTE(review): xrange is Python-2 only; this loop fails on Python 3.
    for nxf in xrange(nfiles):
        filename_token = file_id_list[nxf]
        logger.info('Creating waveform for %4d of %4d: %s' % (nxf+1, nfiles, filename_token))

        # One output directory per requested postfilter type.
        for pf_type in cfg.magphase_pf_type:
            gen_wav_dir = os.path.join(gen_dir + '_wav_pf_' + pf_type)
            lu.mkdir(gen_wav_dir)
            mp.synthesis_from_acoustic_modelling(gen_dir, filename_token, gen_wav_dir, cfg.mag_dim, cfg.real_dim,
                                                            cfg.sr, pf_type=pf_type, b_const_rate=cfg.magphase_const_rate)

    return
Example #5
0
import magphase as mp

if __name__ == '__main__':

    # CONSTANTS: So far, the vocoder has been tested only with the following constants:===
    fs = 48000  # Sample rate in Hz.

    # INPUT:==============================================================================
    files_scp = '../data/file_id.scp'  # List of file names (tokens). Format used by Merlin.
    in_lab_st_dir = '../data/labs'  # Original state aligned label files directory (in the format used by Merlin).
    in_shift_dir = '../data/params'  # Directory containing .shift files (You need to run feature extraction before running this script.)
    out_lab_st_dir = '../data/labs_var_rate'  # Directory that will contain the converted "variable frame rate" state aligned label files.
    b_prevent_zeros = False  # True if you want to ensure that all the phonemes have one frame at least. (not recommended, only useful when there are too many utterances crashed)

    # PROCESSING:=========================================================================
    lu.mkdir(out_lab_st_dir)
    v_fileTokns = lu.read_text_file2(files_scp, dtype='string', comments='#')
    n_files = len(v_fileTokns)

    # Per-process crash-log name so parallel runs do not clobber each other.
    crashlist_file = lu.ins_pid('crash_file_list.scp')
    for ftkn in v_fileTokns:

        # Display:
        print('\nAnalysing file: ' + ftkn + '................................')

        # Input files:
        in_lab_st_file = in_lab_st_dir + '/' + ftkn + '.lab'
        out_lab_st_file = out_lab_st_dir + '/' + ftkn + '.lab'
        in_shift_file = in_shift_dir + '/' + ftkn + '.shift'

        # NOTE(review): this fragment is truncated — the body of the `try`
        # below is missing from the source.
        try:
if __name__ == '__main__':

    # CONSTANTS: So far, the vocoder has been tested only with the following constants:===
    fs = 48000

    # INPUT:==============================================================================
    files_scp      = '../data_48k/file_id.scp'   # List of file names (tokens). Format used by Merlin.
    in_lab_st_dir  = '../data_48k/labs'          # Original state aligned label files directory (in the format used by Merlin).
    in_shift_dir   = '../data_48k/params'        # Directory containing .shift files (You need to run feature extraction before running this script.)
    out_lab_st_dir = '../data_48k/labs_var_rate' # Directory that will contain the converted "variable frame rate" state aligned label files.
    b_prevent_zeros = False                  # True if you want to ensure that all the phonemes have one frame at least. (not recommended, only useful when there are too many utterances crashed)


    # PROCESSING:=========================================================================
    lu.mkdir(out_lab_st_dir)
    v_fileTokns = lu.read_text_file2(files_scp, dtype='string', comments='#')
    n_files = len(v_fileTokns)

    # Per-process crash-log name so parallel runs do not clobber each other.
    crashlist_file = lu.ins_pid('crash_file_list.scp')
    for ftkn in v_fileTokns:

        # Display:
        print('\nAnalysing file: ' + ftkn + '................................')

        # Input files:
        in_lab_st_file  = in_lab_st_dir  + '/' + ftkn + '.lab'
        out_lab_st_file = out_lab_st_dir + '/' + ftkn + '.lab'
        in_shift_file   = in_shift_dir   + '/' + ftkn + '.shift'

        # NOTE(review): this fragment is truncated — the body of the `try`
        # below is missing from the source.
        try:
    lu.write_binfile(v_lf0, out_feats_dir + '/' + file_name_token + '.lf0')

    # Saving auxiliary feature shift (hop length). It is useful for posterior modifications of labels in Merlin.
    lu.write_binfile(v_shift, out_feats_dir + '/' + file_name_token + '.shift')

    return


if __name__ == '__main__':

    # Constants (the vocoder has only been tested with these values):
    fft_len = 4096
    fs = 48000

    # Input locations and analysis settings:
    mvf = 4500  # Maximum voiced frequency (Hz)
    files_scp = '../data/file_id.scp'     # Utterance tokens, Merlin-style list file.
    in_wav_dir = '../data/wavs_nat'       # Wav files to extract features from.
    out_feats_dir = '../data/params'      # Destination for the extracted features.

    # Prepare the output directory and read the utterance list:
    lu.mkdir(out_feats_dir)
    file_tokens = lu.read_text_file2(files_scp, dtype='string', comments='#').tolist()

    # Extract features for every utterance using all available cores:
    lu.run_multithreaded(feat_extraction, in_wav_dir, file_tokens, out_feats_dir, fft_len, mvf)

    print('Done!')
Example #8
0
        # Copy the base logging config into the experiment's acoustic model dir.
        copy2(
            join(this_dir, 'conf_base', 'logging_config.conf'),
            join(exper_path, 'acoustic_model', 'conf', 'logging_config.conf'))

    # Read file list:
    file_id_list = pars_acous_train['Paths']['file_id_list']
    l_file_tokns = lu.read_text_file2(file_id_list,
                                      dtype='string',
                                      comments='#').tolist()
    acoustic_feats_path = pars_acous_train['Paths']['in_acous_feats_dir']

    # Acoustic Feature Extraction:-------------------------------------------------------------
    if b_feat_extr:
        # Extract features:
        lu.mkdir(acoustic_feats_path)

        # Either one worker per core or a plain sequential loop.
        if b_feat_ext_multiproc:
            lu.run_multithreaded(
                feat_extraction,
                join(exper_path, 'acoustic_model', 'data', 'wav'),
                l_file_tokns, acoustic_feats_path, d_mp_opts)
        else:
            for file_name_token in l_file_tokns:
                feat_extraction(
                    join(exper_path, 'acoustic_model', 'data', 'wav'),
                    file_name_token, acoustic_feats_path, d_mp_opts)

    # Labels Conversion to Variable Frame Rate:------------------------------------------------
    # NOTE(review): fragment truncated — the body of this `if` is missing.
    if b_conv_labs_rate and not d_mp_opts[
            'b_const_rate']:  # NOTE: The script ./script/label_st_align_to_var_rate.py can be also called from comand line directly.
    lp.xlabel('Time (frames)')
    lp.ylabel('F0')
    lp.grid()
    return


if __name__ == '__main__':

    # INPUT:==============================================================================
    wav_file_orig = 'data_48k/wavs_nat/hvd_593.wav'  # Original natural waveform. You can choose any of the provided ones in the /wavs_nat directory.
    out_dir = 'data_48k/wavs_syn'  # Where the synthesised waveform will be stored

    b_plots = True  # True if you want to plot the extracted parameters.

    # PROCESS:============================================================================
    lu.mkdir(out_dir)

    # ANALYSIS:
    print("Analysing.....................................................")
    m_mag, m_real, m_imag, v_f0, fs, v_shift = mp.analysis_lossless(
        wav_file_orig)

    # MODIFICATIONS:
    # You can modify the parameters here if wanted.

    # SYNTHESIS:
    print("Synthesising.................................................")
    v_syn_sig = mp.synthesis_from_lossless(m_mag, m_real, m_imag, v_f0, fs)

    # SAVE WAV FILE:
    print("Saving wav file..............................................")
    # NOTE(review): fragment truncated here — the actual save call is missing.
    # File setup:
    wav_file = os.path.join(in_wav_dir, file_name_token + '.wav')
    mp.analysis_compressed(wav_file, out_dir=out_feats_dir)

    return


if __name__ == '__main__':

    # Input locations:
    files_scp = '../data_48k/file_id.scp'   # Utterance tokens, Merlin-style list file.
    in_wav_dir = '../data_48k/wavs_nat'     # Wav files to extract features from.
    out_feats_dir = '../data_48k/params'    # Destination for the extracted features.

    # Prepare the output directory and read the utterance list:
    lu.mkdir(out_feats_dir)
    file_tokens = lu.read_text_file2(files_scp, dtype='string', comments='#').tolist()

    # Extract features for every utterance using all available cores:
    lu.run_multithreaded(feat_extraction, in_wav_dir, file_tokens, out_feats_dir)

    # For debug (Don't remove):
    #for file_name_token in file_tokens:
    #    feat_extraction(in_wav_dir, file_name_token, out_feats_dir)

    print('Done!')
        
                  os.path.join(exper_path, 'conf/config_base.conf'))

        # Save backup of this file and used magphase code:
        shutil.copytree(os.path.dirname(mp.__file__),
                        os.path.join(exper_path, 'backup_magphase_code'))
        shutil.copy2(__file__, os.path.join(exper_path, 'conf'))

    # Read file list:
    l_file_tokns = lu.read_text_file2(os.path.join(exper_path, file_id_list),
                                      dtype='string',
                                      comments='#').tolist()

    if b_feat_extr:
        # Extract features:
        acoustic_feats_path = os.path.join(exper_path, acoustic_feats_dir)
        lu.mkdir(acoustic_feats_path)

        if b_feat_ext_multiproc:
            lu.run_multithreaded(feat_extraction, in_wav_dir, l_file_tokns,
                                 acoustic_feats_path, d_mp_opts)
        else:
            for file_name_token in l_file_tokns:
                feat_extraction(in_wav_dir, file_name_token,
                                acoustic_feats_path, d_mp_opts)

    if b_config_merlin or b_wavgen:
        # Edit Merlin's config file:
        parser = configparser.ConfigParser()
        parser.optionxform = str
        parser.read([os.path.join(exper_path, 'conf/config_base.conf')])
    # INPUT:==============================================================================

    files_scp = '../demos/data_48k/file_id_predict.scp'  # List of file names (tokens). Format used by Merlin.
    in_feats_dir = '../demos/data_48k/params_predicted'  # Input directory that contains the predicted features.
    out_syn_dir = '../demos/data_48k/wavs_syn_from_predicted'  # Where the synthesised waveform will be stored.

    mag_dim = 60  # Number of Mel-scaled frequency bins.
    phase_dim = 45  # Number of Mel-scaled frequency bins kept for phase features (real and imag). It must be <= mag_dim
    pf_type = 'magphase'  # "magphase": MagPhase's own postfilter (in development)
    # "merlin":   Merlin's style postfilter.
    # "no":       No postfilter.

    b_multiproc = False  # If True, it synthesises using all the available cores in parallel. If False, it just uses one core (slower).

    # FILES SETUP:========================================================================
    lu.mkdir(out_syn_dir)
    l_file_tokns = lu.read_text_file2(files_scp, dtype='string',
                                      comments='#').tolist()

    # PROCESSING:=========================================================================
    # Synthesise each token either in parallel or sequentially.
    # NOTE(review): `fs` is not defined in this fragment — presumably set
    # earlier in the full script; verify before running.
    if b_multiproc:
        lu.run_multithreaded(synthesis, in_feats_dir, l_file_tokns,
                             out_syn_dir, mag_dim, phase_dim, fs, pf_type)
    else:
        for file_tokn in l_file_tokns:
            synthesis(in_feats_dir, file_tokn, out_syn_dir, mag_dim, phase_dim,
                      fs, pf_type)

    print('Done!')

if __name__ == '__main__':

    # Parsing input arg:
    config_file = sys.argv[1]

    # Constants:
    b_prevent_zeros = False # True if you want to ensure that all the phonemes have one frame at least.
                            # (not recommended, only useful when there are too many utterances crashed)

    # Parsing config file:
    file_id_list, in_lab_dir, in_feats_dir, fs, out_lab_dir = parse_config_file(config_file)

    # Conversion:
    lu.mkdir(out_lab_dir)
    v_filenames = lu.read_text_file2(file_id_list, dtype='string', comments='#')
    n_files = len(v_filenames)

    # Per-process crash-log name so parallel runs do not clobber each other.
    crashlist_file = lu.ins_pid('crash_file_list.scp')

    for filename in v_filenames:

        # Display:
        print('\nConverting lab file: ' + filename + '................................')

        # Current i/o files:
        in_lab_file   = path.join(in_lab_dir  , filename + '.lab')
        out_lab_file  = path.join(out_lab_dir , filename + '.lab')

        # Debug:
        # NOTE(review): fragment truncated here — the conversion body is missing.
    lp.xlabel('Time (frames)')
    lp.ylabel('F0')
    lp.grid()
    return


if __name__ == '__main__':  

    # INPUT:==============================================================================
    wav_file_orig = 'data_48k/wavs_nat/hvd_593.wav' # Original natural waveform. You can choose any of the provided ones in the /wavs_nat directory.
    out_dir       = 'data_48k/wavs_syn' # Where the synthesised waveform will be stored

    b_plots       = True # True if you want to plot the extracted parameters.

    # PROCESS:============================================================================
    lu.mkdir(out_dir)

    # ANALYSIS:
    print("Analysing.....................................................")
    m_mag, m_real, m_imag, v_f0, fs, v_shift = mp.analysis_lossless(wav_file_orig)

    # MODIFICATIONS:
    # You can modify the parameters here if wanted.

    # SYNTHESIS:
    print("Synthesising.................................................")
    v_syn_sig = mp.synthesis_from_lossless(m_mag, m_real, m_imag, v_f0, fs)

    # SAVE WAV FILE:
    print("Saving wav file..............................................")
    # NOTE(review): fragment truncated after this line — the write call is missing.
    wav_file_syn = out_dir + '/' + lu.get_filename(wav_file_orig) + '_copy_syn_lossless.wav'
    # NOTE(review): fragment begins mid-script; the enclosing __main__ guard is
    # not visible here.
    fs = 48000

    # INPUT:==============================================================================
    files_scp     = '../data_48k/file_id.scp'     # List of file names (tokens). Format used by Merlin.
    in_feats_dir  = '../data_48k/params'          # Input directory that contains the predicted features.
    out_syn_dir   = '../data_48k/wavs_syn_merlin' # Where the synthesised waveform will be stored.

    nbins_mel     = 60    # Number of Mel-scaled frequency bins.
    nbins_phase   = 45    # Number of Mel-scaled frequency bins kept for phase features (real and imag). It must be <= nbins_mel
    b_postfilter  = True  # If True, the MagPhase vocoder post-filter is applied. Note: If you want to use the one included in Merlin, disable this one.

    b_parallel    = False  # If True, it synthesises using all the available cores in parallel. If False, it just uses one core (slower).


    # FILES SETUP:========================================================================
    lu.mkdir(out_syn_dir)
    l_file_tokns = lu.read_text_file2(files_scp, dtype='string', comments='#').tolist()

    # PROCESSING:=========================================================================
    # Synthesise each token either in parallel or sequentially.
    if b_parallel:
        lu.run_multithreaded(synthesis, in_feats_dir, l_file_tokns, out_syn_dir, nbins_mel, nbins_phase, fs, b_postfilter)
    else:
        for file_tokn in l_file_tokns:
            synthesis(in_feats_dir, file_tokn, out_syn_dir, nbins_mel, nbins_phase, fs, b_postfilter)


    print('Done!')



Example #16
0
        # Persist the generated duration/acoustic train and synth configs.
        save_config(pars_dur_train,   join(dur_model_conf_path  , 'dur_train.conf'))
        save_config(pars_dur_synth,   join(dur_model_conf_path  , 'dur_synth.conf'))
        save_config(pars_acous_train, join(acous_model_conf_path, 'acous_train.conf'))
        save_config(pars_acous_synth, join(acous_model_conf_path, 'acous_synth.conf'))

        copy2(join(this_dir, 'conf_base', 'logging_config.conf'), join(exper_path, 'acoustic_model', 'conf', 'logging_config.conf'))

    # Read file list:
    file_id_list = pars_acous_train['Paths']['file_id_list']
    l_file_tokns = lu.read_text_file2(file_id_list, dtype='string', comments='#').tolist()
    acoustic_feats_path = pars_acous_train['Paths']['in_acous_feats_dir']

    # Acoustic Feature Extraction:-------------------------------------------------------------
    if b_feat_extr:
        # Extract features:
        lu.mkdir(acoustic_feats_path)

        # Either one worker per core or a plain sequential loop.
        if b_feat_ext_multiproc:
            lu.run_multithreaded(feat_extraction, join(exper_path, 'acoustic_model', 'data', 'wav'), l_file_tokns, acoustic_feats_path, d_mp_opts)
        else:
            for file_name_token in l_file_tokns:
                feat_extraction(join(exper_path, 'acoustic_model', 'data', 'wav'), file_name_token, acoustic_feats_path, d_mp_opts)

    # Labels Conversion to Variable Frame Rate:------------------------------------------------
    if b_conv_labs_rate and not d_mp_opts['b_const_rate']: # NOTE: The script ./script/label_st_align_to_var_rate.py can be also called from comand line directly.
        label_state_align = join(exper_path, 'acoustic_model', 'data', 'label_state_align')
        label_state_align_var_rate = pars_acous_train['Labels']['label_align']
        fs = int(pars_acous_train['Waveform']['samplerate'])
        ltvr.convert(file_id_list,label_state_align, acoustic_feats_path, fs, label_state_align_var_rate)

    # Run duration training:-------------------------------------------------------------------