def convert(file_id_list, in_lab_dir, in_feats_dir, fs, out_lab_dir, b_prevent_zeros=False):
    '''
    Convert state aligned label files into their "variable frame rate" version.

    For every file name read from file_id_list, the matching .shift file is read
    from in_feats_dir, the number of frames per state is computed (one state per
    phone is requested through n_states_x_phone=1), and the converted label file
    is written to out_lab_dir. A failing file does not stop the run: the error is
    reported on the console and the file name is appended to a crash list file.

    file_id_list:    Text file with the file names (without extension). Lines starting with '#' are comments.
    in_lab_dir:      Directory with the input .lab files.
    in_feats_dir:    Directory with the .shift files.
    fs:              Forwarded to mp.get_num_of_frms_per_state (presumably the sampling rate in Hz - TODO confirm).
    out_lab_dir:     Directory for the converted .lab files. It is passed to lu.mkdir before converting.
    b_prevent_zeros: True if you want to ensure that all the phonemes have one frame at least.
                     (not recommended, only useful when there are too many utterances crashed)
    '''

    # Conversion:
    lu.mkdir(out_lab_dir)
    v_filenames = lu.read_text_file2(file_id_list, dtype='string', comments='#')

    # File that collects the names of the utterances that could not be converted.
    # NOTE(review): lu.ins_pid presumably tags the name with the process id - confirm.
    crashlist_file = lu.ins_pid('crash_file_list.scp')

    for filename in v_filenames:

        # Display:
        print('\nConverting lab file: ' + filename + '................................')

        # Current i/o files:
        in_lab_file = os.path.join(in_lab_dir, filename + '.lab')
        out_lab_file = os.path.join(out_lab_dir, filename + '.lab')
        in_shift_file = os.path.join(in_feats_dir, filename + '.shift')

        # Debug: to get a full traceback, run the three calls below outside the try block.
        try:
            v_shift = lu.read_binfile(in_shift_file, dim=1)
            v_n_frms = mp.get_num_of_frms_per_state(
                v_shift, in_lab_file, fs,
                b_prevent_zeros=b_prevent_zeros,
                n_states_x_phone=1)
            la.convert_label_state_align_to_var_frame_rate(
                in_lab_file, v_n_frms, out_lab_file)

        except Exception as err:
            # KeyboardInterrupt and SystemExit do not derive from Exception,
            # so they still propagate and the run can be aborted by the user.
            print("crashlist")
            print('Failed to convert ' + filename + ': ' + repr(err))
            with open(crashlist_file, "a") as crashlistlog:
                crashlistlog.write(filename + '\n')

    print('Done!')
def convert(file_id_list, in_lab_dir, in_feats_dir, fs, out_lab_dir, b_prevent_zeros=False):
    '''
    Convert state aligned label files into their "variable frame rate" version.

    For every file name read from file_id_list, the matching .shift file is read
    from in_feats_dir, the number of frames per state is computed, and the
    converted label file is written to out_lab_dir. A failing file does not stop
    the run: the error is reported on the console and the file name is appended
    to a crash list file.

    file_id_list:    Text file with the file names (without extension). Lines starting with '#' are comments.
    in_lab_dir:      Directory with the input .lab files.
    in_feats_dir:    Directory with the .shift files.
    fs:              Forwarded to mp.get_num_of_frms_per_state (presumably the sampling rate in Hz - TODO confirm).
    out_lab_dir:     Directory for the converted .lab files. It is passed to lu.mkdir before converting.
    b_prevent_zeros: True if you want to ensure that all the phonemes have one frame at least.
                     (not recommended, only useful when there are too many utterances crashed)
    '''

    # Conversion:
    lu.mkdir(out_lab_dir)
    v_filenames = lu.read_text_file2(file_id_list, dtype='string', comments='#')

    # File that collects the names of the utterances that could not be converted.
    # NOTE(review): lu.ins_pid presumably tags the name with the process id - confirm.
    crashlist_file = lu.ins_pid('crash_file_list.scp')

    for filename in v_filenames:

        # Display:
        print('\nConverting lab file: ' + filename + '................................')

        # Current i/o files:
        in_lab_file = os.path.join(in_lab_dir, filename + '.lab')
        out_lab_file = os.path.join(out_lab_dir, filename + '.lab')
        in_shift_file = os.path.join(in_feats_dir, filename + '.shift')

        # Debug: to get a full traceback, run the three calls below outside the try block.
        try:
            v_shift = lu.read_binfile(in_shift_file, dim=1)
            v_n_frms = mp.get_num_of_frms_per_state(
                v_shift, in_lab_file, fs, b_prevent_zeros=b_prevent_zeros)
            la.convert_label_state_align_to_var_frame_rate(
                in_lab_file, v_n_frms, out_lab_file)

        except Exception as err:
            # KeyboardInterrupt and SystemExit do not derive from Exception,
            # so they still propagate and the run can be aborted by the user.
            # The failure is reported here instead of being swallowed silently.
            print('Failed to convert ' + filename + ': ' + repr(err))
            with open(crashlist_file, "a") as crashlistlog:
                crashlistlog.write(filename + '\n')

    print('Done!')
# Script entry point (truncated in this view). It sets the constants and the input/output
# paths for the label conversion, passes the output directory to lu.mkdir, reads the list of
# file tokens and, for each token, builds the .lab/.shift paths and starts reading the .shift file.
# NOTE(review): the body of the try block continues beyond what is visible here.
if __name__ == '__main__': # CONSTANTS: So far, the vocoder has been tested only with the following constants:=== fs = 48000 # INPUT:============================================================================== files_scp = '../data/file_id.scp' # List of file names (tokens). Format used by Merlin. in_lab_st_dir = '../data/labs' # Original state aligned label files directory (in the format used by Merlin). in_shift_dir = '../data/params' # Directory containing .shift files (You need to run feature extraction before running this script.) out_lab_st_dir = '../data/labs_var_rate' # Directory that will contain the converted "variable frame rate" state aligned label files. b_prevent_zeros = False # True if you want to ensure that all the phonemes have one frame at least. (not recommended, only useful when there are too many utterances crashed) # PROCESSING:========================================================================= lu.mkdir(out_lab_st_dir) v_fileTokns = lu.read_text_file2(files_scp, dtype='string', comments='#') n_files = len(v_fileTokns) crashlist_file = lu.ins_pid('crash_file_list.scp') for ftkn in v_fileTokns: # Display: print('\nAnalysing file: ' + ftkn + '................................') # Input files: in_lab_st_file = in_lab_st_dir + '/' + ftkn + '.lab' out_lab_st_file = out_lab_st_dir + '/' + ftkn + '.lab' in_shift_file = in_shift_dir + '/' + ftkn + '.shift' try: v_shift = lu.read_binfile(in_shift_file, dim=1)
# Script entry point (truncated in this view), using the '../data_48k' directory tree.
# It sets the constants and the input/output paths for the label conversion, passes the output
# directory to lu.mkdir, reads the list of file tokens and, for each token, builds the
# .lab/.shift paths and starts reading the .shift file.
# NOTE(review): the body of the try block continues beyond what is visible here.
if __name__ == '__main__': # CONSTANTS: So far, the vocoder has been tested only with the following constants:=== fs = 48000 # INPUT:============================================================================== files_scp = '../data_48k/file_id.scp' # List of file names (tokens). Format used by Merlin. in_lab_st_dir = '../data_48k/labs' # Original state aligned label files directory (in the format used by Merlin). in_shift_dir = '../data_48k/params' # Directory containing .shift files (You need to run feature extraction before running this script.) out_lab_st_dir = '../data_48k/labs_var_rate' # Directory that will contain the converted "variable frame rate" state aligned label files. b_prevent_zeros = False # True if you want to ensure that all the phonemes have one frame at least. (not recommended, only useful when there are too many utterances crashed) # PROCESSING:========================================================================= lu.mkdir(out_lab_st_dir) v_fileTokns = lu.read_text_file2(files_scp, dtype='string', comments='#') n_files = len(v_fileTokns) crashlist_file = lu.ins_pid('crash_file_list.scp') for ftkn in v_fileTokns: # Display: print('\nAnalysing file: ' + ftkn + '................................') # Input files: in_lab_st_file = in_lab_st_dir + '/' + ftkn + '.lab' out_lab_st_file = out_lab_st_dir + '/' + ftkn + '.lab' in_shift_file = in_shift_dir + '/' + ftkn + '.shift' try: v_shift = lu.read_binfile(in_shift_file, dim=1)
# Fragment. The statements before `return` are the tail of a function whose header is outside
# this view (presumably feat_extraction, which the entry point passes to lu.run_multithreaded);
# they write the .lf0 and .shift features of one file into out_feats_dir.
# The __main__ section then sets the constants and paths, passes the output directory to lu.mkdir,
# reads the file tokens as a list and runs feat_extraction over them through lu.run_multithreaded.
lu.write_binfile(v_lf0, out_feats_dir + '/' + file_name_token + '.lf0') # Saving auxiliary feature shift (hop length). It is useful for posterior modifications of labels in Merlin. lu.write_binfile(v_shift, out_feats_dir + '/' + file_name_token + '.shift') return if __name__ == '__main__': # CONSTANTS: So far, the vocoder has been tested only with the following constants:=== fft_len = 4096 fs = 48000 # INPUT:============================================================================== files_scp = '../data/file_id.scp' # List of file names (tokens). Format used by Merlin. in_wav_dir = '../data/wavs_nat' # Directory with the wavfiles to extract the features from. out_feats_dir = '../data/params' # Output directory that will contain the extracted features. mvf = 4500 # Maximum voiced frequency (Hz) # FILES SETUP:======================================================================== lu.mkdir(out_feats_dir) l_file_tokns = lu.read_text_file2(files_scp, dtype='string', comments='#').tolist() # MULTIPROCESSING EXTRACTION:========================================================== lu.run_multithreaded(feat_extraction, in_wav_dir, l_file_tokns, out_feats_dir, fft_len, mvf) print('Done!')
# Fragment. The statements before `return` are the tail of a function whose header is outside
# this view (presumably feat_extraction, which the entry point passes to lu.run_multithreaded);
# they build the .wav path of one file and call mp.analysis_compressed with out_feats_dir as output.
# The __main__ section then sets the '../data_48k' paths, passes the output directory to lu.mkdir,
# reads the file tokens as a list and runs feat_extraction over them through lu.run_multithreaded.
# A sequential loop over the tokens is kept as a commented-out debugging alternative.
# File setup: wav_file = os.path.join(in_wav_dir, file_name_token + '.wav') mp.analysis_compressed(wav_file, out_dir=out_feats_dir) return if __name__ == '__main__': # INPUT:============================================================================== files_scp = '../data_48k/file_id.scp' # List of file names (tokens). Format used by Merlin. in_wav_dir = '../data_48k/wavs_nat' # Directory with the wavfiles to extract the features from. out_feats_dir = '../data_48k/params' # Output directory that will contain the extracted features. # FILES SETUP:======================================================================== lu.mkdir(out_feats_dir) l_file_tokns = lu.read_text_file2(files_scp, dtype='string', comments='#').tolist() # MULTIPROCESSING EXTRACTION:========================================================== lu.run_multithreaded(feat_extraction, in_wav_dir, l_file_tokns, out_feats_dir) # For debug (Don't remove): #for file_name_token in l_file_tokns: # feat_extraction(in_wav_dir, file_name_token, out_feats_dir) print('Done!')
# Fragment from the interior of an experiment-setup routine (its beginning and end are outside
# this view). It copies the selected files and directories from the base experiment into
# exper_path, renames conf/config.conf to conf/config_base.conf, stores a backup of the magphase
# package directory and of this script inside the experiment, and reads the file token list.
# When b_feat_extr is set, feat_extraction is run for every token writing into
# acoustic_feats_path: through lu.run_multithreaded if b_feat_ext_multiproc is set,
# otherwise one file at a time.
print( "\nCopying files from base experiment to current experiment location.............." ) # Copy files and directories from base to current experiment: copytree(base_exper_path, l_files_and_dirs_to_copy, exper_path) os.rename(os.path.join(exper_path, 'conf/config.conf'), os.path.join(exper_path, 'conf/config_base.conf')) # Save backup of this file and used magphase code: shutil.copytree(os.path.dirname(mp.__file__), os.path.join(exper_path, 'backup_magphase_code')) shutil.copy2(__file__, os.path.join(exper_path, 'conf')) # Read file list: l_file_tokns = lu.read_text_file2(os.path.join(exper_path, file_id_list), dtype='string', comments='#').tolist() if b_feat_extr: # Extract features: acoustic_feats_path = os.path.join(exper_path, acoustic_feats_dir) lu.mkdir(acoustic_feats_path) if b_feat_ext_multiproc: lu.run_multithreaded(feat_extraction, in_wav_dir, l_file_tokns, acoustic_feats_path, d_mp_opts) else: for file_name_token in l_file_tokns: feat_extraction(in_wav_dir, file_name_token, acoustic_feats_path, d_mp_opts)
# Script entry point (truncated in this view). It takes the config file path from the first
# command line argument, obtains the list file, directories and fs from parse_config_file,
# passes the output directory to lu.mkdir, reads the file names and, for each one,
# builds the input and output .lab paths.
# NOTE(review): the rest of the loop body is beyond what is visible here.
if __name__ == '__main__': # Parsing input arg: config_file = sys.argv[1] # Constants: b_prevent_zeros = False # True if you want to ensure that all the phonemes have one frame at least. # (not recommended, only useful when there are too many utterances crashed) # Parsing config file: file_id_list, in_lab_dir, in_feats_dir, fs, out_lab_dir = parse_config_file(config_file) # Conversion: lu.mkdir(out_lab_dir) v_filenames = lu.read_text_file2(file_id_list, dtype='string', comments='#') n_files = len(v_filenames) crashlist_file = lu.ins_pid('crash_file_list.scp') for filename in v_filenames: # Display: print('\nConverting lab file: ' + filename + '................................') # Current i/o files: in_lab_file = path.join(in_lab_dir , filename + '.lab') out_lab_file = path.join(out_lab_dir , filename + '.lab') # Debug: #print('out_lab_file:-----------------------------------')