def create_augmented_utt2uniq(input_dir, output_dir, utt_modifier_type, utt_modifier):
    """Write ``output_dir/utt2uniq`` mapping each modified id to its original id.

    The original utterance ids are the keys read from ``input_dir/utt2spk``.
    Each one is rewritten with ``get_new_id`` and the resulting
    ``{modified_id: original_id}`` table is written out.

    Args:
        input_dir: Directory containing the source ``utt2spk`` file.
        output_dir: Directory in which ``utt2uniq`` is written.
        utt_modifier_type: Forwarded unchanged to ``get_new_id``.
        utt_modifier: Forwarded unchanged to ``get_new_id``.
    """
    utt2spk_path = input_dir + "/utt2spk"
    utt2spk = parse_file_to_dict(utt2spk_path, value_processor=lambda x: " ".join(x))

    # Only the keys (utterance ids) matter here; the parsed values are ignored.
    utt2uniq = {
        get_new_id(utt, utt_modifier_type, utt_modifier): utt
        for utt in utt2spk.keys()
    }

    write_dict_to_file(utt2uniq, output_dir + "/utt2uniq")
def copy_file_if_exists(input_file, output_file, utt_modifier_type, utt_modifier, fields=None):
    """Copy a key/value table to ``output_file`` with its ids rewritten.

    Does nothing when ``input_file`` is not an existing regular file.
    Otherwise every key is passed through ``get_new_id``. When ``fields`` has
    more than one entry, the first ``len(fields) - 1`` space-separated value
    tokens of each entry are passed through ``get_new_id`` as well; remaining
    tokens are copied verbatim.

    Args:
        input_file: Path of the table to read with ``parse_file_to_dict``.
        output_file: Path handed to ``write_dict_to_file``.
        utt_modifier_type: Forwarded unchanged to ``get_new_id``.
        utt_modifier: Forwarded unchanged to ``get_new_id``.
        fields: Sequence whose *length* selects how many leading columns
            (key included) are rewritten. Only ``len(fields)`` is consulted,
            not the individual entries. Defaults to ``[0]`` (key only).
    """
    # A None sentinel avoids sharing one mutable list object across calls.
    if fields is None:
        fields = [0]

    if not os.path.isfile(input_file):
        return

    clean_dict = parse_file_to_dict(input_file, value_processor=lambda x: " ".join(x))

    # Number of leading value tokens to rewrite; the key accounts for one field.
    num_value_fields = len(fields) - 1

    new_dict = {}
    for key in clean_dict.keys():
        modified_key = get_new_id(key, utt_modifier_type, utt_modifier)
        if num_value_fields > 0:
            values = clean_dict[key].split(" ")
            for idx in range(num_value_fields):
                values[idx] = get_new_id(values[idx], utt_modifier_type, utt_modifier)
            new_dict[modified_key] = " ".join(values)
        else:
            new_dict[modified_key] = clean_dict[key]

    write_dict_to_file(new_dict, output_file)
def main():
    """Build an augmented copy of a data directory.

    Reads ``wav.scp`` and ``reco2dur`` from ``args.input_dir``, builds a new
    wav entry for every line with ``augment_wav``, and writes the result to
    ``args.output_dir`` under ids produced by ``get_new_id``. The optional
    metadata files are then copied with their ids rewritten, ``utt2uniq`` is
    copied or created, and the Kaldi ``utt2spk_to_spk2utt.pl`` and
    ``fix_data_dir.sh`` scripts are run on the output directory.
    """
    args = get_args()
    input_dir = args.input_dir
    output_dir = args.output_dir

    # Colon-separated integer lists supplied on the command line.
    fg_snrs = [int(i) for i in args.fg_snr_str.split(":")]
    bg_snrs = [int(i) for i in args.bg_snr_str.split(":")]
    num_bg_noises = [int(i) for i in args.num_bg_noises.split(":")]

    reco2dur = parse_file_to_dict(input_dir + "/reco2dur",
                                  value_processor=lambda x: float(x[0]))
    # Use a context manager so the wav.scp handle is closed deterministically.
    with open(input_dir + "/wav.scp", 'r') as wav_scp:
        wav_scp_lines = wav_scp.readlines()

    noise_wavs = {}
    noise_reco2dur = {}
    bg_noise_utts = []
    fg_noise_utts = []

    # Load background noises
    if args.bg_noise_dir:
        bg_noise_wav_filename = args.bg_noise_dir + "/wav.scp"
        bg_noise_utts, bg_noise_wavs = get_noise_list(bg_noise_wav_filename)
        bg_noise_reco2dur = parse_file_to_dict(args.bg_noise_dir + "/reco2dur",
                                               value_processor=lambda x: float(x[0]))
        noise_wavs.update(bg_noise_wavs)
        noise_reco2dur.update(bg_noise_reco2dur)

    # Load foreground noises
    if args.fg_noise_dir:
        fg_noise_wav_filename = args.fg_noise_dir + "/wav.scp"
        fg_noise_utts, fg_noise_wavs = get_noise_list(fg_noise_wav_filename)
        fg_noise_reco2dur = parse_file_to_dict(args.fg_noise_dir + "/reco2dur",
                                               value_processor=lambda x: float(x[0]))
        noise_wavs.update(fg_noise_wavs)
        noise_reco2dur.update(fg_noise_reco2dur)

    # Seed after loading and before augmenting, exactly as the original did.
    random.seed(args.random_seed)
    new_utt2wav = {}

    # Augment each line in the wav file
    for line in wav_scp_lines:
        toks = line.rstrip().split(" ")
        utt = toks[0]
        wav = " ".join(toks[1:])
        dur = reco2dur[utt]
        new_wav = augment_wav(utt, wav, dur, fg_snrs, bg_snrs, fg_noise_utts,
                              bg_noise_utts, noise_wavs, noise_reco2dur,
                              args.fg_interval, num_bg_noises)
        new_utt = get_new_id(utt, args.utt_modifier_type, args.utt_modifier)
        new_utt2wav[new_utt] = new_wav

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    write_dict_to_file(new_utt2wav, output_dir + "/wav.scp")

    def copy_meta(name, modifier_type=args.utt_modifier_type,
                  modifier=args.utt_modifier, **kwargs):
        # Copy input_dir/<name> to output_dir/<name>, rewriting ids on the way.
        copy_file_if_exists(input_dir + "/" + name, output_dir + "/" + name,
                            modifier_type, modifier, **kwargs)

    copy_meta("reco2dur")
    copy_meta("utt2dur")

    # Check whether to modify the speaker id or not while creating utt2spk file
    fields = ([0, 1] if args.modify_spk_id else [0])
    copy_meta("utt2spk", fields=fields)
    copy_meta("utt2lang")
    copy_meta("utt2num_frames")
    copy_meta("text")
    copy_meta("segments", fields=[0, 1])
    copy_meta("vad.scp")
    copy_meta("reco2file_and_channel", fields=[0, 1])

    if args.modify_spk_id:
        copy_meta("spk2gender")
    else:
        # Speaker ids were left alone in utt2spk, so pass the same
        # (None, "") modifier pair the original call used here.
        copy_meta("spk2gender", None, "")

    # Create utt2uniq file
    if os.path.isfile(input_dir + "/utt2uniq"):
        copy_meta("utt2uniq", fields=[0])
    else:
        create_augmented_utt2uniq(input_dir, output_dir,
                                  args.utt_modifier_type, args.utt_modifier)

    data_lib.RunKaldiCommand("subtools/kaldi/utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
                             .format(output_dir = output_dir))

    data_lib.RunKaldiCommand("subtools/kaldi/utils/fix_data_dir.sh {output_dir}".format(output_dir = output_dir))