def CopyFileIfExists(utt_suffix, filename, input_dir, output_dir):
    """Copy a key/value file from input_dir to output_dir, suffixing its keys.

    Nothing happens when ``input_dir/filename`` is not an existing regular
    file.  Otherwise the file is loaded with ParseFileToDict (each value is
    passed through ``" ".join``) and written to ``output_dir/filename`` with
    WriteDictToFile.

    Args:
        utt_suffix: Suffix appended to every key as ``key + "-" + utt_suffix``.
            An empty suffix leaves the keys untouched.
        filename: Name of the file inside both directories.
        input_dir: Directory the file is read from.
        output_dir: Directory the file is written to.
    """
    input_path = input_dir + "/" + filename
    if not os.path.isfile(input_path):
        return

    entries = ParseFileToDict(input_path,
                              value_processor=lambda x: " ".join(x))
    if utt_suffix:
        # Re-key every entry; values are carried over unchanged.
        entries = {key + "-" + utt_suffix: value
                   for key, value in entries.items()}
    WriteDictToFile(entries, output_dir + "/" + filename)
def CreateCorruptedUtt2uniq(input_dir, output_dir, prefix):
    """Write ``output_dir/utt2uniq`` mapping new utterance ids to source ids.

    The utterance ids are the keys of ``input_dir/utt2spk``.  For each one, in
    sorted order, the id returned by ``GetNewId(utt_id, prefix)`` is mapped
    back to the original ``utt_id``.

    Args:
        input_dir: Directory containing the source utt2spk file.
        output_dir: Directory that receives the utt2uniq file.
        prefix: Prefix handed to GetNewId to derive each new utterance id.
    """
    # utt2spk is read only for its keys, which are the utterance ids.
    source_utt2spk = ParseFileToDict(input_dir + "/utt2spk",
                                     value_processor=lambda x: " ".join(x))
    new_id_to_source_id = {
        GetNewId(source_id, prefix): source_id
        for source_id in sorted(source_utt2spk)
    }
    WriteDictToFile(new_id_to_source_id, output_dir + "/utt2uniq")
def main():
    """Build a noise-augmented copy of a data directory.

    Steps, all driven by the arguments returned from GetArgs():
      1. Read ``reco2dur`` and ``wav.scp`` from ``args.input_dir``.
      2. Load the background and/or foreground noise lists (``wav.scp`` and
         ``reco2dur``) from ``args.bg_noise_dir`` / ``args.fg_noise_dir`` when
         those are set.
      3. Seed ``random`` with ``args.random_seed`` and call AugmentWav once per
         wav.scp line to obtain the new wav entry.
      4. Write the new ``wav.scp`` to ``args.output_dir`` (created if missing),
         copy utt2spk, utt2lang, text and vad.scp with suffixed keys, copy
         spk2gender with its keys unchanged, and run ``utils/fix_data_dir.sh``.

    Raises:
        KeyError: If a wav.scp key has no entry in reco2dur.
    """
    args = GetArgs()
    # Real lists are required: on Python 3 map() yields a one-shot iterator,
    # but these values are handed to AugmentWav once per recording.
    fg_snrs = [int(snr) for snr in args.fg_snr_str.split(":")]
    bg_snrs = [int(snr) for snr in args.bg_snr_str.split(":")]
    input_dir = args.input_dir
    output_dir = args.output_dir
    num_bg_noises = [int(num) for num in args.num_bg_noises.split(":")]
    reco2dur = ParseFileToDict(input_dir + "/reco2dur",
                               value_processor=lambda x: float(x[0]))
    with open(input_dir + "/wav.scp", 'r') as wav_scp:
        wav_scp_file = wav_scp.readlines()

    # Foreground and background noises share one wav table and one duration
    # table; the per-kind id lists record which ids belong to which kind.
    noise_wavs = {}
    noise_reco2dur = {}
    bg_noise_utts = []
    fg_noise_utts = []

    # Load background noises
    if args.bg_noise_dir:
        bg_noise_wav_filename = args.bg_noise_dir + "/wav.scp"
        bg_noise_utts, bg_noise_wavs = GetNoiseList(bg_noise_wav_filename)
        bg_noise_reco2dur = ParseFileToDict(
            args.bg_noise_dir + "/reco2dur",
            value_processor=lambda x: float(x[0]))
        noise_wavs.update(bg_noise_wavs)
        noise_reco2dur.update(bg_noise_reco2dur)

    # Load foreground noises
    if args.fg_noise_dir:
        fg_noise_wav_filename = args.fg_noise_dir + "/wav.scp"
        fg_noise_utts, fg_noise_wavs = GetNoiseList(fg_noise_wav_filename)
        fg_noise_reco2dur = ParseFileToDict(
            args.fg_noise_dir + "/reco2dur",
            value_processor=lambda x: float(x[0]))
        noise_wavs.update(fg_noise_wavs)
        noise_reco2dur.update(fg_noise_reco2dur)

    random.seed(args.random_seed)
    new_utt2wav = {}

    # Augment each line in the wav file
    for line in wav_scp_file:
        # First space-separated token is the key; the rest is the wav entry.
        toks = line.rstrip().split(" ")
        utt = toks[0]
        wav = " ".join(toks[1:])
        dur = reco2dur[utt]
        new_wav = AugmentWav(utt, wav, dur, fg_snrs, bg_snrs, fg_noise_utts,
                             bg_noise_utts, noise_wavs, noise_reco2dur,
                             args.fg_interval, num_bg_noises)
        # Mirror CopyFileIfExists: an empty suffix leaves the id unchanged, so
        # wav.scp keys stay consistent with the copied utt2spk/text/... keys.
        if args.utt_suffix:
            new_utt = utt + "-" + args.utt_suffix
        else:
            new_utt = utt
        new_utt2wav[new_utt] = new_wav

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    WriteDictToFile(new_utt2wav, output_dir + "/wav.scp")
    # Per-utterance files get the suffix on their keys.
    for per_utt_file in ("utt2spk", "utt2lang", "text", "vad.scp"):
        CopyFileIfExists(args.utt_suffix, per_utt_file, input_dir, output_dir)
    # spk2gender is copied with an empty suffix, i.e. keys unchanged.
    CopyFileIfExists("", "spk2gender", input_dir, output_dir)
    data_lib.RunKaldiCommand(
        "utils/fix_data_dir.sh {output_dir}".format(output_dir=output_dir))
def main():
    """Build a noise-augmented, id-prefixed copy of a data directory.

    Steps, all driven by the arguments returned from GetArgs():
      1. Read ``reco2dur`` and ``wav.scp`` from ``args.input_dir``.
      2. Load the background and/or foreground noise lists (``wav.scp`` and
         ``reco2dur``) from ``args.bg_noise_dir`` / ``args.fg_noise_dir`` when
         those are set.
      3. Seed ``random`` with ``args.random_seed`` and call AugmentWav once per
         wav.scp line; the new id comes from ``GetNewId(utt, args.utt_prefix)``.
      4. Write the new ``wav.scp`` to ``args.output_dir`` (created if missing),
         write a prefixed utt2spk and derive spk2utt from it, write utt2uniq
         (prefixed copy if the input has one, otherwise freshly created), and
         prefix text, segments and reco2file_and_channel when present.
      5. Run ``utils/fix_data_dir.sh`` and ``utils/validate_data_dir.sh
         --no-feats`` on the output directory, then call ``sys.exit()``.

    Raises:
        KeyError: If a wav.scp key has no entry in reco2dur.
        SystemExit: Always, on completion, via ``sys.exit()``.
    """
    args = GetArgs()
    fg_snrs = [int(snr) for snr in args.fg_snr_str.split(":")]
    bg_snrs = [int(snr) for snr in args.bg_snr_str.split(":")]
    input_dir = args.input_dir
    output_dir = args.output_dir
    num_bg_noises = [int(num) for num in args.num_bg_noises.split(":")]
    reco2dur = ParseFileToDict(input_dir + "/reco2dur",
                               value_processor=lambda x: float(x[0]))
    # Close the file as soon as its lines are in memory.
    with open(input_dir + "/wav.scp", 'r') as wav_scp:
        wav_scp_file = wav_scp.readlines()

    # Foreground and background noises share one wav table and one duration
    # table; the per-kind id lists record which ids belong to which kind.
    noise_wavs = {}
    noise_reco2dur = {}
    bg_noise_utts = []
    fg_noise_utts = []

    # Load background noises
    if args.bg_noise_dir:
        bg_noise_wav_filename = args.bg_noise_dir + "/wav.scp"
        bg_noise_utts, bg_noise_wavs = GetNoiseList(bg_noise_wav_filename)
        bg_noise_reco2dur = ParseFileToDict(
            args.bg_noise_dir + "/reco2dur",
            value_processor=lambda x: float(x[0]))
        noise_wavs.update(bg_noise_wavs)
        noise_reco2dur.update(bg_noise_reco2dur)

    # Load foreground noises
    if args.fg_noise_dir:
        fg_noise_wav_filename = args.fg_noise_dir + "/wav.scp"
        fg_noise_utts, fg_noise_wavs = GetNoiseList(fg_noise_wav_filename)
        fg_noise_reco2dur = ParseFileToDict(
            args.fg_noise_dir + "/reco2dur",
            value_processor=lambda x: float(x[0]))
        noise_wavs.update(fg_noise_wavs)
        noise_reco2dur.update(fg_noise_reco2dur)

    random.seed(args.random_seed)
    new_utt2wav = {}

    # Augment each line in the wav file
    for line in wav_scp_file:
        # First space-separated token is the key; the rest is the wav entry.
        toks = line.rstrip().split(" ")
        utt = toks[0]
        wav = " ".join(toks[1:])
        dur = reco2dur[utt]
        new_wav = AugmentWav(utt, wav, dur, fg_snrs, bg_snrs, fg_noise_utts,
                             bg_noise_utts, noise_wavs, noise_reco2dur,
                             args.fg_interval, num_bg_noises)
        new_utt = GetNewId(utt, args.utt_prefix)
        new_utt2wav[new_utt] = new_wav

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    WriteDictToFile(new_utt2wav, output_dir + "/wav.scp")

    # utt2spk: both fields (0 and 1) receive the prefix.
    AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk",
                      args.utt_prefix, field=[0, 1])
    data_lib.RunKaldiCommand(
        "utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
        .format(output_dir=output_dir))

    if os.path.isfile(input_dir + "/utt2uniq"):
        AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq",
                          args.utt_prefix, field=[0])
    else:
        # Create the utt2uniq file
        CreateCorruptedUtt2uniq(input_dir, output_dir, args.utt_prefix)

    # Optional files, paired with the field indices that receive the prefix.
    optional_files = (
        ("text", [0]),
        ("segments", [0, 1]),
        ("reco2file_and_channel", [0, 1]),
    )
    for name, fields in optional_files:
        if os.path.isfile(input_dir + "/" + name):
            AddPrefixToFields(input_dir + "/" + name, output_dir + "/" + name,
                              args.utt_prefix, field=fields)

    data_lib.RunKaldiCommand(
        "utils/fix_data_dir.sh {output_dir}".format(output_dir=output_dir))
    data_lib.RunKaldiCommand(
        "utils/validate_data_dir.sh --no-feats {output_dir}"
        .format(output_dir=output_dir))

    sys.exit()