labs = io.load_txt_dir(args.txtdir, args.comma_is_pause) if args.dict == None: raise SiReError("No path to dictionary. Please use -dict option.") args.dictionary = dictionary.Dictionary(args.dict[1], args.dict[0]) #The phoneme set used must match the dictionary. args.phoneme_features = args.dictionary.phoneme_feats elif args.intype == "hts_lab": labs = io.open_labdir_line_by_line(args.inpath) args.intype = "hts_mlf" elif args.intype == "sire_lab": labs = io.open_labdir_line_by_line(args.inpath) else: if not os.path.exists(args.inpath): raise SiReError("Input path to mlf does no exist!") mlf = open(args.inpath, "r").readlines() labs = io.parse_mlf(mlf, args.intype) #Used if we make questions fitted to a dataset #We use qfile as the path to the file later. if args.questions == True: parser.add_argument('-qfile', type=str, help="A variable used to store the question file.", default=None) args.qfile = open(args.qpath, "w") parser.add_argument('-qfileutt', type=str, help="A variable used to store the GV question file.", default=None) args.qfileutt = open(args.qpath+"_utt", "w") for lab in labs: print "Making full context label for {0}".format(lab[0]) #Make an utt utt = utterance.Utterance(lab, args) #This writes out the label and also the questions write_context_utt(utt, args)
nargs=argparse.REMAINDER, default='-order 4 -interpolate -gt3min 1 -wbdiscount -debug 3'.split()) parser.add_argument( '-f', action='store_true', help="Force overwrite of outputpath file if it exists.") parser.add_argument( '-no_syll_stress', action='store_true', help="Replace syllable stress markers with a boundary marker sb.") args = parser.parse_args() wf = io.open_writefile_safe(os.path.join(args.outpath, "sents.txt"), args.f) labs = io.parse_mlf(io.open_file_line_by_line(args.input_mlf), "align_mlf") labs = get_phoneme_strings(labs, args.no_syll_stress) for lab in labs: wf.write(" ".join(lab) + "\n") wf.close() txtpath = os.path.join(args.outpath, "sents.txt") lmpath = os.path.join(args.outpath, "ngram.lm") #This allows for people to pass their own options to the ngram binary options = " " + " ".join(args.lm_binary_options) subprocess.call(args.ngram_binary_path + " -text " + txtpath + " -lm " + lmpath + options,
'-collapse_closure', action="store_true", help= "Collapses stops split into closure and release into one when merging state_align_labs with full_context_labs." ) parser.add_argument('-f', action="store_true", help="Force overwrite of files in output dir.") args = parser.parse_args() if args.merge_hvite_state_with_sp_align_mlf != None: state_mlf = sire_io.open_file_line_by_line( args.merge_hvite_state_with_sp_align_mlf[0]) phone_mlf = sire_io.open_file_line_by_line( args.merge_hvite_state_with_sp_align_mlf[1]) state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf") phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf") merged_utts = merge_hvite_state_with_sp_align_mlf( state_utts, phone_utts) if args.f == True: wf = sire_io.open_writefile_safe( args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True) else: wf = sire_io.open_writefile_safe( args.merge_hvite_state_with_sp_align_mlf[2]) wf.write("#!MLF!#\n") for utt in merged_utts: wf.write("\"*/" + utt.pop(0) + ".rec\"\n") for phone in utt: for state in phone: wf.write(" ".join(state) + "\n")
raise SiReError("No path to dictionary. Please use -dict option.") args.dictionary = dictionary.Dictionary(args.dict[1], args.dict[0]) #The phoneme set used must match the dictionary. args.phoneme_features = args.dictionary.phoneme_feats elif args.intype == "hts_lab": labs = io.open_labdir_line_by_line(args.inpath) # print "This is a lab", len(labs[0]) # labs is a list of lists. Each list within the list of one of the labs args.intype = "hts_mlf" elif args.intype == "sire_lab": labs = io.open_labdir_line_by_line(args.inpath) else: if not os.path.exists(args.inpath): raise SiReError("Input path to mlf does no exist!") mlf = open(args.inpath, "r").readlines() labs = io.parse_mlf(mlf, args.intype) #Used if we make questions fitted to a dataset #We use qfile as the path to the file later. if args.questions == True: parser.add_argument('-qfile', type=str, help="A variable used to store the question file.", default=None) args.qfile = open(args.qpath, "w") parser.add_argument( '-qfileutt', type=str, help="A variable used to store the GV question file.", default=None) args.qfileutt = open(args.qpath + "_utt", "w")
s_lab_count += 1 merged.append(c_merge) return merged if __name__ == "__main__": parser = argparse.ArgumentParser(description='Utility file convertion related methods.') parser.add_argument('-merge_hvite_state_with_full_context', nargs=3, help="Merge an HVite state level alignment MLF with full-context labels in a directory and output state-level full-context labels to another.", metavar=('mlf_path', 'lab_dir', 'out_dir')) parser.add_argument('-merge_hvite_state_with_sp_align_mlf', nargs=3, help="Merge an HVite state level alignment MLF which does not contain SP and syllable stress information with a phone level alignment ready mlf which does and output a state-level with SP and syllable stress.", metavar=('state_mlf_path', 'phone_mlf_path', 'out_mlf_path')) parser.add_argument('-collapse_closure', action="store_true", help="Collapses stops split into closure and release into one when merging state_align_labs with full_context_labs.") parser.add_argument('-f', action="store_true", help="Force overwrite of files in output dir.") args = parser.parse_args() if args.merge_hvite_state_with_sp_align_mlf != None: state_mlf = sire_io.open_file_line_by_line(args.merge_hvite_state_with_sp_align_mlf[0]) phone_mlf = sire_io.open_file_line_by_line(args.merge_hvite_state_with_sp_align_mlf[1]) state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf") phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf") merged_utts = merge_hvite_state_with_sp_align_mlf(state_utts, phone_utts) if args.f == True: wf = sire_io.open_writefile_safe(args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True) else: wf = sire_io.open_writefile_safe(args.merge_hvite_state_with_sp_align_mlf[2]) wf.write("#!MLF!#\n") for utt in merged_utts: wf.write("\"*/"+utt.pop(0)+".rec\"\n") for phone in utt: for state in phone: wf.write(" ".join(state)+"\n") wf.write(".\n") wf.close()