def write_word_score_files(scores, args):
    """Write one ``.scored`` file per txt file from blank-line separated scores.

    ``scores`` is a single string holding one entry per scored sentence,
    entries being separated by empty lines; the final entry holds the overall
    stats and is discarded. From every remaining entry the first line and the
    last three lines are dropped, and each line left is reduced to its second
    and second-to-last whitespace-separated fields.

    Entries are matched to txt files purely by position in
    ``os.listdir(args.txtdir)``. This will fail if the directory holds other
    files than txt files or has been modified since scoring. Matching on
    content would be safer, see TODO.

    Args:
        scores: The scoring output as one string.
        args: Object providing ``txtdir`` (directory that is listed),
            ``outdirpath`` (directory receiving the ``.scored`` files) and
            ``f`` (handed to ``io.open_writefile_safe`` as overwrite flag).
    """
    # Split on empty lines and drop the trailing overall-stats entry.
    entries = scores.split("\n\n")[:-1]

    # Reduce every kept line of every entry to "<field 1> <field -2>".
    reduced_entries = []
    for entry in entries:
        reduced = []
        # The first line and the last three lines of an entry are not needed.
        for line in entry.split("\n")[1:-3]:
            fields = line.split()
            reduced.append(fields[1] + " " + fields[-2])
        reduced_entries.append(reduced)

    # Only directory entries containing ".txt" consume a scored entry.
    txt_names = [name for name in os.listdir(args.txtdir) if ".txt" in name]
    for index, name in enumerate(txt_names):
        out_path = os.path.join(args.outdirpath, name[:-3] + "scored")
        wf = io.open_writefile_safe(out_path, args.f)
        # Close the handle even if the lookup or a write fails.
        try:
            for line in reduced_entries[index]:
                wf.write(line + "\n")
        finally:
            wf.close()
def write_word_score_files(scores, args):
    """Write one ``.scored`` file per txt file from blank-line separated scores.

    ``scores`` is a single string holding one entry per scored sentence,
    entries being separated by empty lines; the final entry holds the overall
    stats and is discarded. From every remaining entry the first line and the
    last three lines are dropped, and each line left is reduced to its second
    and second-to-last whitespace-separated fields.

    Entries are matched to txt files purely by position in
    ``os.listdir(args.txtdir)``. This will fail if the directory holds other
    files than txt files or has been modified since scoring. Matching on
    content would be safer, see TODO.

    Args:
        scores: The scoring output as one string.
        args: Object providing ``txtdir`` (directory that is listed),
            ``outdirpath`` (directory receiving the ``.scored`` files) and
            ``f`` (handed to ``io.open_writefile_safe`` as overwrite flag).
    """
    # Split on empty lines and drop the trailing overall-stats entry.
    entries = scores.split("\n\n")[:-1]

    # Reduce every kept line of every entry to "<field 1> <field -2>".
    reduced_entries = []
    for entry in entries:
        reduced = []
        # The first line and the last three lines of an entry are not needed.
        for line in entry.split("\n")[1:-3]:
            fields = line.split()
            reduced.append(fields[1] + " " + fields[-2])
        reduced_entries.append(reduced)

    # Only directory entries containing ".txt" consume a scored entry.
    txt_names = [name for name in os.listdir(args.txtdir) if ".txt" in name]
    for index, name in enumerate(txt_names):
        out_path = os.path.join(args.outdirpath, name[:-3] + "scored")
        wf = io.open_writefile_safe(out_path, args.f)
        # Close the handle even if the lookup or a write fails.
        try:
            for line in reduced_entries[index]:
                wf.write(line + "\n")
        finally:
            wf.close()
def combine_txt(indirpath, outfilepath, overwrite=False):
    """Combine the texts loaded from a directory into a single file.

    Every entry returned by ``io.load_txt_dir(indirpath)`` is written as one
    line: its elements from index 1 onwards joined by single spaces. The
    first element of each entry is left out (presumably an identifier of the
    source file -- confirm against ``load_txt_dir``).

    Args:
        indirpath: Directory handed to ``io.load_txt_dir``.
        outfilepath: Path of the combined output file.
        overwrite: Overwrite flag handed to ``io.open_writefile_safe``.
    """
    lines = io.load_txt_dir(indirpath)
    wf = io.open_writefile_safe(outfilepath, overwrite)
    # Close the handle even if a write fails part-way through.
    try:
        for line in lines:
            wf.write(" ".join(line[1:]) + "\n")
    finally:
        wf.close()
def create_lattices_and_list(txtlist, outdirpath, dictionary, overwrite=False):
    """Write a phoneme lattice per text entry plus a list of the lattice paths.

    For each entry in ``txtlist`` the lines returned by
    ``lattice_tools.make_phoneme_slf`` for ``entry[1:]`` are written as-is to
    ``<entry[0]>.phoneme_slf`` in ``outdirpath``. Afterwards the paths of all
    lattices written are stored, one per line, in ``lattices.list`` in the
    same directory.

    Args:
        txtlist: Iterable of sequences; element 0 is used as the output file
            stem and the remaining elements are passed to the lattice builder.
        outdirpath: Directory receiving the lattices and ``lattices.list``.
        dictionary: Passed through to ``lattice_tools.make_phoneme_slf``.
        overwrite: Overwrite flag handed to ``io.open_writefile_safe`` for
            every file written, including ``lattices.list``.
    """
    path_list = []
    for txt in txtlist:
        path = os.path.join(outdirpath, txt[0] + ".phoneme_slf")
        # Make the slf
        slf = lattice_tools.make_phoneme_slf(
            txt[1:], dictionary, pronoun_variant=True, no_syll_stress=True)
        # Write it out, closing the handle even if a write fails.
        wf = io.open_writefile_safe(path, overwrite)
        try:
            for l in slf:
                wf.write(l)
        finally:
            wf.close()
        # Everything has gone well so we add the path
        path_list.append(path)

    # Write out the path file. The overwrite flag is honoured here as well,
    # otherwise a forced re-run would refresh the lattices but not the list.
    wf = io.open_writefile_safe(
        os.path.join(outdirpath, "lattices.list"), overwrite)
    try:
        for p in path_list:
            wf.write(p + "\n")
    finally:
        wf.close()
def create_lattices_and_list(txtlist, outdirpath, dictionary, overwrite=False):
    """Write a phoneme lattice per text entry plus a list of the lattice paths.

    For each entry in ``txtlist`` the lines returned by
    ``lattice_tools.make_phoneme_slf`` for ``entry[1:]`` are written as-is to
    ``<entry[0]>.phoneme_slf`` in ``outdirpath``. Afterwards the paths of all
    lattices written are stored, one per line, in ``lattices.list`` in the
    same directory.

    Args:
        txtlist: Iterable of sequences; element 0 is used as the output file
            stem and the remaining elements are passed to the lattice builder.
        outdirpath: Directory receiving the lattices and ``lattices.list``.
        dictionary: Passed through to ``lattice_tools.make_phoneme_slf``.
        overwrite: Overwrite flag handed to ``io.open_writefile_safe`` for
            every file written, including ``lattices.list``.
    """
    path_list = []
    for txt in txtlist:
        path = os.path.join(outdirpath, txt[0] + ".phoneme_slf")
        # Make the slf
        slf = lattice_tools.make_phoneme_slf(
            txt[1:], dictionary, pronoun_variant=True, no_syll_stress=True)
        # Write it out, closing the handle even if a write fails.
        wf = io.open_writefile_safe(path, overwrite)
        try:
            for l in slf:
                wf.write(l)
        finally:
            wf.close()
        # Everything has gone well so we add the path
        path_list.append(path)

    # Write out the path file. The overwrite flag is honoured here as well,
    # otherwise a forced re-run would refresh the lattices but not the list.
    wf = io.open_writefile_safe(
        os.path.join(outdirpath, "lattices.list"), overwrite)
    try:
        for p in path_list:
            wf.write(p + "\n")
    finally:
        wf.close()
type=str, help= "Additional arguments to be sent to the ngram binary as options. Overwrites the defaults options: -order 4 -interpolate -gt3min 1 -wbdiscount -debug 3", nargs=argparse.REMAINDER, default='-order 4 -interpolate -gt3min 1 -wbdiscount -debug 3'.split()) parser.add_argument( '-f', action='store_true', help="Force overwrite of outputpath file if it exists.") parser.add_argument( '-no_syll_stress', action='store_true', help="Replace syllable stress markers with a boundary marker sb.") args = parser.parse_args() wf = io.open_writefile_safe(os.path.join(args.outpath, "sents.txt"), args.f) labs = io.parse_mlf(io.open_file_line_by_line(args.input_mlf), "align_mlf") labs = get_phoneme_strings(labs, args.no_syll_stress) for lab in labs: wf.write(" ".join(lab) + "\n") wf.close() txtpath = os.path.join(args.outpath, "sents.txt") lmpath = os.path.join(args.outpath, "ngram.lm") #This allows for people to pass their own options to the ngram binary options = " " + " ".join(args.lm_binary_options)
# Build an MLF from the output of the lattice tool for each lattice in slfs
# and write it to args.outmlfpath.
mlf = ["#!MLF!#\n"]
for slf in slfs:
    # Entries whose name does not contain ".slf" are ignored.
    if ".slf" not in slf:
        continue
    # NOTE(review): the command is assembled by string concatenation and run
    # through the shell, so paths containing spaces or shell metacharacters
    # will break it -- confirm that all of these inputs are trusted.
    command = (args.latticetoolpath
               + " -in-lattice " + os.path.join(args.inpath, slf)
               + " -lm " + args.lmpath
               + " " + args.options)
    bestpath = subprocess.check_output(
        command, stderr=subprocess.STDOUT, shell=True).split()
    # The first token is used as the label name, the rest are the labels.
    mlf.append("\"*/" + bestpath.pop(0) + ".rec\"\n")
    # Times are fabricated: each ordinary label advances the clock by 10000,
    # markers and pauses get zero duration.
    faketime = 0
    for p in bestpath:
        if p in ("<s>", "</s>"):
            # Sentence start/end tokens are left out of the MLF.
            continue
        start = str(faketime)
        if p in (".", "sp"):
            mlf.append(start + " " + start + " " + p + " 0.0 " + p + "\n")
        elif p in ("#1", "#2"):
            # We first add the stress marker and then a "." to mark the boundary.
            # Without the dot load_utterance from align mlf will not detect the syllable boundary.
            mlf.append(start + " " + start + " " + p + " 0.0 " + p + "\n")
            mlf.append(start + " " + start + " . 0.0 .\n")
        else:
            mlf.append(start + " " + str(faketime + 10000) + " " + p + " 0.0 " + p + "\n")
            faketime += 10000
    mlf.append(".\n")

wf = sire_io.open_writefile_safe(args.outmlfpath)
# Close the handle even if a write fails part-way through.
try:
    for l in mlf:
        wf.write(l)
finally:
    wf.close()
mlf.append("\"*/" + bestpath.pop(0) + ".rec\"\n") faketime = 0 for p in bestpath: if p == "<s>" or p == "</s>": pass elif p in [".", "sp"]: mlf.append( str(faketime) + " " + str(faketime) + " " + p + " 0.0 " + p + "\n") elif p in ["#1", "#2"]: # We first add the stress marker and then a "." to mark the boundary. # Without the dot load_utterance from align mlf will not detect the syllable boundary. mlf.append( str(faketime) + " " + str(faketime) + " " + p + " 0.0 " + p + "\n") mlf.append( str(faketime) + " " + str(faketime) + " . 0.0 .\n") else: mlf.append( str(faketime) + " " + str(faketime + 10000) + " " + p + " 0.0 " + p + "\n") faketime += 10000 mlf.append(".\n") wf = sire_io.open_writefile_safe(args.outmlfpath) for l in mlf: wf.write(l) wf.close()
parser.add_argument('-f', action="store_true", help="Force overwrite of files in output dir.") args = parser.parse_args() if args.merge_hvite_state_with_sp_align_mlf != None: state_mlf = sire_io.open_file_line_by_line( args.merge_hvite_state_with_sp_align_mlf[0]) phone_mlf = sire_io.open_file_line_by_line( args.merge_hvite_state_with_sp_align_mlf[1]) state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf") phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf") merged_utts = merge_hvite_state_with_sp_align_mlf( state_utts, phone_utts) if args.f == True: wf = sire_io.open_writefile_safe( args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True) else: wf = sire_io.open_writefile_safe( args.merge_hvite_state_with_sp_align_mlf[2]) wf.write("#!MLF!#\n") for utt in merged_utts: wf.write("\"*/" + utt.pop(0) + ".rec\"\n") for phone in utt: for state in phone: wf.write(" ".join(state) + "\n") wf.write(".\n") wf.close() if args.merge_hvite_state_with_full_context != None: full_context_labs = sire_io.open_labdir_line_by_line( args.merge_hvite_state_with_full_context[1])
if __name__ == "__main__": parser = argparse.ArgumentParser(description='Utility file convertion related methods.') parser.add_argument('-merge_hvite_state_with_full_context', nargs=3, help="Merge an HVite state level alignment MLF with full-context labels in a directory and output state-level full-context labels to another.", metavar=('mlf_path', 'lab_dir', 'out_dir')) parser.add_argument('-merge_hvite_state_with_sp_align_mlf', nargs=3, help="Merge an HVite state level alignment MLF which does not contain SP and syllable stress information with a phone level alignment ready mlf which does and output a state-level with SP and syllable stress.", metavar=('state_mlf_path', 'phone_mlf_path', 'out_mlf_path')) parser.add_argument('-collapse_closure', action="store_true", help="Collapses stops split into closure and release into one when merging state_align_labs with full_context_labs.") parser.add_argument('-f', action="store_true", help="Force overwrite of files in output dir.") args = parser.parse_args() if args.merge_hvite_state_with_sp_align_mlf != None: state_mlf = sire_io.open_file_line_by_line(args.merge_hvite_state_with_sp_align_mlf[0]) phone_mlf = sire_io.open_file_line_by_line(args.merge_hvite_state_with_sp_align_mlf[1]) state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf") phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf") merged_utts = merge_hvite_state_with_sp_align_mlf(state_utts, phone_utts) if args.f == True: wf = sire_io.open_writefile_safe(args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True) else: wf = sire_io.open_writefile_safe(args.merge_hvite_state_with_sp_align_mlf[2]) wf.write("#!MLF!#\n") for utt in merged_utts: wf.write("\"*/"+utt.pop(0)+".rec\"\n") for phone in utt: for state in phone: wf.write(" ".join(state)+"\n") wf.write(".\n") wf.close() if args.merge_hvite_state_with_full_context != None: full_context_labs = sire_io.open_labdir_line_by_line(args.merge_hvite_state_with_full_context[1]) mlf = 
sire_io.open_file_line_by_line(args.merge_hvite_state_with_full_context[0]) state_labs = sire_io.parse_mlf(mlf, "align_mlf")