def get_sire_utts(labdir, context_type, hhed_fix, durlab=False):
    """Read the labels in ``labdir`` and turn them into utterances.

    Args:
        labdir: Label directory, passed to
            ``sire_io.open_labdir_line_by_line``.
        context_type: Stored on ``args.context_type`` before ``get_utts``
            is called.
        hhed_fix: Stored on ``args.HHEd_fix`` before ``get_utts`` is called.
        durlab: Forwarded as the ``dur_lab`` keyword when opening the labels.

    Returns:
        Whatever ``get_utts(labs, args)`` returns.

    NOTE(review): ``args`` is not created in this function. It configures an
    object that must already exist in the enclosing (module) scope, so the
    attributes set below stay changed on that object after the call --
    confirm this is intended.
    """
    labs = sire_io.open_labdir_line_by_line(labdir, dur_lab=durlab)

    args.intype = "sire_lab"
    args.context_type = context_type
    args.HHEd_fix = hhed_fix
    # No dictionary is loaded here; the Combilex phoneme features are used
    # directly.
    args.phoneme_features = phoneme_features.CombilexPhonemes()
    # As we do not require the input text to analyse from a sire_lab we need
    # to disable the festival features.
    args.festival_features = False

    return get_utts(labs, args)
# Script fragment: load the input labels (`labs`) according to args.intype
# and, when args.questions is set, open the question file for writing.
# NOTE(review): this excerpt is cut off inside the final `if` branch; the
# remainder of that branch is not visible here.

# Optionally read Stanford dependency parses from args.parsedir.
if args.stanford_dependency_parse:
    args.dependencydict = read_stanford_dependency_parses(args.parsedir)

if args.intype == "txt":
    # Text input: args.inpath must be a directory.
    if not os.path.isdir(args.inpath):
        raise SiReError("Input path is not a directory! It must be when creating labs from text.")
    args.txtdir = args.inpath
    labs = io.load_txt_dir(args.txtdir, args.comma_is_pause)
    # A dictionary is mandatory for text input. Note that this check only
    # runs after the text directory has already been loaded.
    if args.dict == None:
        raise SiReError("No path to dictionary. Please use -dict option.")
    # args.dict is indexed as a two-element sequence; element 1 is passed
    # first and element 0 second.
    args.dictionary = dictionary.Dictionary(args.dict[1], args.dict[0])
    # The phoneme set used must match the dictionary.
    args.phoneme_features = args.dictionary.phoneme_feats
elif args.intype == "hts_lab":
    labs = io.open_labdir_line_by_line(args.inpath)
    # After loading, the input type is rewritten to "hts_mlf" -- presumably
    # so later processing treats these labels like hts_mlf input (confirm).
    args.intype = "hts_mlf"
elif args.intype == "sire_lab":
    labs = io.open_labdir_line_by_line(args.inpath)
else:
    # Any other input type is read as a single MLF file at args.inpath.
    if not os.path.exists(args.inpath):
        raise SiReError("Input path to mlf does no exist!")
    # NOTE(review): the file object returned by open() is never explicitly
    # closed.
    mlf = open(args.inpath, "r").readlines()
    labs = io.parse_mlf(mlf, args.intype)

# Used if we make questions fitted to a dataset.
# args.qfile is set to a file object opened for writing at args.qpath.
if args.questions == True:
    # NOTE(review): `args` is already populated at this point, so presumably
    # parse_args() has already run and this add_argument call does not itself
    # change `args` -- confirm.
    parser.add_argument('-qfile', type=str, help="A variable used to store the question file.", default=None)
    args.qfile = open(args.qpath, "w")
    parser.add_argument('-qfileutt', type=str, help="A variable used to store the GV question file.", default=None)
args.dependencydict = read_stanford_dependency_parses(args.parsedir) if args.intype == "txt": if not os.path.isdir(args.inpath): raise SiReError( "Input path is not a directory! It must be when creating labs from text." ) args.txtdir = args.inpath labs = io.load_txt_dir(args.txtdir, args.comma_is_pause) if args.dict == None: raise SiReError("No path to dictionary. Please use -dict option.") args.dictionary = dictionary.Dictionary(args.dict[1], args.dict[0]) #The phoneme set used must match the dictionary. args.phoneme_features = args.dictionary.phoneme_feats elif args.intype == "hts_lab": labs = io.open_labdir_line_by_line(args.inpath) # print "This is a lab", len(labs[0]) # labs is a list of lists. Each list within the list of one of the labs args.intype = "hts_mlf" elif args.intype == "sire_lab": labs = io.open_labdir_line_by_line(args.inpath) else: if not os.path.exists(args.inpath): raise SiReError("Input path to mlf does no exist!") mlf = open(args.inpath, "r").readlines() labs = io.parse_mlf(mlf, args.intype) #Used if we make questions fitted to a dataset #We use qfile as the path to the file later. if args.questions == True: parser.add_argument('-qfile',
wf = sire_io.open_writefile_safe( args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True) else: wf = sire_io.open_writefile_safe( args.merge_hvite_state_with_sp_align_mlf[2]) wf.write("#!MLF!#\n") for utt in merged_utts: wf.write("\"*/" + utt.pop(0) + ".rec\"\n") for phone in utt: for state in phone: wf.write(" ".join(state) + "\n") wf.write(".\n") wf.close() if args.merge_hvite_state_with_full_context != None: full_context_labs = sire_io.open_labdir_line_by_line( args.merge_hvite_state_with_full_context[1]) mlf = sire_io.open_file_line_by_line( args.merge_hvite_state_with_full_context[0]) state_labs = sire_io.parse_mlf(mlf, "align_mlf") if args.collapse_closure == True: for x, lab in enumerate(state_labs): for i, l in enumerate(lab): if '_cl' in l[-1]: if state_labs[x][i + 3][-1] + '_cl' == l[-1]: state_labs[x][i + 3] = lab[i + 3][:4] else: raise SiReError( "Something wrong with {0}".format(l)) merged = merge_hvite_state_align_and_full_context_lab( state_labs, full_context_labs) outdirpath = args.merge_hvite_state_with_full_context[2]
merged_utts = merge_hvite_state_with_sp_align_mlf(state_utts, phone_utts) if args.f == True: wf = sire_io.open_writefile_safe(args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True) else: wf = sire_io.open_writefile_safe(args.merge_hvite_state_with_sp_align_mlf[2]) wf.write("#!MLF!#\n") for utt in merged_utts: wf.write("\"*/"+utt.pop(0)+".rec\"\n") for phone in utt: for state in phone: wf.write(" ".join(state)+"\n") wf.write(".\n") wf.close() if args.merge_hvite_state_with_full_context != None: full_context_labs = sire_io.open_labdir_line_by_line(args.merge_hvite_state_with_full_context[1]) mlf = sire_io.open_file_line_by_line(args.merge_hvite_state_with_full_context[0]) state_labs = sire_io.parse_mlf(mlf, "align_mlf") if args.collapse_closure == True: for x, lab in enumerate(state_labs): for i, l in enumerate(lab): if '_cl' in l[-1]: if state_labs[x][i+3][-1]+'_cl' == l[-1]: state_labs[x][i+3] = lab[i+3][:4] else: raise SiReError("Something wrong with {0}".format(l)) merged = merge_hvite_state_align_and_full_context_lab(state_labs, full_context_labs) outdirpath = args.merge_hvite_state_with_full_context[2] for lab in merged: filename = lab.pop(0)+".lab" print "Creating - "+os.path.join(outdirpath, filename)