def make_questions(context_skeleton, qformat, generic=True, HHEd_fix=False):
    """Create question strings for every context stored in a skeleton.

    Args:
        context_skeleton: Object whose ``added_contexts`` attribute maps each
            context name to the values seen for it, and which has one
            attribute per context name holding that context's type
            (None, "bool", or a string containing "float" or "int").
        qformat: "HMM" (emits "QS" questions) or "NN" (emits "LQ" questions).
        generic: Only ``False`` is implemented. Any other value prints an
            error and returns an empty list.
        HHEd_fix: When True, values of the "cp" context are wrapped as
            "-value+" in bool questions.

    Returns:
        List of question strings.

    Raises:
        SystemExit: With status 1 on an invalid qformat or an unknown
            context type.
    """
    context_dict = context_skeleton.added_contexts
    if qformat not in ["NN", "HMM"]:
        print("Error: Invalid question format! " + str(qformat))
        print("Must be either HMM or NN!")
        # A bare sys.exit() would report success (status 0) to the shell.
        sys.exit(1)

    questions = []
    # Equality (not truthiness) is kept on purpose so that only False/0
    # selects the implemented path, exactly as before.
    if generic == False:
        for key in context_dict:
            qtype = getattr(context_skeleton, key)
            # De-duplicate, then sort: the ordering matters for the HMM
            # questions and makes the NN output easier to read.
            vals = sorted(set(context_dict[key]))
            if qtype is None:
                continue
            if qtype == "bool":
                for val in vals:
                    # The current phoneme id gets the HHEd_fix decoration.
                    if HHEd_fix == True and key == "cp":
                        val = "-" + str(val) + "+"
                    body = "\"" + key + "-" + str(val) + "\" {*|" + key + ":" + str(val) + "|*}"
                    if qformat == "HMM":
                        questions.append("QS " + body)
                    elif qformat == "NN":
                        questions.append("LQ 0 " + body)
            elif "float" in qtype:
                if qformat == "HMM":
                    questions += make_hmm_relational_qs(vals, key, qtype)
                elif qformat == "NN":
                    for val in vals:
                        # HHEd's pattern matching treats '.' as special, so
                        # the search pattern uses the strintified value.
                        # The xx placeholder has to be handled first.
                        if val == "xx":
                            questions.append("LQ 1 0.0 \"" + key + "-xx\" {*|" + key + ":xx|*}")
                        else:
                            as_int = strintify(float(val))
                            questions.append("LQ 1 " + str(val) + " \"" + key + "-" + as_int
                                             + "\" {*|" + key + ":" + as_int + "|*}")
            elif "int" in qtype:
                if qformat == "HMM":
                    questions += make_hmm_relational_qs(vals, key, qtype)
                elif qformat == "NN":
                    for val in vals:
                        # The NN wants a float for the value itself while the
                        # name and pattern keep the int. xx comes first.
                        if val == "xx":
                            questions.append("LQ 1 0.0 \"" + key + "-xx\" {*|" + key + ":xx|*}")
                        else:
                            # NOTE(review): strfloatify is defined elsewhere;
                            # confirm it accepts the stored value type as-is.
                            questions.append("LQ 1 " + strfloatify(val) + " \"" + key + "-" + str(val)
                                             + "\" {*|" + key + ":" + str(val) + "|*}")
            else:
                print("Error: Odd question type, should not exist - " + str(qtype))
                sys.exit(1)
    else:
        print("Error: Not implemented yet! (Outputting generic question set)")
    return questions
def make_hmm_relational_qs(values, key, qtype):
    """Build the HMM "QS" questions for one numeric context.

    Every value gets an equality question
    (``QS "key-val" {*|key:val|*}``). Every value larger than the counting
    start additionally gets a ``QS "key<=val" {...}`` question whose pattern
    list covers everything from the start up to and including the value.

    Args:
        values: Values seen for the context. May contain the placeholder
            "xx". The caller's list is not modified.
        key: Context name used in question names and patterns.
        qtype: Type string of the context. Three substrings are checked:
            "float" routes each value through ``strintify(float(val))``,
            "xx" allows the placeholder, and "0" makes counting start at 0
            instead of 1.

    Returns:
        List of question strings in generation order.

    Raises:
        SiReError: If "xx" is among the values but qtype does not allow it.
    """
    questions = []
    # Add the xx question if the type allows it, otherwise complain.
    if "xx" in values:
        if "xx" not in qtype:
            raise SiReError(
                "xx in values but not in qtype {0} for key {1} - why?".format(
                    qtype, key))
        questions.append("QS \"" + key + "-xx\" {*|" + key + ":xx|*}")

    # None of these depend on the individual value.
    is_float = "float" in qtype
    start = 0 if "0" in qtype else 1
    head = "*|" + key + ":"

    for val in values:
        # Skip the placeholder instead of removing it from the caller's list.
        if val == "xx":
            continue
        if is_float:
            val = strintify(float(val))
        text = str(val)
        questions.append("QS \"" + key + "-" + text + "\" {" + head + text + "|*}")

        number = int(val)
        if number <= start:
            # Nothing below this value to enumerate.
            continue

        # Floor division keeps `tens` an int even under true division;
        # range() below would reject a float.
        tens = number // 10
        remainder = number % 10
        if tens > 0:
            # All single-digit values ...
            patterns = [str(n) for n in range(start, 10)]
            # ... every complete block of ten ("n?" has one wildcard
            # character after the leading digits) ...
            patterns += [str(n) + "?" for n in range(1, tens)]
            # ... and the last, partial block spelled out.
            patterns += [str(tens) + str(n) for n in range(remainder + 1)]
        else:
            patterns = [str(n) for n in range(start, number + 1)]
        questions.append("QS \"" + key + "<=" + text + "\" {"
                         + ",".join(head + p + "|*" for p in patterns) + "}")
    return questions
def get_context_string(self, HHEd_fix=False):
    """Render the stored contexts as one label string.

    The result is ``"<start> <end> |key:value|key:value|...|"``, with keys
    in the iteration order of ``self.added_contexts``. The "start" and "end"
    entries only appear in the leading prefix.

    Args:
        HHEd_fix: When True, the "cp" entry is rewritten from ``cp:X`` to
            ``cp:-X+``.

    Returns:
        The label string.
    """
    contexts = self.added_contexts
    parts = [str(contexts["start"]), " ", str(contexts["end"]), " "]
    for key in contexts:
        if key in ("start", "end"):
            continue
        qtype = getattr(self, key)
        value = contexts[key]
        if "float" in qtype and not (value == "xx" and "xx" in qtype):
            text = context_utils.strintify(float(value))
        else:
            # Covers non-float contexts and the "xx" placeholder of float
            # contexts; float("xx") would raise ValueError.
            text = str(value)
        parts.append("|" + key + ":" + text)
    s = "".join(parts)
    if HHEd_fix == True:
        # NOTE(review): this is a plain substring replace, so it would also
        # hit any other key whose name ends in "cp" - confirm none exists.
        s = s.replace("cp:" + contexts["cp"], "cp:-" + contexts["cp"] + "+")
    return s + "|"
def make_hmm_relational_qs(values, key, qtype):
    """Build the HMM "QS" questions for one numeric context.

    Each value yields ``QS "key-val" {*|key:val|*}``. Each value above the
    counting start also yields a ``QS "key<=val" {...}`` question listing
    patterns for everything from the start up to and including the value.

    Args:
        values: Values seen for the context, possibly including the
            placeholder "xx". The list passed in is left unchanged.
        key: Context name used in question names and patterns.
        qtype: Type string of the context. "float" in it sends values
            through ``strintify(float(val))``, "xx" permits the placeholder
            and "0" starts counting at 0 rather than 1.

    Returns:
        List of question strings in generation order.

    Raises:
        SystemExit: With status 1 if "xx" is among the values but qtype
            does not allow it.
    """
    questions = []
    # Add the xx question if appropriate, else bail out.
    if "xx" in values:
        if "xx" not in qtype:
            print("Error: xx in values but not in qtype {0} for key {1} - why?".format(qtype, key))
            # A bare sys.exit() would signal success to the calling process.
            sys.exit(1)
        questions.append("QS \"" + key + "-xx\" {*|" + key + ":xx|*}")

    # Work on a filtered copy so the caller's list is not modified.
    numeric_values = [v for v in values if v != "xx"]
    convert = "float" in qtype
    # If the type counts 0 we start at 0, otherwise at 1.
    start = 0 if "0" in qtype else 1

    for val in numeric_values:
        if convert:
            val = strintify(float(val))
        shown = str(val)
        questions.append("QS \"" + key + "-" + shown + "\" {*|" + key + ":" + shown + "|*}")

        limit = int(val)
        if limit > start:
            # divmod floors, so `tens` stays an int under true division too.
            tens, remainder = divmod(limit, 10)
            if tens > 0:
                # Singles, then whole tens ("n?" = digits plus one wildcard
                # character), then the last partial block of ten.
                items = [str(n) for n in range(start, 10)]
                items.extend(str(n) + "?" for n in range(1, tens))
                items.extend(str(tens) + str(n) for n in range(remainder + 1))
            else:
                # Just singles.
                items = [str(n) for n in range(start, limit + 1)]
            listed = ",".join("*|" + key + ":" + item + "|*" for item in items)
            questions.append("QS \"" + key + "<=" + shown + "\" {" + listed + "}")
    return questions
def get_context_string(self, HHEd_fix=False):
    """Serialise the stored contexts into a single label string.

    Output shape: ``"<start> <end> |key:value|key:value|...|"``. Keys follow
    the iteration order of ``self.added_contexts``; "start" and "end" are
    used for the prefix only. Values of float-typed contexts go through
    ``context_utils.strintify`` unless the value is the "xx" placeholder and
    the type string allows "xx".

    Args:
        HHEd_fix: When True, ``cp:X`` is replaced by ``cp:-X+`` in the
            finished string.

    Returns:
        The label string.
    """
    label = str(self.added_contexts["start"]) + " "
    label = label + str(self.added_contexts["end"]) + " "

    for name in self.added_contexts:
        if name == "start" or name == "end":
            continue
        kind = getattr(self, name)
        value = self.added_contexts[name]
        # Written verbatim: anything that is not a float context, plus the
        # xx placeholder where the type permits it.
        verbatim = "float" not in kind or (value == "xx" and "xx" in kind)
        if verbatim:
            rendered = str(value)
        else:
            rendered = context_utils.strintify(float(value))
        label = label + "|" + name + ":" + rendered

    if HHEd_fix == True:
        phone = self.added_contexts["cp"]
        label = label.replace("cp:" + phone, "cp:-" + phone + "+")
    return label + "|"
def make_questions(context_skeleton, qformat, generic=True, HHEd_fix=False, utt=False):
    """Create question strings for every context stored in a skeleton.

    Args:
        context_skeleton: Object whose ``added_contexts`` attribute maps each
            context name to the values seen for it, and which has one
            attribute per context name holding that context's type
            (None, "bool", or a string containing "float" or "int").
        qformat: "HMM", "Nitech_NN" or "CSTR_NN".
        generic: Only ``False`` is implemented; anything else raises.
        HHEd_fix: When True, values of the "cp" context are wrapped as
            "-value+" in bool questions.
        utt: When False, a warning is printed for every attribute of the
            skeleton that has no entry in ``added_contexts``.

    Returns:
        List of question strings.

    Raises:
        SiReError: On an invalid qformat, an unknown context type, or when
            the generic question set is requested.
    """
    context_dict = context_skeleton.added_contexts
    # We write out each context not used just for checks.
    # TODO: Make it possible to throw an exception if context not used.
    # TODO: Currently we ignore this when making GV contexts.
    if utt == False:
        for context in vars(context_skeleton):
            if context not in context_dict and context != "added_contexts":
                print(context_dict.keys())
                print("Warning! Context ({0}) not used!".format(context))

    if qformat not in ["Nitech_NN", "HMM", "CSTR_NN"]:
        raise SiReError("Invalid question format ({0})! Must be either HMM, Nitech_NN or CSTR_NN!".format(qformat))

    questions = []
    if generic == False:
        for key in context_dict:
            qtype = getattr(context_skeleton, key)
            # De-duplicate, then sort: the ordering matters for the HMM
            # questions and makes the NN output easier to read.
            vals = sorted(set(context_dict[key]))
            if qtype is None:
                continue
            if qtype == "bool":
                for val in vals:
                    # The current phoneme id gets the HHEd_fix decoration.
                    if HHEd_fix == True and key == "cp":
                        val = "-" + str(val) + "+"
                    body = "\"" + key + "-" + str(val) + "\" {*|" + key + ":" + str(val) + "|*}"
                    if qformat == "HMM" or qformat == "CSTR_NN":
                        questions.append("QS " + body)
                    elif qformat == "Nitech_NN":
                        questions.append("LQ 0 " + body)
            elif "float" in qtype:
                if qformat == "HMM":
                    questions += make_hmm_relational_qs(vals, key, qtype)
                elif qformat == "Nitech_NN":
                    for val in vals:
                        # HHEd's pattern matching treats '.' as special, so
                        # the search pattern uses the strintified value.
                        # The xx placeholder has to be handled first.
                        if val == "xx":
                            questions.append("LQ 1 0.0 \"" + key + "-xx\" {*|" + key + ":xx|*}")
                        else:
                            as_int = strintify(float(val))
                            questions.append("LQ 1 " + str(val) + " \"" + key + "-" + as_int
                                             + "\" {*|" + key + ":" + as_int + "|*}")
                elif qformat == "CSTR_NN":
                    # CSTR's DNN system needs a single question per context,
                    # not one per value. "\\d" is spelled with an escaped
                    # backslash; a bare "\d" is an invalid escape sequence.
                    questions.append("CQS \"" + key + "\" {*|" + key + ":(\\d+)|*}")
            elif "int" in qtype:
                if qformat == "HMM":
                    questions += make_hmm_relational_qs(vals, key, qtype)
                elif qformat == "Nitech_NN":
                    for val in vals:
                        # The NN wants a float for the value itself while the
                        # name and pattern keep the int. xx comes first.
                        if val == "xx":
                            questions.append("LQ 1 0.0 \"" + key + "-xx\" {*|" + key + ":xx|*}")
                        else:
                            questions.append("LQ 1 " + strfloatify(int(val)) + " \"" + key + "-" + str(val)
                                             + "\" {*|" + key + ":" + str(val) + "|*}")
                elif qformat == "CSTR_NN":
                    # Same single-question form as for float contexts.
                    questions.append("CQS \"" + key + "\" {*|" + key + ":(\\d+)|*}")
            else:
                raise SiReError("Odd question type, should not exist - {0}".format(qtype))
    else:
        raise SiReError("Not implemented yet! (Outputting generic question set)")
    return questions
def make_questions(context_skeleton, qformat, generic=True, HHEd_fix=False, utt=False):
    """Turn the contexts of a skeleton into a list of question strings.

    ``context_skeleton.added_contexts`` supplies the values per context and
    the attribute of the same name on the skeleton supplies the context
    type (None, "bool", or a string containing "float" or "int").

    Args:
        context_skeleton: Source of contexts and their types.
        qformat: One of "HMM", "Nitech_NN", "CSTR_NN".
        generic: Must be False; the generic question set is not implemented.
        HHEd_fix: When True, "cp" values become "-value+" in bool questions.
        utt: When False, skeleton attributes missing from ``added_contexts``
            are reported on stdout.

    Returns:
        List of question strings.

    Raises:
        SiReError: Invalid qformat, generic set requested, or a context type
            that is none of the known kinds.
    """
    context_dict = context_skeleton.added_contexts

    # Report each context that is not used, purely as a check.
    # TODO: Make it possible to throw an exception if context not used.
    # TODO: Currently we ignore this when making GV contexts.
    if utt == False:
        for attr_name in vars(context_skeleton).keys():
            if attr_name == "added_contexts" or attr_name in context_dict:
                continue
            print(context_dict.keys())
            print("Warning! Context ({0}) not used!".format(attr_name))

    if qformat not in ("Nitech_NN", "HMM", "CSTR_NN"):
        raise SiReError(
            "Invalid question format ({0})! Must be either HMM, Nitech_NN or CSTR_NN!"
            .format(qformat))
    if not (generic == False):
        raise SiReError(
            "Not implemented yet! (Outputting generic question set)")

    questions = []
    for key in context_dict:
        qtype = getattr(context_skeleton, key)
        # Duplicates are dropped and the rest sorted; the order matters for
        # the HMM questions and is nicer to read for the NN ones.
        vals = list(set(context_dict[key]))
        vals.sort()

        if qtype is None:
            continue

        if qtype == "bool":
            # HMM and CSTR_NN share the QS form; Nitech_NN uses LQ 0.
            tag = "LQ 0 " if qformat == "Nitech_NN" else "QS "
            for val in vals:
                # Current phoneme id gets the HHEd_fix decoration.
                if HHEd_fix == True and key == "cp":
                    val = "-" + str(val) + "+"
                shown = str(val)
                questions.append(tag + "\"" + key + "-" + shown + "\" {*|" +
                                 key + ":" + shown + "|*}")
            continue

        is_float = "float" in qtype
        if not is_float and "int" not in qtype:
            raise SiReError(
                "Odd question type, should not exist - {0}".format(qtype))

        if qformat == "HMM":
            questions.extend(make_hmm_relational_qs(vals, key, qtype))
        elif qformat == "CSTR_NN":
            # CSTR's DNN system takes one question per context rather than
            # one per value, for float and int alike.
            questions.append("CQS \"" + key + "\" {*|" + key + ":(\\d+)|*}")
        else:
            # Nitech_NN: one LQ 1 question per value.
            for val in vals:
                # The xx placeholder has to be checked first.
                if val == "xx":
                    questions.append("LQ 1 0.0 \"" + key + "-xx\" {*|" +
                                     key + ":xx|*}")
                    continue
                if is_float:
                    # HHEd's pattern matching treats '.' as special, so the
                    # name and pattern use the strintified value.
                    number, shown = str(val), strintify(float(val))
                else:
                    # The NN takes a float value; name and pattern keep
                    # the int.
                    number, shown = strfloatify(int(val)), str(val)
                questions.append("LQ 1 " + number + " \"" + key + "-" +
                                 shown + "\" {*|" + key + ":" + shown + "|*}")
    return questions