Exemple #1
0
def make_questions(context_skeleton, qformat, generic=True, HHEd_fix=False):
  """Build the question strings for every context held by a skeleton.

  Args:
    context_skeleton: Object whose ``added_contexts`` attribute maps each
      context key to an iterable of observed values, and which carries one
      attribute per key holding that key's question type (None, "bool", or a
      string containing "float" or "int").
    qformat: "HMM" (emits ``QS`` questions) or "NN" (emits ``LQ`` questions).
    generic: Must be false to get any questions. The generic question set is
      not implemented: an error is printed and an empty list is returned.
    HHEd_fix: When true, values of the "cp" key are wrapped as ``-val+`` in
      "bool" questions.

  Returns:
    A list of question strings, in the iteration order of ``added_contexts``.

  Exits the process (SystemExit, status 1) when ``qformat`` or a question type
  is not recognised.
  """
  context_dict = context_skeleton.added_contexts
  if qformat not in ("NN", "HMM"):
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Error: Invalid question format! " + qformat)
    print("Must be either HMM or NN!")
    # Non-zero status so calling scripts can detect the failure.
    sys.exit(1)
  questions = []
  if generic:
    print("Error: Not implemented yet! (Outputting generic question set)")
    return questions
  #Every simple question has the shape: PREFIX "key-NAME" {*|key:NAME|*}
  template = '{0} "{1}-{2}" {{*|{1}:{2}|*}}'
  for key in context_dict:
    qtype = getattr(context_skeleton, key)
    #Setting it removes duplicates and sorting them is important for HMM question creation
    #(sorting them is also prettier for the NN)
    vals = sorted(set(context_dict[key]))
    if qtype is None:
      continue
    if qtype == "bool":
      prefix = "QS" if qformat == "HMM" else "LQ 0"
      for val in vals:
        #If this is current phoneme id we apply the HHEd_fix
        if HHEd_fix and key == "cp":
          val = "-" + str(val) + "+"
        questions.append(template.format(prefix, key, str(val)))
    elif "float" in qtype or "int" in qtype:
      if qformat == "HMM":
        questions += make_hmm_relational_qs(vals, key, qtype)
        continue
      #"float" takes precedence over "int" when a type string contains both.
      is_float = "float" in qtype
      for val in vals:
        if val == "xx":
          #xx has no numeric value, so it is emitted with 0.0.
          questions.append(template.format("LQ 1 0.0", key, "xx"))
        elif is_float:
          #HHEd's pattern matching for both NN and HMM's uses '.' as a special
          #character. We therefore use the int version of the float value in the
          #search pattern.
          questions.append(template.format("LQ 1 " + str(val), key, strintify(float(val))))
        else:
          #The NN relies on floats for the actual value so we use that there
          #and the int in the naming.
          questions.append(template.format("LQ 1 " + strfloatify(val), key, str(val)))
    else:
      print("Error: Odd question type, should not exist - " + qtype)
      sys.exit(1)
  return questions
Exemple #2
0
def make_hmm_relational_qs(values, key, qtype):
    """Build HMM ``QS`` questions for a numeric context.

    For every value an equality question is emitted. For every value greater
    than the counting start, a "less than or equal" question is also emitted
    whose pattern list enumerates all values from the start up to that value.

    Args:
        values: Values of the context. May contain the placeholder "xx".
            The caller's sequence is not modified.
        key: Context key used in the question names and patterns.
        qtype: Question type string. "float" makes values go through
            ``strintify``, "0" makes counting start at 0 instead of 1, and
            "xx" allows the "xx" placeholder.

    Returns:
        A list of question strings.

    Raises:
        SiReError: If "xx" is among the values but not allowed by ``qtype``.
    """
    questions = []
    #Work on a copy so the caller's list is left untouched.
    values = list(values)
    #Add xx question if appropriate else ignore
    if "xx" in values:
        if "xx" not in qtype:
            raise SiReError(
                "xx in values but not in qtype {0} for key {1} - why?".format(
                    qtype, key))
        questions.append("QS \"" + key + "-xx\" {*|" + key + ":xx|*}")
        values.remove("xx")
    #If we count 0 then we start at 0
    start = 0 if "0" in qtype else 1
    is_float = "float" in qtype
    for val in values:
        if is_float:
            val = strintify(float(val))
        questions.append("QS \"" + key + "-" + str(val) + "\" {*|" + key +
                         ":" + str(val) + "|*}")
        number = int(val)
        #Only values above the start get a less than question
        if number <= start:
            continue
        #divmod uses floor division, so tens stays an int on Python 3 too.
        tens, remainder = divmod(number, 10)
        if tens > 0:
            #Singles, then whole decades via the '?' wildcard, then the
            #members of the last (partial) decade.
            names = [str(n) for n in range(start, 10)]
            names += [str(n) + "?" for n in range(1, tens)]
            names += [str(tens) + str(n) for n in range(remainder + 1)]
        else:
            #Just make singles
            names = [str(n) for n in range(start, number + 1)]
        patterns = ",".join("*|" + key + ":" + name + "|*" for name in names)
        questions.append("QS \"" + key + "<=" + str(val) + "\" {" + patterns +
                         "}")
    return questions
 def get_context_string(self, HHEd_fix=False):
   """Render this context as a single label string.

   The result is ``"<start> <end> |key:value|key:value...|"``, with the keys
   taken in the iteration order of ``self.added_contexts`` ("start" and "end"
   are only used for the leading fields).

   Args:
     HHEd_fix: When true, the value of the "cp" entry is wrapped as
       ``-value+``.

   Returns:
     The context string.
   """
   contexts = self.added_contexts
   pieces = [str(contexts["start"]), " ", str(contexts["end"]), " "]
   for key in contexts:
     if key in ("start", "end"):
       continue
     value = contexts[key]
     qtype = getattr(self, key)
     # A float context may hold the "xx" placeholder when its type allows it;
     # float("xx") would raise, so the placeholder is written out verbatim.
     if "float" in qtype and not (value == "xx" and "xx" in qtype):
       pieces.append("|" + key + ":" + context_utils.strintify(float(value)))
     else:
       pieces.append("|" + key + ":" + str(value))
   s = "".join(pieces)
   if HHEd_fix:
     cp = contexts["cp"]
     # Anchor on the leading "|" so that only the "cp" entry is rewritten and
     # not another key that merely ends in "cp".
     s = s.replace("|cp:" + cp, "|cp:-" + cp + "+")
   return s + "|"
Exemple #4
0
def make_hmm_relational_qs(values, key, qtype):
  """Build HMM ``QS`` questions for a numeric context.

  For every value an equality question is emitted. For every value greater
  than the counting start, a "less than or equal" question is also emitted
  whose pattern list enumerates all values from the start up to that value.

  Args:
    values: Values of the context. May contain the placeholder "xx". The
      caller's sequence is not modified.
    key: Context key used in the question names and patterns.
    qtype: Question type string. "float" makes values go through
      ``strintify``, "0" makes counting start at 0 instead of 1, and "xx"
      allows the "xx" placeholder.

  Returns:
    A list of question strings.

  Exits the process (SystemExit, status 1) if "xx" is among the values but is
  not allowed by ``qtype``.
  """
  questions = []
  #Work on a copy so the caller's list is left untouched.
  values = list(values)
  #Add xx question if appropriate else ignore
  if "xx" in values:
    if "xx" not in qtype:
      #Parenthesised single-argument print works on both Python 2 and 3.
      print("Error: xx in values but not in qtype {0} for key {1} - why?".format(qtype, key))
      #Non-zero status so calling scripts can detect the failure.
      sys.exit(1)
    questions.append("QS \""+key+"-xx\" {*|"+key+":xx|*}")
    values.remove("xx")
  #If we count 0 then we start at 0
  start = 0 if "0" in qtype else 1
  is_float = "float" in qtype
  for val in values:
    if is_float:
      val = strintify(float(val))
    questions.append("QS \""+key+"-"+str(val)+"\" {*|"+key+":"+str(val)+"|*}")
    number = int(val)
    #Only values above the start get a less than question
    if number <= start:
      continue
    #divmod uses floor division, so tens stays an int on Python 3 too.
    tens, remainder = divmod(number, 10)
    if tens > 0:
      #Singles, then whole decades via the '?' wildcard, then the members of
      #the last (partial) decade.
      names = [str(n) for n in range(start, 10)]
      names += [str(n)+"?" for n in range(1, tens)]
      names += [str(tens)+str(n) for n in range(remainder+1)]
    else:
      #Just make singles
      names = [str(n) for n in range(start, number+1)]
    patterns = ",".join("*|"+key+":"+name+"|*" for name in names)
    questions.append("QS \""+key+"<="+str(val)+"\" {"+patterns+"}")
  return questions
Exemple #5
0
 def get_context_string(self, HHEd_fix=False):
   """Render this context as a single label string.

   The result is ``"<start> <end> |key:value|key:value...|"``, with the keys
   taken in the iteration order of ``self.added_contexts`` ("start" and "end"
   are only used for the leading fields). Float contexts are written through
   ``context_utils.strintify`` unless they hold an allowed "xx" placeholder.

   Args:
     HHEd_fix: When true, the value of the "cp" entry is wrapped as
       ``-value+``.

   Returns:
     The context string.
   """
   contexts = self.added_contexts
   pieces = [str(contexts["start"]), " ", str(contexts["end"]), " "]
   for key in contexts:
     if key in ("start", "end"):
       continue
     value = contexts[key]
     qtype = getattr(self, key)
     # "xx" is only kept verbatim for float contexts whose type allows it;
     # every other float value is converted.
     keep_placeholder = value == "xx" and "xx" in qtype
     if "float" in qtype and not keep_placeholder:
       pieces.append("|" + key + ":" + context_utils.strintify(float(value)))
     else:
       pieces.append("|" + key + ":" + str(value))
   s = "".join(pieces)
   if HHEd_fix:
     cp = contexts["cp"]
     # Anchor on the leading "|" so that only the "cp" entry is rewritten and
     # not another key that merely ends in "cp".
     s = s.replace("|cp:" + cp, "|cp:-" + cp + "+")
   return s + "|"
Exemple #6
0
def make_questions(context_skeleton, qformat, generic=True, HHEd_fix=False, utt=False):
  """Build the question strings for every context held by a skeleton.

  Args:
    context_skeleton: Object whose ``added_contexts`` attribute maps each
      context key to an iterable of observed values, and which carries one
      instance attribute per key holding that key's question type (None,
      "bool", or a string containing "float" or "int").
    qformat: "HMM", "Nitech_NN" or "CSTR_NN".
    generic: Must be false; the generic question set is not implemented.
    HHEd_fix: When true, values of the "cp" key are wrapped as ``-val+`` in
      "bool" questions.
    utt: When false, a warning is printed for every attribute of the skeleton
      that has no entry in ``added_contexts``.

  Returns:
    A list of question strings, in the iteration order of ``added_contexts``.

  Raises:
    SiReError: On an unknown ``qformat``, when ``generic`` is true, or on an
      unrecognised question type.
  """
  context_dict = context_skeleton.added_contexts
  #We write out each context not used just for checks.
  #TODO: Make it possible to throw an exception if context not used.
  #TODO: Currently we ignore this when making GV contexts.
  if not utt:
    for context in vars(context_skeleton):
      if context not in context_dict and context != "added_contexts":
        #Parenthesised single-argument print works on both Python 2 and 3.
        print(context_dict.keys())
        print("Warning! Context ({0}) not used!".format(context))
  if qformat not in ("Nitech_NN", "HMM", "CSTR_NN"):
    raise SiReError("Invalid question format ({0})! Must be either HMM, Nitech_NN or CSTR_NN!".format(qformat))
  if generic:
    raise SiReError("Not implemented yet! (Outputting generic question set)")
  #Every simple question has the shape: PREFIX "key-NAME" {*|key:NAME|*}
  template = '{0} "{1}-{2}" {{*|{1}:{2}|*}}'
  questions = []
  for key in context_dict:
    qtype = getattr(context_skeleton, key)
    #Setting it removes duplicates and sorting them is important for HMM question creation
    #(sorting them is also prettier for the NN)
    vals = sorted(set(context_dict[key]))
    if qtype is None:
      continue
    if qtype == "bool":
      prefix = "LQ 0" if qformat == "Nitech_NN" else "QS"
      for val in vals:
        #If this is current phoneme id we apply the HHEd_fix
        if HHEd_fix and key == "cp":
          val = "-" + str(val) + "+"
        questions.append(template.format(prefix, key, str(val)))
    elif "float" in qtype or "int" in qtype:
      if qformat == "HMM":
        questions += make_hmm_relational_qs(vals, key, qtype)
      elif qformat == "CSTR_NN":
        #For CSTR's DNN system we just need to specify the question and not one for each value of the question if float or int.
        #Note that floats needs to be strintified in the actual question.
        #The backslash is escaped explicitly; the emitted text is still (\d+).
        questions.append('CQS "{0}" {{*|{0}:(\\d+)|*}}'.format(key))
      else:
        #Nitech_NN. "float" takes precedence when a type string contains both.
        is_float = "float" in qtype
        for val in vals:
          if val == "xx":
            #xx has no numeric value, so it is emitted with 0.0.
            questions.append(template.format("LQ 1 0.0", key, "xx"))
          elif is_float:
            #HHEd's pattern matching for both NN and HMM's uses '.' as a special
            #character. We therefore use the int version of the float value in the
            #search pattern.
            questions.append(template.format("LQ 1 " + str(val), key, strintify(float(val))))
          else:
            #The NN relies on floats for the actual value so we use that there
            #and the int in the naming.
            questions.append(template.format("LQ 1 " + strfloatify(int(val)), key, str(val)))
    else:
      raise SiReError("Odd question type, should not exist - {0}".format(qtype))
  return questions
Exemple #7
0
def make_questions(context_skeleton,
                   qformat,
                   generic=True,
                   HHEd_fix=False,
                   utt=False):
    """Build the question strings for every context held by a skeleton.

    Args:
        context_skeleton: Object whose ``added_contexts`` attribute maps each
            context key to an iterable of observed values, and which carries
            one instance attribute per key holding that key's question type
            (None, "bool", or a string containing "float" or "int").
        qformat: "HMM", "Nitech_NN" or "CSTR_NN".
        generic: Must be false; the generic question set is not implemented.
        HHEd_fix: When true, values of the "cp" key are wrapped as ``-val+``
            in "bool" questions.
        utt: When false, a warning is printed for every attribute of the
            skeleton that has no entry in ``added_contexts``.

    Returns:
        A list of question strings, in the iteration order of
        ``added_contexts``.

    Raises:
        SiReError: On an unknown ``qformat``, when ``generic`` is true, or on
            an unrecognised question type.
    """
    context_dict = context_skeleton.added_contexts
    #We write out each context not used just for checks.
    #TODO: Make it possible to throw an exception if context not used.
    #TODO: Currently we ignore this when making GV contexts.
    if not utt:
        for context in vars(context_skeleton):
            if context not in context_dict and context != "added_contexts":
                #Parenthesised single-argument print works on Python 2 and 3.
                print(context_dict.keys())
                print("Warning! Context ({0}) not used!".format(context))
    if qformat not in ("Nitech_NN", "HMM", "CSTR_NN"):
        raise SiReError(
            "Invalid question format ({0})! Must be either HMM, Nitech_NN or CSTR_NN!"
            .format(qformat))
    if generic:
        raise SiReError(
            "Not implemented yet! (Outputting generic question set)")
    #Every simple question has the shape: PREFIX "key-NAME" {*|key:NAME|*}
    simple = '{0} "{1}-{2}" {{*|{1}:{2}|*}}'
    #For CSTR's DNN system we just need to specify the question and not one
    #for each value of the question if float or int. The backslash is escaped
    #explicitly; the emitted text is still (\d+).
    continuous = 'CQS "{0}" {{*|{0}:(\\d+)|*}}'
    questions = []
    for key in context_dict:
        qtype = getattr(context_skeleton, key)
        #Setting it removes duplicates and sorting them is important for HMM question creation
        #(sorting them is also prettier for the NN)
        vals = sorted(set(context_dict[key]))
        if qtype is None:
            continue
        if qtype == "bool":
            prefix = "LQ 0" if qformat == "Nitech_NN" else "QS"
            for val in vals:
                #If this is current phoneme id we apply the HHEd_fix
                if HHEd_fix and key == "cp":
                    val = "-" + str(val) + "+"
                questions.append(simple.format(prefix, key, str(val)))
        elif "float" in qtype or "int" in qtype:
            if qformat == "HMM":
                questions += make_hmm_relational_qs(vals, key, qtype)
            elif qformat == "CSTR_NN":
                #Note that floats needs to be strintified in the actual question.
                questions.append(continuous.format(key))
            else:
                #Nitech_NN. "float" takes precedence when a type string
                #contains both.
                is_float = "float" in qtype
                for val in vals:
                    if val == "xx":
                        #xx has no numeric value, so it is emitted with 0.0.
                        questions.append(simple.format("LQ 1 0.0", key, "xx"))
                    elif is_float:
                        #HHEd's pattern matching for both NN and HMM's uses '.' as a special
                        #character. We therefore use the int version of the float value in the
                        #search pattern.
                        questions.append(
                            simple.format("LQ 1 " + str(val), key,
                                          strintify(float(val))))
                    else:
                        #The NN relies on floats for the actual value so we use that there
                        #and the int in the naming.
                        questions.append(
                            simple.format("LQ 1 " + strfloatify(int(val)),
                                          key, str(val)))
        else:
            raise SiReError(
                "Odd question type, should not exist - {0}".format(qtype))
    return questions