Esempio n. 1
0
def update_best_hyp_words_list(orig_word, encoded_vie_units, roles, new_word, best_word_hyps_list):
  """Keep ``best_word_hyps_list`` holding only the lowest-penalty hypotheses.

  A hypothesis is built for ``new_word`` only when
  ``new_word.get_encoded_units()`` equals ``encoded_vie_units``; otherwise the
  call is a no-op. The hypothesis penalty (``mod_pen``) is
  ``levenshtein(new_word.to_plain_text(), orig_word)``.

  The list is updated in place:
    * empty list               -> the new hypothesis becomes its only entry;
    * penalty equals entry 0's -> the new hypothesis is appended (a tie);
    * penalty lower than 0's   -> the list is cleared and restarted with the
                                  new hypothesis;
    * penalty higher than 0's  -> the list is left untouched.

  Args:
    orig_word: the original word; it is passed to ``label_letters`` and
      compared against the plain text of ``new_word``.
    encoded_vie_units: encoded unit structure ``new_word`` must match.
    roles: role assignment stored on the hypothesis.
    new_word: candidate reconstructed word object.
    best_word_hyps_list: list of current best hypotheses, mutated in place.

  Returns:
    None.
  """
  # Written as ``not (a == b)`` so that only ``==`` is relied upon for the
  # comparison, exactly like the equality test it replaces.
  if not (new_word.get_encoded_units() == encoded_vie_units):
    return

  new_word_hyp = WordHyp()
  new_word_hyp.original = orig_word
  new_word_hyp.labels = label_letters(orig_word)
  new_word_hyp.roles = roles
  new_word_hyp.reconstructed_word = new_word
  new_word_hyp.mod_pen = levenshtein(new_word.to_plain_text(), orig_word)

  if len(best_word_hyps_list) == 0:
    best_word_hyps_list.append(new_word_hyp)
    return

  # Every entry in the list shares the same penalty, so entry 0 is
  # representative of the current best.
  best_mod_pen = best_word_hyps_list[0].mod_pen

  if new_word_hyp.mod_pen == best_mod_pen:
    # Tie with the current best: keep it alongside the others.
    best_word_hyps_list.append(new_word_hyp)
  elif new_word_hyp.mod_pen < best_mod_pen:
    # Strictly better: discard the previous (worse) hypotheses. Slice
    # assignment mutates the caller's list object instead of rebinding the
    # local name, so the caller sees the change.
    best_word_hyps_list[:] = [new_word_hyp]
  return
# Script fragment: decodes every word listed in the file at
# ``targ_graph_file_path``, one word per line.
# NOTE(review): this fragment ends right after the string post-processing at
# the bottom; whatever consumes ``reconstructed_word``, updates ``count`` /
# ``COORD_EPSILON`` or closes the file is not visible here.
solution_found = True
count = 0

# NOTE(review): opened without ``with`` and no ``close()`` is visible in this
# fragment -- confirm the handle is released further down.
targ_graph_file = open(targ_graph_file_path, "r")

for line in targ_graph_file:
  # Timestamp taken at the start of each word; its use is not visible here.
  start_time = time.time()

  word = line.strip()
  report("Decoding word: " + word)

  labels = label_letters(word)
  # One (initially unset) role slot per character of the word.
  roles = [None] * len(word)
  # Phones for this word are looked up by position in ``test_phones``.
  # NOTE(review): ``count`` is never incremented in the visible part of the
  # loop -- presumably it is advanced once per word further down; confirm.
  en_phones = test_phones[count]

  # Placeholder hypothesis, replaced below once a non-empty hypothesis list is
  # produced.
  best_hyp = WordHyp()
  # Start at 1.0 and keep searching while the value stays at or above
  # MIN_COORD_EPSILON.
  # NOTE(review): the statement that changes COORD_EPSILON (or breaks out of
  # the loop) is not visible in this fragment.
  COORD_EPSILON = 1.0
  while COORD_EPSILON >= MIN_COORD_EPSILON:
    hyps_list = generate_roles_no_ref(word, labels, en_phones, search_space, COORD_EPSILON)

    if len(hyps_list) > 0:
      # for hyp in hyps_list:
      #   hyp.get_str()

      best_hyp = get_most_prob_hyp(hyps_list, search_space)
      
      # Turn the string form of the reconstructed word into syllables joined
      # by " . ":
      #   1. drop the first and last character of the string,
      #   2. split on " ] [ " and split each piece on single spaces,
      #   3. concatenate the units of each piece, then join the pieces.
      # Presumably the string form looks like "[ a b ] [ c d ]" -- TODO confirm
      # against the reconstructed word's __str__.
      reconstructed_word = str(best_hyp.reconstructed_word)
      reconstructed_word = reconstructed_word[1:-1]
      reconstructed_word = [part.strip().split(" ") for part in reconstructed_word.split(" ] [ ")]
      reconstructed_word = " . ".join(["".join(syl) for syl in reconstructed_word])
def update_hyps_list(orig_word, roles, new_word, checked, best_word_hyps_list):
  """Build a scored hypothesis for ``new_word`` and append it to the list.

  Nothing is appended unless both ``are_all_subsyl_units_valid(new_word)``
  and ``are_all_letters_used(checked)`` hold.

  Args:
    orig_word: the original word; stored on the hypothesis and passed to
      ``label_letters``.
    roles: role assignment stored on the hypothesis.
    new_word: candidate reconstructed word object.
    checked: value handed to ``are_all_letters_used``.
    best_word_hyps_list: list that receives the new hypothesis (mutated in
      place).

  Returns:
    None.
  """
  # ``and`` short-circuits: the letter-usage check only runs when the
  # sub-syllabic units are valid.
  if not (are_all_subsyl_units_valid(new_word) and are_all_letters_used(checked)):
    return

  hyp = WordHyp()
  hyp.original = orig_word
  hyp.labels = label_letters(orig_word)
  hyp.roles = roles
  hyp.reconstructed_word = new_word

  # The scoring calls run in a fixed sequence; they may rely on state set by
  # the preceding ones, so do not reorder them.
  hyp.compute_mod_error()
  hyp.award_hyp()
  plain_text = new_word.to_plain_text()
  hyp.extra_letter_count = plain_text.count(GENERIC_VOWEL)
  hyp.count_removal()
  hyp.count_compound_role()
  evaluate_phones_score(hyp)

  best_word_hyps_list.append(hyp)
def update_best_hyp_words_list(orig_word, targ_syl_struct, roles, new_word,
                               checked, best_word_hyps_list):
    """Append a hypothesis for ``new_word`` if it matches the target structure.

    The hypothesis is created only when all three conditions hold, checked in
    this order:
      1. ``are_all_subsyl_units_valid(new_word)``
      2. ``are_all_letters_used(checked)``
      3. ``new_word.get_encoded_units() == targ_syl_struct``

    Every hypothesis that passes is appended; no filtering by penalty is
    performed here.

    Args:
        orig_word: the original word; stored on the hypothesis and passed to
            ``label_letters``.
        targ_syl_struct: encoded unit structure ``new_word`` must match.
        roles: role assignment stored on the hypothesis.
        new_word: candidate reconstructed word object.
        checked: value handed to ``are_all_letters_used``.
        best_word_hyps_list: list that receives the hypothesis (mutated in
            place).

    Returns:
        None.
    """
    # ``and`` short-circuits, so the checks run in the documented order and
    # stop at the first failure.
    if not (are_all_subsyl_units_valid(new_word)
            and are_all_letters_used(checked)):
        return

    # Expressed through ``==`` only, so no ``!=`` implementation is required
    # of the compared values.
    if not (new_word.get_encoded_units() == targ_syl_struct):
        return

    hyp = WordHyp()
    hyp.original = orig_word
    hyp.labels = label_letters(orig_word)
    hyp.roles = roles
    hyp.reconstructed_word = new_word

    # The scoring calls run in a fixed sequence; they may rely on state set
    # by the preceding ones, so do not reorder them.
    hyp.compute_mod_error()
    hyp.award_hyp()
    plain_text = new_word.to_plain_text()
    hyp.extra_letter_count = plain_text.count(GENERIC_VOWEL)
    hyp.count_removal()
    hyp.count_compound_role()

    best_word_hyps_list.append(hyp)