def get_es_recall_wer_from_row(row):
    """Return the summed ``1 - wer`` score over the ES recall columns of a row.

    For each of the ``RECALL_COUNT`` recall slots, the answer is read from
    ``row`` at ``FIRST_ES_RECALL_COLUMN + slot * RECALL_INTERVAL`` and compared
    against the reference text registered for that column in
    ``column_index_to_es_recall_reference``. The answer is passed through
    ``clean_extra_words`` before being scored with ``wer``.

    Args:
        row: Indexable record (e.g. one parsed CSV row) holding the answers.

    Returns:
        The sum of ``1 - wer(reference, cleaned_answer)`` over all recall
        slots; ``0`` when ``RECALL_COUNT`` is zero.
    """
    total = 0
    for slot in range(RECALL_COUNT):
        column = FIRST_ES_RECALL_COLUMN + slot * RECALL_INTERVAL
        expected = column_index_to_es_recall_reference[column]
        trimmed_answer = clean_extra_words(expected, row[column])
        # Presumably wer() is a word error rate, so 1 - wer acts as an
        # accuracy-like score -- confirm against libs.wer.
        total += 1 - wer(expected, trimmed_answer)
    return total
# Scratch script: runs wer() and clean_extra_words() on two hand-written
# reference/hypothesis pairs. Only the cleaned first hypothesis of the first
# pair is printed; the wer() results are computed but not displayed.
from libs.wer import clean_extra_words, wer

# --- Pair 1: hypothesis with a stage direction and speaker labels ---------
ref = (
    "Yeah, I could tell, since you didn't call or write the entire time "
    "it was happening. No, I know, I was just"
)
hyp1 = (
    "(music in background) A: Yeah, I could tell, since you didn't call her "
    "right as it was happening B: No, I was just"
)
hyp2 = (
    "Yeah, I could tell, since you didn't call her right as it was "
    "happening No, I was just"
)

wer1 = wer(ref, hyp1)
wer2 = wer(ref, hyp2)
print(clean_extra_words(ref, hyp1))

# --- Pair 2: hypothesis with a leading description of the clip ------------
ref = (
    "the dedicated detectives who investigate these vicious felonies are "
    "members of an elite squad known as the special victims unit."
)
hyp1 = (
    "Opening of Law and Order: SVU -- The dedicated detectives who "
    "investigate these vicious crimes are known as the Special Victims Unit"
)
hyp2 = (
    "The dedicated detectives who investigate these vicious crimes are "
    "known as the Special Victims Unit"
)

wer1 = wer(ref, hyp1)
wer2 = wer(ref, hyp2)
# Export script: for every data row of the filtered CSV, collect the English
# recall reference, the participant's answer, and the answer with extra words
# removed, then hand the collected entries to write_spreadsheet_file().
import csv

from libs.constants import *
from libs.csv_utils import write_spreadsheet_file
from libs.translation_util import clean
from libs.wer import clean_extra_words

IN_FILE_NAME = 'data/filtered_raw_data.csv'

# newline='' lets the csv module do its own line-ending handling, so that
# line breaks embedded inside quoted fields are parsed as documented.
with open(IN_FILE_NAME, 'r', encoding="utf-8", newline='') as csv_file:
    csv_data = list(csv.reader(csv_file, delimiter=',', quotechar='"'))

recalls = []
# Rows before FIRST_DATA_ROW are skipped (presumably header rows -- confirm
# against libs.constants).
for row in csv_data[FIRST_DATA_ROW:]:
    for index in range(RECALL_COUNT):
        column_index = FIRST_EN_RECALL_COLUMN + index * RECALL_INTERVAL
        reference = column_index_to_en_recall_reference[column_index]
        hypothesis = row[column_index]
        # Three single-element entries per recall, in this order:
        # cleaned reference, cleaned answer, answer without extra words.
        recalls.extend([
            [clean(reference)],
            [clean(hypothesis)],
            [clean_extra_words(reference, hypothesis)],
        ])

write_spreadsheet_file('output/en_recall_sentences.txt', recalls)