def get_es_recall_wer_from_row(row):
	"""Accumulate ``1 - wer(...)`` over the ES recall columns of one row.

	RECALL_COUNT columns are visited, starting at FIRST_ES_RECALL_COLUMN and
	spaced RECALL_INTERVAL apart. For each column the reference text is
	looked up in ``column_index_to_es_recall_reference``, the row's cell is
	passed through ``clean_extra_words`` together with that reference, and
	``1 - wer(reference, cleaned_cell)`` is added to the running total.

	Args:
		row: Indexable record; ``row[column]`` must yield the recalled text
			for each visited column.

	Returns:
		The accumulated total (the integer 0 when RECALL_COUNT is 0).

	NOTE(review): despite the name, the result is a sum of ``1 - wer``
	terms rather than a raw WER value; presumably higher means better
	recall -- confirm against the callers.
	"""
	total = 0
	for recall_number in range(RECALL_COUNT):
		col = FIRST_ES_RECALL_COLUMN + recall_number * RECALL_INTERVAL
		expected = column_index_to_es_recall_reference[col]
		cleaned = clean_extra_words(expected, row[col])
		# Plain running addition keeps the original accumulation order.
		total += 1 - wer(expected, cleaned)
	return total
예제 #2
0
from libs.wer import wer
from libs.wer import clean_extra_words

# Sample 1: hyp1 wraps the transcript in extra annotation text
# ("(music in background)", speaker labels); hyp2 is the same transcript
# without those additions.
ref, hyp1, hyp2 = (
    "Yeah, I could tell, since you didn't call or write the entire time it was happening. No, I know, I was just",
    "(music in background) A: Yeah, I could tell, since you didn't call her right as it was happening B: No, I was just",
    "Yeah, I could tell, since you didn't call her right as it was happening No, I was just",
)

# Score the annotated and the plain hypothesis against the same reference.
wer1, wer2 = wer(ref, hyp1), wer(ref, hyp2)
# Disabled debug output:
# print(wer1, ';', wer2)

# Show what clean_extra_words returns for the annotated hypothesis.
print(clean_extra_words(ref, hyp1))

# Sample 2: hyp1 carries a descriptive prefix ("Opening of Law and Order:
# SVU -- ") in front of the transcript; hyp2 is the transcript alone.
ref, hyp1, hyp2 = (
    "the dedicated detectives who investigate these vicious felonies are members of an elite squad known as the special victims unit.",
    "Opening of Law and Order: SVU -- The dedicated detectives who investigate these vicious crimes are known as the Special Victims Unit",
    "The dedicated detectives who investigate these vicious crimes are known as the Special Victims Unit",
)

wer1, wer2 = wer(ref, hyp1), wer(ref, hyp2)
# Disabled debug output:
# print(wer1, ';', wer2)

# print(clean_extra_words(ref, hyp1))
예제 #3
0
import csv
from libs.constants import *
from libs.csv_utils import write_spreadsheet_file
from libs.translation_util import clean
from libs.wer import clean_extra_words

# Input spreadsheet (CSV) holding one row per record.
IN_FILE_NAME = 'data/filtered_raw_data.csv'

# newline='' hands line-ending handling to the csv module, as its
# documentation requires; without it, line breaks embedded in quoted
# fields can be misread.
with open(IN_FILE_NAME, 'r', encoding="utf-8", newline='') as csv_file:
    csv_data = list(csv.reader(csv_file, delimiter=',', quotechar='"'))

# For every data row and every EN recall column, collect three one-cell
# rows: the cleaned reference, the cleaned hypothesis, and the hypothesis
# after clean_extra_words has been applied against the reference.
recalls = []
for row in csv_data[FIRST_DATA_ROW:]:  # rows before FIRST_DATA_ROW are skipped
    for index in range(RECALL_COUNT):
        # Recall columns start at FIRST_EN_RECALL_COLUMN and are spaced
        # RECALL_INTERVAL columns apart.
        column_index = FIRST_EN_RECALL_COLUMN + index * RECALL_INTERVAL
        reference = column_index_to_en_recall_reference[column_index]
        hypothesis = row[column_index]
        recalls.extend((
            [clean(reference)],
            [clean(hypothesis)],
            [clean_extra_words(reference, hypothesis)],
        ))

write_spreadsheet_file('output/en_recall_sentences.txt', recalls)