Beispiel #1
0
def process_row(row, writer):
    """Score one transcript/target pair and write the augmented row.

    Reads ``row['transcript']`` and ``row['target']``. When either one is
    empty or whitespace-only the row is skipped and nothing is written.
    Otherwise the pair is echoed to stdout, ``row['wer']`` and
    ``row['score']`` are filled in from a ``SentenceDiff`` built on the pair,
    and the row is handed to ``writer.writerow``.
    """
    spoken = row['transcript']
    expected = row['target']

    # Nothing to compare when either side is blank.
    if not spoken.strip() or not expected.strip():
        return

    print("{}-{}".format(spoken, expected))

    comparison = SentenceDiff(spoken, expected)
    row['wer'] = comparison.wer()
    # NOTE(review): the * 100 scaling assumes chatterize_score() returns a
    # number -- confirm against the SentenceDiff version in use.
    row['score'] = comparison.chatterize_score() * 100
    writer.writerow(row)
Beispiel #2
0
 def test_substitutions(self):
     """Check ``_all_substitutions`` on two groups of words.

     The expected value lists, group by group, every ordered pair of
     distinct items taken from the same inner list.
     """
     word_groups = [["a", "b"], ["x", "y", "z"]]
     actual_pairs = SentenceDiff._all_substitutions(word_groups)
     expected_pairs = [
         ("a", "b"),
         ("b", "a"),
         ("x", "y"),
         ("x", "z"),
         ("y", "x"),
         ("y", "z"),
         ("z", "x"),
         ("z", "y"),
     ]
     assert actual_pairs == expected_pairs
Beispiel #3
0
    def chatterize_score(self):
        """Grade the normalized actual sentence against the normalized target.

        Each variant returned by ``SentenceDiff._homonyms`` for
        ``self.actual_lower`` is compared with ``self.target_lower`` through
        ``self.similarity``; the highest similarity is kept.

        Returns:
            A ``(verdict, best_similarity)`` tuple. ``verdict`` is
            ``"SUPER PASS"`` when the best similarity exceeds
            ``THRESHOLD_SUPER_PASS``, ``"PASS"`` when it exceeds
            ``THRESHOLD_PASS``, and ``"FAIL"`` otherwise.
            ``best_similarity`` stays at ``-1`` when no variant scores
            strictly above ``-1``.
        """
        variant_scores = [
            self.similarity(variant, self.target_lower)
            for variant in SentenceDiff._homonyms(self.actual_lower)
        ]
        # Seed with -1 so an empty variant list still yields a value and only
        # a strictly greater similarity can replace the floor.
        best_similarity = max([-1, *variant_scores])

        if best_similarity > THRESHOLD_SUPER_PASS:
            verdict = "SUPER PASS"
        elif best_similarity > THRESHOLD_PASS:
            verdict = "PASS"
        else:
            verdict = "FAIL"

        return verdict, best_similarity
from sentence_diff import SentenceDiff
import csv

# Batch-score a CSV export: read (actual, target) sentence pairs from the
# first two columns of the input file, append a SentenceDiff-based score to
# each row and write the result to the output file.
with open('edit_score_highlights.csv', newline='') as csv_file_in, \
        open('edit_score__highlights_out.csv', 'w',
             newline='') as csv_file_out:
    writer = csv.writer(csv_file_out,
                        delimiter=',',
                        quotechar='"',
                        quoting=csv.QUOTE_MINIMAL)
    # The "Updated Wer" / "Wer Score" columns are currently not emitted.
    writer.writerow([
        "Actual Text", "Target Text", "Score By Words", "Score By Letters",
        "New Score", "Attn", "New Score 2"
    ])
    reader = csv.reader(csv_file_in, delimiter=',', quotechar='"')
    # Discard the input header row; the default keeps a completely empty
    # input file from raising StopIteration.
    next(reader, None)
    for row in reader:
        # csv.reader yields an empty list for blank lines and short lists for
        # ragged rows; those cannot hold both sentences, so skip them instead
        # of failing with IndexError.
        if len(row) < 2:
            continue

        actual_sentence = row[0]
        target_sentence = row[1]

        # Nothing to compare when either sentence is blank.
        if not actual_sentence.strip() or not target_sentence.strip():
            continue

        print("{}-{}".format(actual_sentence, target_sentence))

        diff = SentenceDiff(actual_sentence, target_sentence)
        # NOTE(review): the * 100 scaling assumes chatterize_score() returns
        # a number -- confirm against the SentenceDiff version in use.
        row.append(diff.chatterize_score() * 100)
        writer.writerow(row)
def diff(actual_sentence, target_sentence):
    """Build a ``SentenceDiff`` for the given actual/target sentence pair.

    Both sentences are forwarded by keyword.
    """
    sentence_diff = SentenceDiff(
        actual_sentence=actual_sentence,
        target_sentence=target_sentence,
    )
    return sentence_diff
Beispiel #6
0
 def normalize(self, text):
     """Return ``text`` in the canonical form used for comparison.

     Steps, applied in this order:
       1. lower-case the text,
       2. run ``profanity.censor`` on it, passing ``'x'`` as the second
          argument,
       3. ``SentenceDiff._sound_out_dollars``,
       4. ``SentenceDiff._spell_out_numbers_in_word``,
       5. ``SentenceDiff._remove_punctuation``.
     """
     censored = profanity.censor(text.lower(), 'x')
     dollars_sounded = SentenceDiff._sound_out_dollars(censored)
     numbers_spelled = SentenceDiff._spell_out_numbers_in_word(dollars_sounded)
     return SentenceDiff._remove_punctuation(numbers_spelled)
Beispiel #7
0
 def __init__(self, actual, target):
     """Store the raw sentences together with their normalized forms.

     ``SentenceDiff._assert_not_empty`` is called on both arguments before
     anything is stored. The raw inputs are kept as ``self.actual`` and
     ``self.target``; the results of ``self.normalize`` are kept as
     ``self.actual_lower`` and ``self.target_lower``.
     """
     SentenceDiff._assert_not_empty(actual, target)
     self.actual, self.target = actual, target

     # Normalized copies; the actual sentence is processed first.
     normalized_actual = self.normalize(actual)
     self.actual_lower = normalized_actual
     normalized_target = self.normalize(target)
     self.target_lower = normalized_target
def chatterize_score(actual_sentence, target_sentence):
    """Return ``SentenceDiff.chatterize_score()`` for the given pair.

    Convenience wrapper: builds a ``SentenceDiff`` from the two sentences
    (passed positionally) and returns its ``chatterize_score()`` result
    unchanged.
    """
    return SentenceDiff(actual_sentence, target_sentence).chatterize_score()
 def test_normalize_1_dollar(self):
     """``$1`` inside a sentence is expected to become ``1 dollar``."""
     # The constructor arguments are placeholders; only _normalize is tested.
     sentence_diff = SentenceDiff("xx", "xx")
     normalized = sentence_diff._normalize("here is $1 for you")
     assert normalized == "here is 1 dollar for you"
 def test_normalize_100_dollars(self):
     """``$100`` on its own is expected to become ``100 dollars``."""
     # The constructor arguments are placeholders; only _normalize is tested.
     sentence_diff = SentenceDiff("xx", "xx")
     normalized = sentence_diff._normalize("$100")
     assert normalized == "100 dollars"
Beispiel #11
0
 def test_sentence_homonymsdeserts(self):
     """``_homonyms`` should return the sentence plus its desert/dessert variant."""
     sentence_variants = SentenceDiff._homonyms("I love desert")
     expected_variants = ["I love desert", "I love dessert"]
     assert sentence_variants == expected_variants