Beispiel #1
0
 def test_normalizeAlignment_textnum_hyphen(self):
     """Expect 'fifty year old' to be rewritten as '50-year-old'.

     The remaining substituted slots are expected to keep the hypothesis
     wording. The empty deletion slot is part of the joined expectation,
     so the expected string starts with a space.
     """
     ref_tokens = [
         u'A',
         u'50-year-old',
         u'business', u'man',
         u'lamented',
         u'to', u'me', u'that', u'he', u'feels', u'he', u"doesn't",
         u'have', u'colleagues', u'anymore',
         u'at',
         u'work',
     ]
     hyp_tokens = [
         u'',
         u'fifty year old',
         u'business', u'man',
         u'laments',
         u'to', u'me', u'that', u'he', u'feels', u'he', u"doesn't",
         u'have', u'colleagues', u'anymore',
         u'it',
         u'work',
     ]
     # D S C C S, ten correct slots, then S C.
     edit_ops = ([u'D', u'S', u'C', u'C', u'S'] + [u'C'] * 10 +
                 [u'S', u'C'])
     want = ' '.join([
         u'',
         u'50-year-old',
         u'business', u'man',
         u'laments',
         u'to', u'me', u'that', u'he', u'feels', u'he', u"doesn't",
         u'have', u'colleagues', u'anymore',
         u'it',
         u'work',
     ])
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded)
     self.assertEqual(got, want)
Beispiel #2
0
 def test_normalizeAlignment_number(self):
     """Expect digit string '2000000' to be rewritten as 'two million'.

     The other substituted slot ('costs' vs. 'cost') is expected to keep
     the hypothesis word.
     """
     ref_tokens = [
         u'You', u'need', u'to', u'know', u'that',
         u'the', u'average', u'patent', u'troll', u'defense',
         u'costs', u'two million',
         u'dollars', u'and', u'takes', u'18',
         u'months', u'when', u'you', u'win',
     ]
     hyp_tokens = [
         u'you', u'need', u'to', u'know', u'that',
         u'the', u'average', u'patent', u'troll', u'defense',
         u'cost', u'2000000',
         u'dollars', u'and', u'takes', u'18',
         u'months', u'when', u'you', u'win',
     ]
     # Ten correct slots, two substitutions, eight correct slots.
     edit_ops = [u'C'] * 10 + [u'S', u'S'] + [u'C'] * 8
     want = ' '.join([
         u'you', u'need', u'to', u'know', u'that',
         u'the', u'average', u'patent', u'troll', u'defense',
         u'cost', u'two million',
         u'dollars', u'and', u'takes', u'18',
         u'months', u'when', u'you', u'win',
     ])
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded)
     self.assertEqual(got, want)
Beispiel #3
0
 def test_normalize_alignment_year(self):
     """Expect spelled-out 'twenty twelve' to be rewritten as '2012'."""
     ref_tokens = ["Now", "fast-forward", "to", "2012"]
     hyp_tokens = ["now", "fast-forward", "to", "twenty twelve"]
     edit_ops = ["C"] * 3 + ["S"]
     want = ' '.join(["now", "fast-forward", "to", "2012"])
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded)
     self.assertEqual(got, want)
Beispiel #4
0
 def test_normalize_alignment_any_more(self):
     """Expect 'any more' to be rewritten as the reference 'anymore'."""
     ref_tokens = ["anymore"]
     hyp_tokens = ["any more"]
     edit_ops = ["S"]
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded)
     # The expectation is the reference itself, joined after the call.
     want = ' '.join(ref_tokens)
     self.assertEqual(got, want)
Beispiel #5
0
 def test_normalize_middle_age(self):
     """Expect 'they are' -> "They're" while 'middle age' stays as is.

     The third slot ('than' vs. 'the') is also expected to keep the
     hypothesis word.
     """
     ref_tokens = [
         "They're", "happier", "than", "middle-aged",
         "people", "and", "younger", "people", "certainly",
     ]
     hyp_tokens = [
         "they are", "happier", "the", "middle age",
         "people", "and", "younger", "people", "certainly",
     ]
     edit_ops = ["S", "C", "S", "S"] + ["C"] * 5
     want_tokens = [
         "They're", "happier", "the", "middle age",
         "people", "and", "younger", "people", "certainly",
     ]
     # Empty slots (none here) would be dropped before joining.
     want = ' '.join(tok for tok in want_tokens if tok)
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded)
     self.assertEqual(got, want)
Beispiel #6
0
 def test_normalize_94(self):
     """Expect 'eighteen' and 'ninety four' to become '18' and '94'."""
     ref_tokens = [
         "Originally", "the", "sample", "was", "aged",
         "18", "to", "94",
     ]
     hyp_tokens = [
         "originally", "the", "sample", "was", "aged",
         "eighteen", "to", "ninety four",
     ]
     edit_ops = ["C"] * 5 + ["S", "C", "S"]
     want_tokens = [
         "originally", "the", "sample", "was", "aged",
         "18", "to", "94",
     ]
     # Empty slots (none here) would be dropped before joining.
     want = ' '.join(tok for tok in want_tokens if tok)
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded)
     self.assertEqual(got, want)
Beispiel #7
0
 def test_normalizeAlignment_911(self):
     """Expect 'nine 11' to be rewritten as the reference '911'.

     The inserted and other substituted words are expected to remain as
     hypothesised; the trailing empty deletion slot is stripped from the
     expectation.
     """
     ref_tokens = [
         u'Everyone', u'who', u'knew', u'me', u'before',
         u'911', u'', u'believes', u"I'm", u'dead',
     ]
     hyp_tokens = [
         u'everyone', u'who', u'knew', u'me', u'before',
         u'nine 11', u'the', u'believes', u'line', u'',
     ]
     edit_ops = [u'C'] * 5 + [u'S', u'I', u'S', u'S', u'D']
     want_tokens = [
         u'everyone', u'who', u'knew', u'me', u'before',
         u'911', u'the', u'believes', u'line', u'',
     ]
     # Joining leaves a trailing space for the empty slot; strip it.
     want = ' '.join(want_tokens).strip()
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded)
     self.assertEqual(got, want)
Beispiel #8
0
 def test_normalizeAlignment_text_hyphen(self):
     """Expect 'one to one' to be rewritten as hyphenated 'one-to-one'.

     All other hypothesis words, including the four insertions, are
     expected to be kept unchanged.
     """
     ref_tokens = [
         u'Our', u'digital', u'body', u'is',
         u'', u'', u'', u'',
         u'one-to-one', u'life',
     ]
     hyp_tokens = [
         u'are', u'what', u'it', u'is',
         u'all', u'about', u'the', u'these',
         u'one to one', u'life',
     ]
     edit_ops = ([u'S'] * 3 + [u'C'] + [u'I'] * 4 +
                 [u'S', u'C'])
     want = ' '.join([
         u'are', u'what', u'it', u'is',
         u'all', u'about', u'the', u'these',
         u'one-to-one', u'life',
     ])
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded)
     self.assertEqual(got, want)
Beispiel #9
0
 def test_normalize_alignment_theyre2(self):
     """Expect the second 'they are' (S + I slots) to become "they're".

     The earlier 'they are', which is aligned as correct, is expected to
     stay expanded.
     """
     # Words shared by reference, hypothesis and expectation around the
     # contraction; only the first reference word differs in casing.
     head = ["in", "our", "study", "they", "are", "more", "positive",
             "but"]
     tail = [
         "also", "more", "likely", "than", "younger", "people", "to",
         "experience", "mixed", "emotions", "sadness", "at", "the",
         "same", "time", "you", "experience", "happiness", "you",
         "know", "that", "tear", "in", "the", "eye", "when", "you're",
         "smiling", "at", "a", "friend",
     ]
     ref_tokens = ["In"] + head[1:] + ["they're", ""] + tail
     hyp_tokens = head + ["they", "are"] + tail
     edit_ops = ["C"] * len(head) + ["S", "I"] + ["C"] * len(tail)
     want = ' '.join(head + ["they're"] + tail)
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded)
     self.assertEqual(got, want)
Beispiel #10
0
 def test_normalize_abbrev_wrong(self):
     """Expect misrecognised 'p.' / 'o.' fragments to be left untouched.

     With ``fix_casing=True`` the first word is expected to take the
     reference casing ('So'); the empty deletion slot is dropped from the
     expectation.
     """
     ref_tokens = [
         "So", "we", "learned", "the",
         "majority", "of", "anatomic", "classes", "taught",
         "they", "do", "not", "have",
         "a", "cadaver", "dissection", "lab",
     ]
     hyp_tokens = [
         "so", "we", "learned", "the",
         "most jury p.", "o.", "anatomy", "class", "called",
         "they", "do", "not", "have",
         "", "had ever", "dissection", "lead",
     ]
     edit_ops = (["C"] * 4 + ["S"] * 5 + ["C"] * 4 +
                 ["D", "S", "C", "S"])
     want_tokens = [
         "So", "we", "learned", "the",
         "most jury p.", "o.", "anatomy", "class", "called",
         "they", "do", "not", "have",
         "", "had ever", "dissection", "lead",
     ]
     # Drop the empty deletion slot before joining.
     want = ' '.join(tok for tok in want_tokens if tok)
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded,
                                                 fix_casing=True)
     self.assertEqual(got, want)
Beispiel #11
0
 def test_normalize_alignment_ex1(self):
     """Expect the hypothesis to come back unchanged for this alignment.

     Here 'one-to-one' is aligned against 'one two' plus two insertions,
     and the expectation is simply the joined hypothesis tokens.
     """
     ref_tokens = [
         "Our", "digital", "body", "is",
         "one-to-one", "", "",
         "life", "size", "so", "this", "is", "exactly", "the", "way",
         "students",
         "will",
         "see", "the", "real", "anatomy",
     ]
     hyp_tokens = [
         "our", "peaceful", "body", "is",
         "one two", "one", "life",
         "life", "size", "so", "this", "is", "exactly", "the", "way",
         "students",
         "would",
         "see", "the", "real", "anatomy",
     ]
     edit_ops = (["C", "S", "C", "C"] + ["S", "I", "I"] + ["C"] * 9 +
                 ["S"] + ["C"] * 4)
     # Snapshot the expectation before the alignment object is built.
     want = ' '.join(hyp_tokens)
     expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                  lowercase=True)
     got = HypothesisNormalizer.normalizeAligned(expanded)
     self.assertEqual(got, want)