def test_isDashEquivalent_false_back(self):
    # NOTE(review): a second method with this exact name appears later in
    # this file; if both belong to the same class, the later definition
    # replaces this one and this test never runs -- consider renaming.
    # Despite its name, this test exercises normalize(), not
    # isDashEquivalent(): a hypothesis with a trailing dash is expected to
    # be normalized to the dash-free reference form.
    reference = "touch-interactive"
    hypothesis = "touch-interactive-"
    expected = "touch-interactive"

    normalized = HypothesisNormalizer.normalize(hypothesis, reference)

    self.assertEqual(normalized, expected)
def test_normalizeAlignment_textnum_hyphen(self):
    # The spelled-out "fifty year old" aligned against the hyphenated
    # numeric reference "50-year-old" should come back in the reference
    # form; the other substitutions keep the hypothesis wording.
    ref_tokens = [
        u'A', u'50-year-old', u'business', u'man', u'lamented', u'to',
        u'me', u'that', u'he', u'feels', u'he', u"doesn't", u'have',
        u'colleagues', u'anymore', u'at', u'work',
    ]
    hyp_tokens = [
        u'', u'fifty year old', u'business', u'man', u'laments', u'to',
        u'me', u'that', u'he', u'feels', u'he', u"doesn't", u'have',
        u'colleagues', u'anymore', u'it', u'work',
    ]
    edit_ops = [
        u'D', u'S', u'C', u'C', u'S', u'C',
        u'C', u'C', u'C', u'C', u'C', u'C', u'C',
        u'C', u'C', u'S', u'C',
    ]
    # The empty first token means the joined string starts with a space;
    # it is intentionally not stripped here.
    expected_tokens = [
        u'', u'50-year-old', u'business', u'man', u'laments', u'to',
        u'me', u'that', u'he', u'feels', u'he', u"doesn't", u'have',
        u'colleagues', u'anymore', u'it', u'work',
    ]
    expected = ' '.join(expected_tokens)

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded)

    self.assertEqual(result, expected)
def test_normalize_ref_extra_words(self):
    # Only the number is expected to take the reference form ("50"); the
    # differing leading word of the hypothesis ("the") must be kept.
    reference = "a 50 year old"
    hypothesis = "the fifty year old"

    normalized = HypothesisNormalizer.normalize(hypothesis, reference)

    self.assertEqual(normalized, "the 50 year old")
def test_normalizeHyphens_notmatch_simple_reverse(self):
    # The hypothesis is hyphenated while the reference is not; the
    # hyphenated word is expected to be rewritten without the hyphen and
    # the extra leading word ("it's") left untouched.
    reference = "touch interactive"
    hypothesis = "it's touch-interactive"

    normalized = HypothesisNormalizer.normalize(hypothesis, reference)

    self.assertEqual(normalized, "it's touch interactive")
def test_normalize_fifty_year_old_num_hyphen(self):
    # TODO: This one has a hyphenation issue that needs to be resolved.
    # Spelled-out words vs. a hyphenated numeric reference: the result is
    # expected to equal the reference exactly.
    reference = "50-year-old"
    hypothesis = "fifty year old"

    normalized = HypothesisNormalizer.normalize(hypothesis, reference)

    self.assertEqual(normalized, reference)
def test_normalize_words_missing_front(self):
    # The hypothesis lacks the leading "two" of the reference, so it is
    # expected to come back unchanged rather than be turned into the
    # reference.
    reference = "two hundred thirty four thousand"
    hypothesis = "hundred thirty four thousand"

    normalized = HypothesisNormalizer.normalize(hypothesis, reference)

    self.assertEqual(normalized, hypothesis)
    self.assertNotEqual(normalized, reference)
def test_normalizeAlignment_number(self):
    # The digit string "2000000" substituted for the reference's
    # "two million" should be rewritten to the reference wording, while
    # the unrelated substitution "cost"/"costs" keeps the hypothesis word.
    ref_tokens = [
        u'You', u'need', u'to', u'know', u'that',
        u'the', u'average', u'patent', u'troll', u'defense',
        u'costs', u'two million', u'dollars', u'and', u'takes',
        u'18', u'months', u'when', u'you', u'win',
    ]
    hyp_tokens = [
        u'you', u'need', u'to', u'know', u'that',
        u'the', u'average', u'patent', u'troll', u'defense',
        u'cost', u'2000000', u'dollars', u'and', u'takes',
        u'18', u'months', u'when', u'you', u'win',
    ]
    edit_ops = [
        u'C', u'C', u'C', u'C', u'C',
        u'C', u'C', u'C', u'C', u'C',
        u'S', u'S', u'C', u'C', u'C',
        u'C', u'C', u'C', u'C', u'C',
    ]
    expected_tokens = [
        u'you', u'need', u'to', u'know', u'that',
        u'the', u'average', u'patent', u'troll', u'defense',
        u'cost', u'two million', u'dollars', u'and', u'takes',
        u'18', u'months', u'when', u'you', u'win',
    ]
    expected = ' '.join(expected_tokens)

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded)

    self.assertEqual(result, expected)
def test_normalize_alignment_year(self):
    # A year spoken as "twenty twelve" should be rewritten to the
    # reference's digit form "2012".
    ref_tokens = ["Now", "fast-forward", "to", "2012"]
    hyp_tokens = ["now", "fast-forward", "to", "twenty twelve"]
    edit_ops = ["C", "C", "C", "S"]
    expected = ' '.join(["now", "fast-forward", "to", "2012"])

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded)

    self.assertEqual(result, expected)
def test_normalize_alignment_any_more(self):
    # "any more" substituted for "anymore" should be rewritten to the
    # reference spelling.
    ref_tokens = ["anymore"]
    hyp_tokens = ["any more"]
    edit_ops = ["S"]

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded)

    # Built after the calls above, from the same list object that was
    # handed to ExpandedAlignment (kept in this order on purpose).
    expected = ' '.join(ref_tokens)
    self.assertEqual(result, expected)
def test_normalize_middle_age(self):
    # NOTE(review): a second method with this exact name appears later in
    # this file; if both belong to the same class, the later definition
    # replaces this one and this test never runs -- consider renaming.
    # "they are" vs. "They're" is expected to take the reference form,
    # while "middle age" vs. "middle-aged" must stay as the hypothesis.
    ref_tokens = [
        "They're", "happier", "than", "middle-aged", "people",
        "and", "younger", "people", "certainly",
    ]
    hyp_tokens = [
        "they are", "happier", "the", "middle age", "people",
        "and", "younger", "people", "certainly",
    ]
    edit_ops = [
        "S", "C", "S", "S", "C",
        "C", "C", "C", "C",
    ]
    expected_tokens = [
        "They're", "happier", "the", "middle age", "people",
        "and", "younger", "people", "certainly",
    ]
    # Empty tokens (none here) are dropped before joining.
    expected = ' '.join(tok for tok in expected_tokens if tok)

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded)

    self.assertEqual(result, expected)
def test_normalize_94(self):
    # Spelled-out numbers ("eighteen", "ninety four") should be rewritten
    # to the reference digits ("18", "94").
    ref_tokens = [
        "Originally", "the", "sample", "was",
        "aged", "18", "to", "94",
    ]
    hyp_tokens = [
        "originally", "the", "sample", "was",
        "aged", "eighteen", "to", "ninety four",
    ]
    edit_ops = [
        "C", "C", "C", "C",
        "C", "S", "C", "S",
    ]
    expected_tokens = [
        "originally", "the", "sample", "was",
        "aged", "18", "to", "94",
    ]
    # Empty tokens (none here) are dropped before joining.
    expected = ' '.join(tok for tok in expected_tokens if tok)

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded)

    self.assertEqual(result, expected)
def test_normalizeAlignment_911(self):
    # The mixed form "nine 11" should be rewritten to the reference
    # "911"; insertions and unrelated substitutions keep the hypothesis
    # words.
    ref_tokens = [
        u'Everyone', u'who', u'knew', u'me', u'before',
        u'911', u'', u'believes', u"I'm", u'dead',
    ]
    hyp_tokens = [
        u'everyone', u'who', u'knew', u'me', u'before',
        u'nine 11', u'the', u'believes', u'line', u'',
    ]
    edit_ops = [
        u'C', u'C', u'C', u'C', u'C',
        u'S', u'I', u'S', u'S', u'D',
    ]
    expected_tokens = [
        u'everyone', u'who', u'knew', u'me', u'before',
        u'911', u'the', u'believes', u'line', u'',
    ]
    # The empty last token leaves a trailing space after joining, which
    # strip() removes.
    expected = ' '.join(expected_tokens).strip()

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded)

    self.assertEqual(result, expected)
def test_normalizeAlignment_text_hyphen(self):
    # "one to one" substituted for the hyphenated "one-to-one" should be
    # rewritten to the reference form; everything else keeps the
    # hypothesis words.
    ref_tokens = [
        u'Our', u'digital', u'body', u'is', u'',
        u'', u'', u'', u'one-to-one', u'life',
    ]
    hyp_tokens = [
        u'are', u'what', u'it', u'is', u'all',
        u'about', u'the', u'these', u'one to one', u'life',
    ]
    edit_ops = [
        u'S', u'S', u'S', u'C', u'I',
        u'I', u'I', u'I', u'S', u'C',
    ]
    expected_tokens = [
        u'are', u'what', u'it', u'is', u'all',
        u'about', u'the', u'these', u'one-to-one', u'life',
    ]
    expected = ' '.join(expected_tokens)

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded)

    self.assertEqual(result, expected)
def test_normalize_alignment_theyre2(self):
    # The contraction "they're" in the reference is aligned to "they"
    # (substitution) followed by "are" (insertion) in the hypothesis; the
    # pair is expected to collapse into the single token "they're".
    ref_tokens = [
        "In", "our", "study", "they", "are", "more", "positive", "but",
        "they're", "",
        "also", "more", "likely", "than", "younger", "people", "to",
        "experience", "mixed", "emotions", "sadness", "at", "the", "same",
        "time", "you", "experience", "happiness", "you", "know", "that",
        "tear", "in", "the", "eye", "when", "you're", "smiling", "at",
        "a", "friend",
    ]
    hyp_tokens = [
        "in", "our", "study", "they", "are", "more", "positive", "but",
        "they", "are",
        "also", "more", "likely", "than", "younger", "people", "to",
        "experience", "mixed", "emotions", "sadness", "at", "the", "same",
        "time", "you", "experience", "happiness", "you", "know", "that",
        "tear", "in", "the", "eye", "when", "you're", "smiling", "at",
        "a", "friend",
    ]
    edit_ops = [
        "C", "C", "C", "C", "C", "C", "C", "C",
        "S", "I",
        "C", "C", "C", "C", "C", "C", "C",
        "C", "C", "C", "C", "C", "C", "C",
        "C", "C", "C", "C", "C", "C", "C",
        "C", "C", "C", "C", "C", "C", "C", "C",
        "C", "C",
    ]
    expected_tokens = [
        "in", "our", "study", "they", "are", "more", "positive", "but",
        "they're",
        "also", "more", "likely", "than", "younger", "people", "to",
        "experience", "mixed", "emotions", "sadness", "at", "the", "same",
        "time", "you", "experience", "happiness", "you", "know", "that",
        "tear", "in", "the", "eye", "when", "you're", "smiling", "at",
        "a", "friend",
    ]
    expected = ' '.join(expected_tokens)

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded)

    self.assertEqual(result, expected)
def test_getNormOptions_contract_dont(self):
    # getNormOptions() returns a mapping whose values are iterables of
    # alternatives; flattened, "don't" should offer exactly "do not".
    options = HypothesisNormalizer.getNormOptions("don't")
    flattened = set(itertools.chain.from_iterable(options.values()))

    self.assertEqual(flattened, {'do not'})
def test_normalize_fifty_year_old(self):
    # A spelled-out number should be rewritten to the reference digits,
    # making the whole result equal to the reference.
    reference = "50 year old"
    hypothesis = "fifty year old"

    normalized = HypothesisNormalizer.normalize(hypothesis, reference)

    self.assertEqual(normalized, reference)
def test_normalize_middle_age(self):
    # NOTE(review): an earlier method in this file has the same name; if
    # both belong to the same class, this later definition is the one
    # that is kept and the earlier test never runs.
    # "middle age" is not the same words as "middle-aged", so the
    # hypothesis is expected to come back unchanged.
    hypothesis = "the middle age"
    reference = "than middle-aged"

    normalized = HypothesisNormalizer.normalize(hypothesis, reference)

    self.assertEqual(normalized, "the middle age")
def test_getNormOptions_number_2012(self):
    # "2012" should offer both the cardinal reading and the year-style
    # reading once the option groups are flattened.
    options = HypothesisNormalizer.getNormOptions("2012")
    flattened = set(itertools.chain.from_iterable(options.values()))

    wanted = {'two thousand twelve', 'twenty twelve'}
    self.assertEqual(flattened, wanted)
def test_normalize_abbrev_wrong(self):
    # None of the substituted hypothesis tokens (e.g. "most jury p.",
    # "o.") are equivalent to their reference tokens, so they must be
    # left alone. With fix_casing=True the first word is expected to
    # carry the reference's capitalisation ("So").
    ref_tokens = [
        "So", "we", "learned", "the", "majority", "of",
        "anatomic", "classes", "taught", "they", "do", "not",
        "have", "a", "cadaver", "dissection", "lab",
    ]
    hyp_tokens = [
        "so", "we", "learned", "the", "most jury p.", "o.",
        "anatomy", "class", "called", "they", "do", "not",
        "have", "", "had ever", "dissection", "lead",
    ]
    edit_ops = [
        "C", "C", "C", "C", "S", "S",
        "S", "S", "S", "C", "C", "C",
        "C", "D", "S", "C", "S",
    ]
    expected_tokens = [
        "So", "we", "learned", "the", "most jury p.", "o.",
        "anatomy", "class", "called", "they", "do", "not",
        "have", "", "had ever", "dissection", "lead",
    ]
    # The empty token standing for the deletion is dropped before joining.
    expected = ' '.join(tok for tok in expected_tokens if tok)

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded,
                                                   fix_casing=True)

    self.assertEqual(result, expected)
def test_normalize_alignment_ex1(self):
    # "one two" + inserted "one" is not the reference's "one-to-one", so
    # the hypothesis is expected to come back exactly as given.
    ref_tokens = [
        "Our", "digital", "body", "is", "one-to-one", "", "",
        "life", "size", "so", "this", "is", "exactly", "the",
        "way", "students", "will", "see", "the", "real", "anatomy",
    ]
    hyp_tokens = [
        "our", "peaceful", "body", "is", "one two", "one", "life",
        "life", "size", "so", "this", "is", "exactly", "the",
        "way", "students", "would", "see", "the", "real", "anatomy",
    ]
    edit_ops = [
        "C", "S", "C", "C", "S", "I", "I",
        "C", "C", "C", "C", "C", "C", "C",
        "C", "C", "S", "C", "C", "C", "C",
    ]
    # Captured before the list is handed to ExpandedAlignment (kept in
    # this order on purpose).
    expected = ' '.join(hyp_tokens)

    expanded = ExpandedAlignment(ref_tokens, hyp_tokens, edit_ops,
                                 lowercase=True)
    result = HypothesisNormalizer.normalizeAligned(expanded)

    self.assertEqual(result, expected)
def test_getNormOptions_text_124(self):
    # With extended=False, the hyphenated spelled-out number should offer
    # only the de-hyphenated wording and the digit form.
    options = HypothesisNormalizer.getNormOptions(
        "one hundred twenty-four", extended=False)
    flattened = set(itertools.chain.from_iterable(options.values()))

    wanted = {'one hundred twenty four', '124'}
    self.assertEqual(flattened, wanted)
def test_isDashEquivalent_false_back(self):
    # NOTE(review): an earlier method in this file has the same name; if
    # both belong to the same class, this later definition is the one
    # that is kept and the earlier test never runs.
    # A reference with a trailing dash must not be treated as
    # dash-equivalent to the plain two-word hypothesis.
    reference = "touch-interactive-"
    hypothesis = "touch interactive"

    equivalent = HypothesisNormalizer.isDashEquivalent(hypothesis,
                                                       reference)

    self.assertFalse(equivalent)
def test_isDashEquivalent(self):
    # Two words that differ from the reference only by a space in place
    # of the hyphen should count as dash-equivalent.
    reference = "touch-interactive"
    hypothesis = "touch interactive"

    equivalent = HypothesisNormalizer.isDashEquivalent(hypothesis,
                                                       reference)

    self.assertTrue(equivalent)
def test_getNormOptions_fifty(self):
    # The single spelled-out number "fifty" should offer exactly its
    # digit form once the option groups are flattened.
    options = HypothesisNormalizer.getNormOptions("fifty")
    flattened = set(itertools.chain.from_iterable(options.values()))

    self.assertEqual(flattened, {'50'})
def test_normalize_words_hyphen(self):
    # A hyphen inside a spelled-out number ("thirty-four") should be
    # normalized away so the result equals the un-hyphenated reference.
    reference = "two hundred thirty four thousand"
    hypothesis = "two hundred thirty-four thousand"

    normalized = HypothesisNormalizer.normalize(hypothesis, reference)

    self.assertEqual(normalized, reference)
def test_normalizeHyphens_true(self):
    # The hypothesis uses different words from the reference ("two"
    # instead of "to", plus an extra "life"), so normalize() must NOT
    # return the reference form.
    reference = "one-to-one"
    hypothesis = "one two one life"

    normalized = HypothesisNormalizer.normalize(hypothesis, reference)

    self.assertNotEqual(normalized, reference)
def test_normalize_digits(self):
    # A digit-string hypothesis should be rewritten to the spelled-out
    # reference wording.
    reference = "two hundred thirty four thousand"
    hypothesis = "234000"

    normalized = HypothesisNormalizer.normalize(hypothesis, reference)

    self.assertEqual(normalized, reference)