def test_find_expanded_acronyms(self):
    """Exercise find_expanded_acronyms on the basic expansion scenarios.

    Covers text with no acronym, a single expansion, two different
    expansions of the same acronym, a repeated expansion, and an
    expansion that contains ', Inc.'.
    """
    # Text without any parenthesised acronym produces an empty result.
    acronyms = find_expanded_acronyms('Howdy Neighbor')
    self.assertEqual(len(acronyms), 0)

    acronyms = find_expanded_acronyms(
        'Did you work on the Alpha Beta Company (ABC) project?')
    self.assertEqual(len(acronyms), 1)
    self.assertIn('ABC', acronyms)
    self.assertEqual(acronyms['ABC'], ['Alpha Beta Company'])

    # One acronym with two different expansions keeps both, in order.
    expansions = ['Different Expansions Fail', 'Dubious Expressions Fly']
    acronyms = find_expanded_acronyms(
        '{0} (DEF) if {1} (DEF)'.format(*expansions))
    self.assertEqual(len(acronyms), 1)
    self.assertIn('DEF', acronyms)
    self.assertEqual(len(acronyms['DEF']), 2)
    self.assertListEqual(acronyms['DEF'], expansions)

    # Make sure that the same expansion doesn't show up multiple times
    acronyms = find_expanded_acronyms(
        '{0} (DEF) if {1} (DEF) {0} (DEF)'.format(*expansions))
    self.assertEqual(len(acronyms), 1)
    self.assertIn('DEF', acronyms)
    self.assertEqual(len(acronyms['DEF']), 2)
    self.assertListEqual(acronyms['DEF'], expansions)
    # TODO: Consider keeping a count of expansions.

    # Make sure that things like '..., Inc.' work ok
    acronyms = find_expanded_acronyms(
        'I think he works at My Favorite Company, Inc. (MFCI), '
        'or at least he used to.')
    self.assertEqual(len(acronyms), 1)
    self.assertIn('MFCI', acronyms)
    # The trailing period of 'Inc.' is not part of the expected expansion.
    self.assertEqual(acronyms['MFCI'][0], 'My Favorite Company, Inc')
def test_expanded_acronyms_sentence_leading_caps(self):
    """Check handling of a capitalised leading 'The' before an expansion.

    'The' is expected to be dropped when the acronym has no letter for
    it (NOCF) and kept when the acronym starts with T (TNODF).
    """
    acronyms = find_expanded_acronyms(
        'The New Old Cat Factory (NOCF) is big.')
    self.assertEqual(len(acronyms), 1)
    self.assertIn('NOCF', acronyms)
    self.assertEqual(acronyms['NOCF'], ['New Old Cat Factory'])

    acronyms = find_expanded_acronyms(
        'Come visit The New Old Dog Factory (TNODF)')
    self.assertEqual(len(acronyms), 1)
    self.assertIn('TNODF', acronyms)
    self.assertEqual(acronyms['TNODF'], ['The New Old Dog Factory'])
def test_acronyms_with_embedded_acronyms(self):
    """Check expansions whose words contribute several capital letters.

    Each text defines ALPBC twice with the same expansion (the second
    time right after 'Appendix Q.6'); exactly one expansion is expected.
    """
    # Only the first word carries multiple capitals.
    text = ('The ALPha Beta Company (ALPBC) is a company. '
            'Yes, ALPBC is for realz. '
            'More in Appendix Q.6 ALPha Beta Company (ALPBC).')
    acronyms = find_expanded_acronyms(text)
    self.assertEqual(len(acronyms), 1)
    self.assertIn('ALPBC', acronyms)
    self.assertEqual(len(acronyms['ALPBC']), 1)
    self.assertEqual(acronyms['ALPBC'], ['ALPha Beta Company'])

    # Every word carries extra capitals.
    text = ('The ALPha BeTa ComPany (ALPBC) is a company. '
            'Yes, ALPBC is for realz. '
            'More in Appendix Q.6 ALPha BeTa ComPany (ALPBC).')
    acronyms = find_expanded_acronyms(text)
    self.assertEqual(len(acronyms), 1)
    self.assertIn('ALPBC', acronyms)
    self.assertEqual(len(acronyms['ALPBC']), 1)
    self.assertEqual(acronyms['ALPBC'], ['ALPha BeTa ComPany'])
def test_defined_but_not_used_acronyms(self):
    """Check that find_unused_acronyms reports the acronym seen only once.

    ABC and RRG appear in both inputs; LOL appears only in the second,
    so it is the single entry expected in the result.
    """
    acronyms = find_expanded_acronyms(
        "The Alpha Beta Company (ABC) project was Really Really Good (RRG)")
    defined_acronyms = find_acronyms("ABC RRG LOL")
    self.assertEqual(len(acronyms), 2)
    self.assertEqual(len(defined_acronyms), 3)

    unused_acronyms = find_unused_acronyms(acronyms, defined_acronyms)
    self.assertEqual(len(unused_acronyms), 1)
    self.assertIn('LOL', unused_acronyms)
def test_pluralized_acronyms(self):
    """Check that a trailing plural 's' is not part of the acronym key.

    Both find_acronyms and find_expanded_acronyms are expected to file
    'CDFs' under the key 'CDF'.
    """
    acronyms = find_acronyms('There are four CDFs due today')
    self.assertEqual(len(acronyms), 1)
    self.assertIn('CDF', acronyms)
    # A bare (unexpanded) acronym is expected to map to one empty string.
    self.assertEqual(acronyms['CDF'], [''])

    acronyms = find_expanded_acronyms(
        'The Corporate Data Files (CDFs) are missing.')
    self.assertEqual(len(acronyms), 1)
    self.assertIn('CDF', acronyms)
    self.assertEqual(acronyms['CDF'], ['Corporate Data Files'])
def test_should_not_span_expansions_across_any_line_breaks(self):
    """Check that an expansion never reaches back across a line break.

    Each input repeats the company name on a heading-like first line,
    separated from the real definition by a newline or carriage return;
    only the expansion on the defining line is expected.
    """
    # Line feed separator.
    acronyms = find_expanded_acronyms(
        'Section 1.3.2 Alpha Beta Company\nThe Alpha Beta Company (ABC)')
    self.assertEqual(len(acronyms), 1)
    self.assertIn('ABC', acronyms)
    self.assertEqual(acronyms['ABC'], ['Alpha Beta Company'])

    # Carriage return separator.
    acronyms = find_expanded_acronyms(
        'Section 1.3.2 Alpha Beta Company\rThe Alpha Beta Company (ABC)')
    self.assertEqual(len(acronyms), 1)
    self.assertIn('ABC', acronyms)
    self.assertEqual(acronyms['ABC'], ['Alpha Beta Company'])

    # This one isn't strictly across line breaks, but the most common
    # place for a dash to mess something up would be in a heading/line
    # break/new paragraph starting with that acronym situation,
    # including a dash or other tricksy punctuation.
    acronyms = find_expanded_acronyms(
        '1. New Good-Fair Company\rThe New Good-Fair Company (NGFC) always...')
    self.assertEqual(len(acronyms), 1)
    self.assertIn('NGFC', acronyms)
    self.assertEqual(acronyms['NGFC'], ['New Good-Fair Company'])
def test_catch_divided_expansions(self):
    """Check that a partial expansion is widened to the full one.

    The expansion contains the lowercase word 'and'. Starting from the
    fragment after it ('Preposition Company, Inc'), fix_divided_expansion
    is expected to return the whole name, and find_expanded_acronyms is
    expected to report the whole name as well.
    """
    correct_expansion = 'The Leading Article and Preposition Company, Inc'
    acronym = 'TLAPCI'
    divided_expansion = 'Preposition Company, Inc'
    text = (
        "When I worked for {0} ({1})".format(correct_expansion, acronym)
        + ", we made sure that prepositions were never something we ended"
          " sentences with."
    )

    # Feed the helper the same regex match the finder would work from.
    match = re.search(expanded_regex, text)
    revised_expansion = fix_divided_expansion(
        divided_expansion, acronym, match)
    self.assertEqual(revised_expansion, correct_expansion)

    acronyms = find_expanded_acronyms(text)
    self.assertEqual(len(acronyms), 1)
    self.assertIn(acronym, acronyms)
    self.assertEqual(acronyms[acronym][0], correct_expansion)
def test_sentence_leading_caps_strip(self):
    """Check strip_extraneous_words on leading capitalised words.

    A leading word with no matching acronym letter is expected to be
    removed ('The' for NF, 'Thus' for TLAC), while a leading 'The' that
    the acronym accounts for is expected to stay.
    """
    extraneous_expansion = 'The Noodle Flopper'
    expansion = 'Noodle Flopper'
    acronym = 'NF'
    self.assertEqual(
        strip_extraneous_words(extraneous_expansion, acronym), expansion)

    not_extraneous = 'The Leading Article Company'
    acronym = 'TLAC'
    self.assertEqual(
        strip_extraneous_words(not_extraneous, acronym), not_extraneous)
    self.assertEqual(
        strip_extraneous_words('Thus The Leading Article Company', acronym),
        not_extraneous)

    # The same stripping is expected end-to-end through the finder.
    text = ('Thus The Leading Article Company (TLAC) realized its need '
            'for following verbs.')
    acronyms = find_expanded_acronyms(text)
    self.assertEqual(len(acronyms), 1)
    self.assertIn('TLAC', acronyms)
    self.assertEqual(acronyms['TLAC'], [not_extraneous])