Ejemplo n.º 1
0
    def test_find_expanded_acronyms(self):
        """Check the acronym-to-expansions mapping built by find_expanded_acronyms.

        Covers text without any acronym, a single expansion, two different
        expansions of one acronym, a repeated expansion, and an expansion
        that contains ', Inc.'.
        """
        # Text without a parenthesized acronym yields an empty result.
        acronyms = find_expanded_acronyms('Howdy Neighbor')
        self.assertEqual(len(acronyms), 0)

        acronyms = find_expanded_acronyms(
            'Did you work on the Alpha Beta Company (ABC) project?')
        self.assertEqual(len(acronyms), 1)
        self.assertIn('ABC', acronyms)
        self.assertEqual(acronyms['ABC'], ['Alpha Beta Company'])

        # Two different expansions of one acronym are both kept, in the
        # order in which they appear in the text.
        expansions = ['Different Expansions Fail', 'Dubious Expressions Fly']
        acronyms = find_expanded_acronyms(
            '{0} (DEF) if {1} (DEF)'.format(*expansions))
        self.assertEqual(len(acronyms), 1)
        self.assertIn('DEF', acronyms)
        self.assertEqual(len(acronyms['DEF']), 2)
        self.assertListEqual(acronyms['DEF'], expansions)

        # Make sure that the same expansion doesn't show up multiple times
        acronyms = find_expanded_acronyms(
            '{0} (DEF) if {1} (DEF) {0} (DEF)'.format(*expansions))
        self.assertEqual(len(acronyms), 1)
        self.assertIn('DEF', acronyms)
        self.assertEqual(len(acronyms['DEF']), 2)
        self.assertListEqual(acronyms['DEF'], expansions)
        # TODO: Consider keeping a count of expansions.

        # Make sure that things like '..., Inc.' work ok; the expected
        # expansion does not include the period after 'Inc'.
        acronyms = find_expanded_acronyms(
            'I think he works at My Favorite Company, Inc. (MFCI), or at least he used to.'
        )
        self.assertEqual(len(acronyms), 1)
        self.assertIn('MFCI', acronyms)
        self.assertEqual(acronyms['MFCI'][0], 'My Favorite Company, Inc')
Ejemplo n.º 2
0
    def test_expanded_acronyms_sentence_leading_caps(self):
        """Check how a leading capitalized 'The' is treated in an expansion.

        'The' is expected to be dropped when the acronym has no letter for it
        (NOCF) and kept when the acronym starts with its initial (TNODF).
        """
        # 'The' only starts the sentence here; it is not part of NOCF.
        acronyms = find_expanded_acronyms(
            'The New Old Cat Factory (NOCF) is big.')
        self.assertEqual(len(acronyms), 1)
        self.assertIn('NOCF', acronyms)
        self.assertEqual(acronyms['NOCF'], ['New Old Cat Factory'])

        # Here the leading 'T' of TNODF accounts for 'The'.
        acronyms = find_expanded_acronyms(
            'Come visit The New Old Dog Factory (TNODF)')
        self.assertEqual(len(acronyms), 1)
        self.assertIn('TNODF', acronyms)
        self.assertEqual(acronyms['TNODF'], ['The New Old Dog Factory'])
Ejemplo n.º 3
0
    def test_acronyms_with_embedded_acronyms(self):
        """Check expansions whose words contain several capitals (e.g. 'ALPha').

        Each text uses the same expansion twice, the second time right after
        'Appendix Q.6'; the expected result holds that expansion only once.
        """
        text = (
            'The ALPha Beta Company (ALPBC) is a company.  Yes, ALPBC is for realz.  '
            'More in Appendix Q.6 ALPha Beta Company (ALPBC).'
        )
        acronyms = find_expanded_acronyms(text)
        self.assertEqual(len(acronyms), 1)
        self.assertIn('ALPBC', acronyms)
        self.assertEqual(len(acronyms['ALPBC']), 1)
        self.assertEqual(acronyms['ALPBC'], ['ALPha Beta Company'])

        # Same scenario, with extra capitals inside every word.
        text = (
            'The ALPha BeTa ComPany (ALPBC) is a company.  Yes, ALPBC is for realz.  '
            'More in Appendix Q.6 ALPha BeTa ComPany (ALPBC).'
        )
        acronyms = find_expanded_acronyms(text)
        self.assertEqual(len(acronyms), 1)
        self.assertIn('ALPBC', acronyms)
        self.assertEqual(len(acronyms['ALPBC']), 1)
        self.assertEqual(acronyms['ALPBC'], ['ALPha BeTa ComPany'])
Ejemplo n.º 4
0
    def test_defined_but_not_used_acronyms(self):
        """Check that find_unused_acronyms reports only 'LOL'.

        ABC and RRG are expanded in the first text; 'ABC RRG LOL' is passed
        to find_acronyms, so LOL is the only one without an expansion.
        """
        acronyms = find_expanded_acronyms(
            "The Alpha Beta Company (ABC) project was Really Really Good (RRG)"
        )
        defined_acronyms = find_acronyms("ABC RRG LOL")
        self.assertEqual(len(acronyms), 2)
        self.assertEqual(len(defined_acronyms), 3)

        unused_acronyms = find_unused_acronyms(acronyms, defined_acronyms)
        self.assertEqual(len(unused_acronyms), 1)
        self.assertIn('LOL', unused_acronyms)
Ejemplo n.º 5
0
    def test_pluralized_acronyms(self):
        """Check that a plural 's' is dropped from the acronym key.

        Both find_acronyms and find_expanded_acronyms are expected to file
        'CDFs' under 'CDF'.
        """
        # A bare acronym has no expansion; the expected value is [''].
        acronyms = find_acronyms('There are four CDFs due today')
        self.assertEqual(len(acronyms), 1)
        self.assertIn('CDF', acronyms)
        self.assertEqual(acronyms['CDF'], [''])

        # The expansion itself keeps its plural form ('Files').
        acronyms = find_expanded_acronyms(
            'The Corporate Data Files (CDFs) are missing.')
        self.assertEqual(len(acronyms), 1)
        self.assertIn('CDF', acronyms)
        self.assertEqual(acronyms['CDF'], ['Corporate Data Files'])
Ejemplo n.º 6
0
    def test_should_not_span_expansions_across_any_line_breaks(self):
        """Check that an expansion is not extended across a line break.

        Each text has a heading line ending in the expansion's words, followed
        by a line with the actual expansion; only the latter is expected.
        Both newline and carriage-return separators are covered.
        """
        # Newline between the heading and the sentence.
        acronyms = find_expanded_acronyms(
            'Section 1.3.2 Alpha Beta Company\nThe Alpha Beta Company (ABC)')
        self.assertEqual(len(acronyms), 1)
        self.assertIn('ABC', acronyms)
        self.assertEqual(acronyms['ABC'], ['Alpha Beta Company'])

        # Carriage return between the heading and the sentence.
        acronyms = find_expanded_acronyms(
            'Section 1.3.2 Alpha Beta Company\rThe Alpha Beta Company (ABC)')
        self.assertEqual(len(acronyms), 1)
        self.assertIn('ABC', acronyms)
        self.assertEqual(acronyms['ABC'], ['Alpha Beta Company'])

        # This one isn't strictly about line breaks, but the most common place for a dash to
        # mess something up is a heading followed by a new paragraph that starts with the
        # acronym's expansion, where the expansion includes a dash or other tricky punctuation.
        acronyms = find_expanded_acronyms(
            '1.  New Good-Fair Company\rThe New Good-Fair Company (NGFC) always...'
        )
        self.assertEqual(len(acronyms), 1)
        self.assertIn('NGFC', acronyms)
        self.assertEqual(acronyms['NGFC'], ['New Good-Fair Company'])
Ejemplo n.º 7
0
    def test_catch_divided_expansions(self):
        """Check that a truncated expansion is restored to the full one.

        fix_divided_expansion is given only the tail of the expansion
        ('Preposition Company, Inc') together with the acronym and the regex
        match, and is expected to return the complete expansion.
        find_expanded_acronyms is expected to report the same full expansion.
        """
        correct_expansion = 'The Leading Article and Preposition Company, Inc'
        acronym = 'TLAPCI'
        divided_expansion = 'Preposition Company, Inc'
        text = (
            "When I worked for {0} ({1})".format(correct_expansion, acronym) +
            ", we made sure that prepositions were never something we ended sentences with."
        )

        match = re.search(expanded_regex, text)
        revised_expansion = fix_divided_expansion(divided_expansion, acronym,
                                                  match)

        self.assertEqual(correct_expansion, revised_expansion)

        acronyms = find_expanded_acronyms(text)
        self.assertEqual(len(acronyms), 1)
        self.assertIn(acronym, acronyms)
        self.assertEqual(correct_expansion, acronyms[acronym][0])
Ejemplo n.º 8
0
    def test_sentence_leading_caps_strip(self):
        """Check that strip_extraneous_words drops unmatched leading words.

        Leading words that the acronym has no letter for ('The' for NF,
        'Thus' for TLAC) are expected to be removed, while a leading 'The'
        that the acronym accounts for (TLAC) is kept.
        """
        extraneous_expansion = 'The Noodle Flopper'
        expansion = 'Noodle Flopper'
        acronym = 'NF'

        self.assertEqual(strip_extraneous_words(extraneous_expansion, acronym),
                         expansion)

        # The 'T' in TLAC covers 'The', so nothing should be stripped.
        not_extraneous = 'The Leading Article Company'
        acronym = 'TLAC'

        self.assertEqual(strip_extraneous_words(not_extraneous, acronym),
                         not_extraneous)

        # Only the sentence-leading 'Thus' is extraneous here.
        self.assertEqual(
            strip_extraneous_words('Thus The Leading Article Company',
                                   acronym), not_extraneous)

        # The same sentence run through the full extraction.
        text = 'Thus The Leading Article Company (TLAC) realized its need for following verbs.'
        acronyms = find_expanded_acronyms(text)
        self.assertEqual(len(acronyms), 1)
        self.assertIn('TLAC', acronyms)
        self.assertEqual(acronyms['TLAC'], [not_extraneous])