예제 #1
0
    def test_strips_whitespace_and_punctuation(self):
        """Check mixed whitespace and punctuation is removed from both ends."""
        normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()

        # (raw input, expected normalized output), checked in order.
        cases = (
            # leading whitespace and punctuation
            ('- foo', 'foo'),
            (': foo bar', 'foo bar'),
            (' _\tfoo', 'foo'),
            (' . foo bar', 'foo bar'),
            # trailing whitespace and punctuation
            ('foo. ', 'foo'),
            ('foo bar !', 'foo bar'),
            ('foo ? ', 'foo'),
            ('foo bar\t@ ', 'foo bar'),
            # leading and trailing whitespace and punctuation
            ('- foo.', 'foo'),
            (' :foo bar !\t\t', 'foo bar'),
            ('#foo\t&', 'foo'),
            (' ^  foo bar    $', 'foo bar'),
        )
        for raw, expected in cases:
            self.assertEqual(normalize(raw), expected)
예제 #2
0
    def test_strips_punctuation(self):
        """Check punctuation directly adjacent to the text is removed."""
        normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()

        # (raw input, expected normalized output), checked in order.
        cases = (
            # leading punctuation
            ('-foo', 'foo'),
            (':foo bar', 'foo bar'),
            ('_foo', 'foo'),
            ('.foo bar', 'foo bar'),
            # trailing punctuation
            ('foo.', 'foo'),
            ('foo bar!', 'foo bar'),
            ('foo?', 'foo'),
            ('foo bar@', 'foo bar'),
            # leading and trailing punctuation
            ('-foo.', 'foo'),
            (':foo bar!', 'foo bar'),
            ('#foo&', 'foo'),
            ('^foo bar$', 'foo bar'),
        )
        for raw, expected in cases:
            self.assertEqual(normalize(raw), expected)
예제 #3
0
    def test_strips_whitespace(self):
        """Check spaces and tabs are removed from the start and end."""
        normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()

        # (raw input, expected normalized output), checked in order.
        cases = (
            # leading whitespace
            (' foo', 'foo'),
            (' foo bar', 'foo bar'),
            ('\tfoo', 'foo'),
            ('\tfoo bar', 'foo bar'),
            # trailing whitespace
            ('foo ', 'foo'),
            ('foo bar ', 'foo bar'),
            ('foo\t', 'foo'),
            ('foo bar\t', 'foo bar'),
            # leading and trailing whitespace
            (' foo ', 'foo'),
            (' foo bar ', 'foo bar'),
            (' foo\t', 'foo'),
            (' foo bar\t', 'foo bar'),
        )
        for raw, expected in cases:
            self.assertEqual(normalize(raw), expected)
예제 #4
0
    def test_does_not_strip_matched_punctuation(self):
        """Check paired quotes and brackets come back unchanged."""
        normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()

        # Every input here is expected to be returned exactly as given.
        unchanged = (
            # quotes: wrapping the whole text
            '"foo"',
            '"foo bar"',
            "'foo'",
            "'foo bar'",
            # quotes: wrapping only the leading word
            '"foo" bar',
            "'foo' bar",
            # quotes: wrapping only the trailing word
            'foo "bar"',
            "foo 'bar'",
            # parentheses and brackets: wrapping the whole text
            '(foo)',
            '(foo bar)',
            "[foo]",
            "[foo bar]",
            # parentheses and brackets: wrapping only the leading word
            '(foo) bar',
            "[foo] bar",
            # parentheses and brackets: wrapping only the trailing word
            'foo (bar)',
            "foo [bar]",
        )
        for text in unchanged:
            self.assertEqual(normalize(text), text)
예제 #5
0
    def test_leaves_internal_whitespace_alone(self):
        """Check whitespace runs inside the text are returned untouched."""
        normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()

        # Repeated spaces and an embedded newline, none at either end.
        text = 'Some text  with   odd\n internal whitespace'
        self.assertEqual(normalize(text), text)
예제 #6
0
    def test_leaves_normal_text_alone(self):
        """Check text with nothing to strip is returned unchanged."""
        normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()

        for text in ('foo', 'foo bar'):
            self.assertEqual(normalize(text), text)