Ejemplo n.º 1
0
    def test_removes_age_and_gender_in_parentheses(self):
        # Bracketed age/gender markers, written in either order, are expected
        # to disappear, leaving one space between the surrounding words.
        normalize = normalizers.RemoveAgeGenderMarkersNormalizer()
        cleaned = 'foo and bar.'

        age_first = (
            # upper case gender
            'foo (27M) and bar (39F).',
            # lower case gender
            'foo (27m) and bar (39f).',
            # alternative brackets
            'foo {27M} and bar [39F].',
            # optional space or punctuation between age and gender
            'foo (27 M) and bar (39 F).',
            'foo (27, M) and bar (39, F).',
            'foo (27.M) and bar (39.F).',
            'foo (27:M) and bar (39:F).',
        )
        gender_first = (
            # upper case gender
            'foo (M27) and bar (F39).',
            # lower case gender
            'foo (m27) and bar (f39).',
            # alternative brackets
            'foo {M27} and bar [F39].',
            # optional space or punctuation between gender and age
            'foo (M 27) and bar (F 39).',
            'foo (M, 27) and bar (F, 39).',
            'foo (M.27) and bar (F.39).',
            'foo (M:27) and bar (F:39).',
        )

        # Same checking order as listing the assertions one by one:
        # all "age then gender" samples, then all "gender then age" samples.
        for raw in age_first + gender_first:
            self.assertEqual(normalize(raw), cleaned)
Ejemplo n.º 2
0
    def test_leaves_parenthesis_without_age_gender_markers_alone(self):
        # Parenthesised content that is not an age/gender marker must come
        # back exactly as it went in.
        normalize = normalizers.RemoveAgeGenderMarkersNormalizer()

        untouched = (
            # single word in parentheses
            'foo (bar)',
            # multiple words in parentheses
            'foo (bar baz)',
        )
        for raw in untouched:
            self.assertEqual(normalize(raw), raw)
Ejemplo n.º 3
0
    def test_leaves_normal_text_alone(self):
        # Text without any markers (including the empty string) is expected
        # to be returned unchanged.
        normalize = normalizers.RemoveAgeGenderMarkersNormalizer()

        plain_samples = (
            '',
            'foo',
            'foo bar',
            'Some normal text, i.e. a typical example.',
        )
        for raw in plain_samples:
            self.assertEqual(normalize(raw), raw)
Ejemplo n.º 4
0
    def test_leaves_non_age_numbers_alone(self):
        # Four-digit years must not be mistaken for ages, whether or not
        # they are wrapped in parentheses.
        normalize = normalizers.RemoveAgeGenderMarkersNormalizer()

        years = (
            # common years (without parentheses)
            '1970',
            '2019',
            # common years (with parentheses)
            '(1970)',
            '(2019)',
        )
        for raw in years:
            self.assertEqual(normalize(raw), raw)
Ejemplo n.º 5
0
    def test_removes_age_and_gender_without_parentheses(self):
        # Bare (unbracketed) age/gender markers are expected to be removed
        # in both orders and in both letter cases.
        normalize = normalizers.RemoveAgeGenderMarkersNormalizer()
        cleaned = 'foo and bar.'

        samples = [
            # age then gender: upper case, then lower case
            'foo 27M and bar 39F.',
            'foo 27m and bar 39f.',
            # gender then age: upper case, then lower case
            'foo M27 and bar F39.',
            'foo m27 and bar f39.',
        ]
        for raw in samples:
            self.assertEqual(normalize(raw), cleaned)
Ejemplo n.º 6
0
    def test_removes_genders_in_parentheses(self):
        # A lone gender letter inside (), [] or {} is expected to be removed.
        normalize = normalizers.RemoveAgeGenderMarkersNormalizer()

        # (input, expected output) pairs
        cases = (
            ('foo (m)', 'foo'),
            ('bar [f] baz', 'bar baz'),
            ('foo {m} and bar {f}', 'foo and bar'),
        )
        for raw, cleaned in cases:
            self.assertEqual(normalize(raw), cleaned)
Ejemplo n.º 7
0
    def test_removes_ages_in_parentheses(self):
        # A lone one- or two-digit age inside (), [] or {} is expected to be
        # removed.
        normalize = normalizers.RemoveAgeGenderMarkersNormalizer()

        # (input, expected output) pairs
        cases = (
            ('foo (20)', 'foo'),
            ('bar [19] baz', 'bar baz'),
            ('foo {7} and bar {4}', 'foo and bar'),
        )
        for raw, cleaned in cases:
            self.assertEqual(normalize(raw), cleaned)