Ejemplo n.º 1
0
    def test_normalize_text_with_map(self):
        """Verify the text and offset map from ``normalize_text_with_map``.

        The normalized text must equal both the expected literal and the
        result of ``normalize_text`` called with the same options. Selected
        map entries are then compared with offsets counted by hand on the
        ruler below.
        """
        options = {'lowercase': False, 'use_stemmer': False}
        source = 'One one Bankr. E.D.N.C. two two two.'
        normalized, offsets = normalize_text_with_map(source, **options)
        plain = normalize_text(source, **options)

        expected = ' One one Bankr . E . D . N . C . two two two . '
        self.assertEqual(expected, normalized)
        self.assertEqual(plain, normalized)

        # Ruler (tens row, then units row) above the source text and its
        # normalized form, used to read off the offsets checked below.
        # pylint:disable=pointless-string-statement
        """       1         2         3         4
        01234567890123456789012345678901234567890123456
        One one Bankr. E.D.N.C. two two two.
         One one Bankr . E . D . N . C . two two two . 
        """
        # Pairs of (index into the map, expected value); per the ruler the
        # index is a position in the source and the value is the matching
        # position in the normalized text.
        checkpoints = (
            (0, 1),    # 'One' moved to ' One'
            (1, 2),
            (15, 17),  # 'E.'
            (24, 33),  # first 'two'
            (35, 45),  # final '.'
            (14, 16),  # space between 'Bankr.' and 'E.'
        )
        for map_index, wanted in checkpoints:
            self.assertEqual(wanted, offsets[map_index])
Ejemplo n.º 2
0
    def test_normalize_text_extra_spaced(self):
        """Verify the offset map when the source contains a double space.

        The text from ``normalize_text_with_map`` must equal the result of
        ``normalize_text`` called with the same options. Selected map
        entries are then compared with offsets counted by hand on the ruler
        below; entries after the double space sit one index later in the
        map than they would for the single-spaced sentence.
        """
        options = {'lowercase': False, 'use_stemmer': False}
        source = 'One one  Bankr. E.D.N.C. two two two.'
        normalized, offsets = normalize_text_with_map(source, **options)
        plain = normalize_text(source, **options)

        self.assertEqual(plain, normalized)

        # Ruler (tens row, then units row) above the source text and its
        # normalized form, used to read off the offsets checked below.
        # pylint:disable=pointless-string-statement
        """       1         2         3         4
        01234567890123456789012345678901234567890123456
        One one  Bankr. E.D.N.C. two two two.
         One one Bankr . E . D . N . C . two two two . 
        """
        # Pairs of (index into the map, expected value); per the ruler the
        # index is a position in the source and the value is the matching
        # position in the normalized text.
        checkpoints = (
            (0, 1),    # 'One' moved to ' One'
            (16, 17),  # 'E.'
            (25, 33),  # first 'two'
            (36, 45),  # final '.'
        )
        for map_index, wanted in checkpoints:
            self.assertEqual(wanted, offsets[map_index])