def test_normalize_text_with_map(self):
    # Normalize a sample containing dotted abbreviations and verify both the
    # resulting text and the source-index -> normalized-index map that
    # normalize_text_with_map returns alongside it.
    src = 'One one Bankr. E.D.N.C. two two two.'
    expected_text = ' One one Bankr . E . D . N . C . two two two . '

    normalized, src_to_dst = normalize_text_with_map(
        src, lowercase=False, use_stemmer=False)
    plain = normalize_text(src, lowercase=False, use_stemmer=False)

    self.assertEqual(expected_text, normalized)
    # The map-producing variant must yield the same text as normalize_text.
    self.assertEqual(plain, normalized)

    # Index ruler over the source (third row) and normalized (fourth row) text.
    # pylint:disable=pointless-string-statement
    """
              1         2         3         4
    01234567890123456789012345678901234567890123456
    One one Bankr. E.D.N.C. two two two.
     One one Bankr . E . D . N . C . two two two .
    """

    # (index in src, expected index in the normalized text)
    expected_positions = (
        (0, 1),    # 'One' moved to ' One'
        (1, 2),
        (15, 17),  # 'E.'
        (24, 33),  # first 'two'
        (35, 45),  # final '.'
        (14, 16),  # space between 'Bankr.' and 'E.'
    )
    for src_index, dst_index in expected_positions:
        self.assertEqual(dst_index, src_to_dst[src_index])
def test_reverse_src_to_dest_map(self):
    # Build the source -> normalized index map for a sample sentence, invert
    # it with reverse_src_to_dest_map and check that normalized positions
    # point back at the expected source positions.
    src = 'One one Bankr. E.D.N.C. two two two.'
    dst, mp = normalize_text_with_map(src, lowercase=False, use_stemmer=False)
    # The forward map is expected to hold one entry per source character.
    self.assertEqual(len(src), len(mp))

    # Named dst_to_src rather than `reversed` so the builtin is not shadowed
    # and no pylint suppression is needed.
    dst_to_src = reverse_src_to_dest_map(mp, len(dst))
    # The inverted map is expected to hold one entry per normalized character.
    self.assertEqual(len(dst), len(dst_to_src))

    # (index in the normalized text, expected index in src)
    expected_positions = (
        (9, 8),    # 'Bankr.'
        (17, 15),  # 'E . D . N . C .'
        (33, 24),  # first 'two'
        (41, 32),  # last 'two'
        (45, 35),  # last '.'
    )
    for dst_index, src_index in expected_positions:
        self.assertEqual(src_index, dst_to_src[dst_index])

    # The trailing space of the normalized text has no source character of
    # its own, so it must share the source index of the entry before it.
    self.assertEqual(dst_to_src[-2], dst_to_src[-1])
def test_normalize_text_extra_spaced(self):
    # Same sentence as the single-spaced fixture, but with one doubled space
    # in the source, so every source index after it is shifted by one while
    # the expected normalized indices stay the same.
    #
    # With a single-spaced source the assertions below cannot hold: 'E' would
    # sit at source index 15 (not 16) and index 36 would be past the end of
    # the 36-character string.
    # NOTE(review): the doubled space is placed after 'One'; any single
    # doubled space before 'E.' fits the asserted indices - confirm the
    # placement against the intended fixture.
    src = 'One  one Bankr. E.D.N.C. two two two.'
    dst, mp = normalize_text_with_map(src, lowercase=False, use_stemmer=False)
    simply_normalized = normalize_text(src, lowercase=False, use_stemmer=False)
    # The map-producing variant must yield the same text as normalize_text.
    self.assertEqual(simply_normalized, dst)

    # Index ruler over the source (third row) and the normalized text that
    # the assertions below presuppose (fourth row).
    # pylint:disable=pointless-string-statement
    """
              1         2         3         4
    01234567890123456789012345678901234567890123456
    One  one Bankr. E.D.N.C. two two two.
     One one Bankr . E . D . N . C . two two two .
    """

    # (index in src, expected index in the normalized text)
    expected_positions = (
        (0, 1),    # 'One' moved to ' One'
        (16, 17),  # 'E.'
        (25, 33),  # first 'two'
        (36, 45),  # final '.'
    )
    for src_index, dst_index in expected_positions:
        self.assertEqual(dst_index, mp[src_index])