Esempio n. 1
0
    def test_str(self):
        result = utils.strip_punctuation_and_non_ascii(
            'Hello, here\'s a list : - one! * two; + three')
        self.assertEqual(result, 'Hello heres a list   one  two  three')

        result = utils.strip_punctuation_and_non_ascii(
            'Watch=these, / ^other $chars@get (removed) `too\r &&')
        self.assertEqual(result, 'Watchthese  other charsget removed too\r ')
Esempio n. 2
0
    def test_unicode(self):
        result = utils.strip_punctuation_and_non_ascii(
            u'Hello‚ here’s å lˆst – □ oñe▶ ● two; +◇ thrée')
        self.assertEqual(result, u'Hello heres  lst   oe  two  thre')

        result = utils.strip_punctuation_and_non_ascii(
            u'Watch‐these, • ‘other ▹chars‒get “removed” `too\r ■')
        self.assertEqual(result, u'Watchthese  other charsget removed too\r ')
Esempio n. 3
0
    def test_str(self):
        result = utils.strip_punctuation_and_non_ascii("Hello, here's a list : - one! * two; + three")
        self.assertEqual(result, "Hello heres a list   one  two  three")

        result = utils.strip_punctuation_and_non_ascii("Watch=these, / ^other $chars@get (removed) `too\r &&")
        self.assertEqual(result, "Watchthese  other charsget removed too\r ")
Esempio n. 4
0
    def test_unicode(self):
        result = utils.strip_punctuation_and_non_ascii(u"Hello‚ here’s å lˆst – □ oñe▶ ● two; +◇ thrée")
        self.assertEqual(result, u"Hello heres  lst   oe  two  thre")

        result = utils.strip_punctuation_and_non_ascii(u"Watch‐these, • ‘other ▹chars‒get “removed” `too\r ■")
        self.assertEqual(result, u"Watchthese  other charsget removed too\r ")
Esempio n. 5
0
 def clean(self, val):
   return len(utils.strip_punctuation_and_non_ascii(val).split())
Esempio n. 6
0
 def clean(self, val):
   return len(utils.strip_punctuation_and_non_ascii(val).split())