def test_str(self): result = utils.strip_punctuation_and_non_ascii( 'Hello, here\'s a list : - one! * two; + three') self.assertEqual(result, 'Hello heres a list one two three') result = utils.strip_punctuation_and_non_ascii( 'Watch=these, / ^other $chars@get (removed) `too\r &&') self.assertEqual(result, 'Watchthese other charsget removed too\r ')
def test_unicode(self): result = utils.strip_punctuation_and_non_ascii( u'Hello‚ here’s å lˆst – □ oñe▶ ● two; +◇ thrée') self.assertEqual(result, u'Hello heres lst oe two thre') result = utils.strip_punctuation_and_non_ascii( u'Watch‐these, • ‘other ▹chars‒get “removed” `too\r ■') self.assertEqual(result, u'Watchthese other charsget removed too\r ')
def test_str(self): result = utils.strip_punctuation_and_non_ascii("Hello, here's a list : - one! * two; + three") self.assertEqual(result, "Hello heres a list one two three") result = utils.strip_punctuation_and_non_ascii("Watch=these, / ^other $chars@get (removed) `too\r &&") self.assertEqual(result, "Watchthese other charsget removed too\r ")
def test_unicode(self): result = utils.strip_punctuation_and_non_ascii(u"Hello‚ here’s å lˆst – □ oñe▶ ● two; +◇ thrée") self.assertEqual(result, u"Hello heres lst oe two thre") result = utils.strip_punctuation_and_non_ascii(u"Watch‐these, • ‘other ▹chars‒get “removed” `too\r ■") self.assertEqual(result, u"Watchthese other charsget removed too\r ")
def clean(self, val): return len(utils.strip_punctuation_and_non_ascii(val).split())