def test_phone(self):
    """Check that replace_phone swaps each phone number for the given token.

    Every sample writes the same number with different spacing; calling
    replace_phone with the token "1" must yield the same text for all of them.
    """
    expected = u"Has a phone 1"
    samples = (
        u"Has a phone +351 99 234 23",
        u"Has a phone +3519923423",
        u"Has a phone +351 9923423",
    )
    for index, sample in enumerate(samples):
        # One sub-test per sample so a failure reports which variant broke.
        with self.subTest(i=index):
            self.assertEqual(replace_phone("1", sample), expected)
def word_count(text, lang=None):
    """Count the words in *text* after stripping tags and masking entities.

    Tags are removed with remove_tags, then URLs, emails, dates, phone
    numbers and money amounts are each replaced by a single placeholder
    token before counting.

    Args:
        text: The text to count words in.
        lang: Optional language identifier. When it is given and is not in
            asian_languages, the cleaned text is passed to word_count_aux
            as is. When it is None or is in asian_languages, the sum of
            is_asian() over the characters is added to the word_count_aux
            result for the text after filter_jchars has been applied to
            each character.

    Returns:
        The word count as computed above.
    """
    cleaned, _ = remove_tags(text)

    # Mask elements that would otherwise be broken into several words.
    # The replacers run in this exact order.
    maskers = (
        (replace_url, u'url'),
        (replace_email, u'email'),
        (replace_date, u'date'),
        (replace_phone, u'phone'),
        (replace_money, u'money'),
    )
    for masker, token in maskers:
        cleaned = masker(token, cleaned)

    # Language known and not in asian_languages: plain word count is enough.
    if lang is not None and lang not in asian_languages:
        return word_count_aux(cleaned)

    # Unknown language or one in asian_languages: add the is_asian() total
    # to the word count of the filter_jchars-processed text.
    # NOTE(review): presumably filter_jchars blanks out the characters that
    # is_asian() counts -- confirm against its definition.
    asian_total = sum(is_asian(char) for char in cleaned)
    remainder = "".join(filter_jchars(char) for char in cleaned)
    return word_count_aux(remainder) + asian_total