Ejemplo n.º 1
0
def preprocess(text, tokenizer, preprocessor):
    """Normalize ``text`` and hand the result to ``tokenizer``.

    The lowercased text is passed, in order, through ``replace_url``,
    ``replace_user`` and ``replace_number`` (helpers defined outside this
    block), then through the caller-supplied ``preprocessor``, and finally
    through ``tokenizer``.

    Args:
        text: Input string; only ``.lower()`` is called on it directly.
        tokenizer: Callable applied last; its result is returned unchanged.
        preprocessor: Callable applied after the three ``replace_*`` helpers.

    Returns:
        Whatever ``tokenizer`` returns for the fully processed text.
    """
    # Stages listed in application order; each receives the previous output.
    stages = (replace_url, replace_user, replace_number, preprocessor, tokenizer)
    result = text.lower()
    for stage in stages:
        result = stage(result)
    return result
 def test_replaces_http_urls(self):
     """``replace_url`` on a bare ``http://`` URL must equal the sentinel."""
     # NOTE(review): ``preprocess`` here is presumably the module under
     # test and ``sentinel_value`` a fixture set elsewhere -- confirm.
     url = 'http://www.blah.com'
     replaced = preprocess.replace_url(url)
     self.assertEqual(self.sentinel_value, replaced)
Ejemplo n.º 3
0
def preprocess(text, tokenizer, preprocessor):
    """Run ``text`` through the normalization pipeline and tokenize it.

    Steps, in order: lowercase the text, apply ``replace_url``,
    ``replace_user`` and ``replace_number`` (helpers defined outside this
    block), apply the caller-supplied ``preprocessor``, then apply
    ``tokenizer``.

    Args:
        text: Input string; only ``.lower()`` is called on it directly.
        tokenizer: Final callable; its return value is returned as-is.
        preprocessor: Callable applied just before tokenization.

    Returns:
        The value produced by ``tokenizer``.
    """
    # Application order matters: each step consumes the previous step's output.
    pipeline = (replace_url, replace_user, replace_number, preprocessor, tokenizer)
    current = text.lower()
    for step in pipeline:
        current = step(current)
    return current
 def test_replaces_urls_with_info_after_tld(self):
     """A URL with a path and fragment after the TLD must equal the sentinel."""
     # NOTE(review): ``preprocess`` here is presumably the module under
     # test and ``sentinel_value`` a fixture set elsewhere -- confirm.
     url = 'https://www.t.co/kljkj787310#11/'
     replaced = preprocess.replace_url(url)
     self.assertEqual(self.sentinel_value, replaced)