def preprocess(text, tokenizer, preprocessor):
    """Normalize ``text`` and tokenize the result.

    Stages, applied in this order:

    1. lowercase the text,
    2. ``replace_url``,
    3. ``replace_user``,
    4. ``replace_number``,
    5. the caller-supplied ``preprocessor``,
    6. the caller-supplied ``tokenizer``.

    Args:
        text: Input string (anything exposing ``.lower()``).
        tokenizer: Callable applied last; its result is returned unchanged.
        preprocessor: Callable applied after the built-in replacements and
            before tokenizing.

    Returns:
        Whatever ``tokenizer`` returns for the fully normalized text.
    """
    lowered = text.lower()
    without_urls = replace_url(lowered)
    without_users = replace_user(without_urls)
    without_numbers = replace_number(without_users)
    cleaned = preprocessor(without_numbers)
    return tokenizer(cleaned)
def test_replaces_http_urls(self):
    # A bare http:// URL should be swapped for the fixture's sentinel value.
    url = 'http://www.blah.com'
    replaced = preprocess.replace_url(url)
    self.assertEqual(self.sentinel_value, replaced)
def preprocess(text, tokenizer, preprocessor):
    """Run ``text`` through the normalization pipeline and tokenize it.

    The text is lowercased first, then handed through ``replace_url``,
    ``replace_user``, ``replace_number``, the supplied ``preprocessor`` and
    finally the supplied ``tokenizer``, each stage receiving the previous
    stage's output.

    Args:
        text: Input string (anything exposing ``.lower()``).
        tokenizer: Final callable in the chain; its result is returned.
        preprocessor: Callable run after the built-in replacements.

    Returns:
        The value produced by ``tokenizer``.
    """
    # Order matters: each stage consumes the output of the one before it.
    stages = (replace_url, replace_user, replace_number, preprocessor, tokenizer)
    result = text.lower()
    for stage in stages:
        result = stage(result)
    return result
def test_replaces_urls_with_info_after_tld(self):
    # An https URL carrying a path and fragment after the TLD should still
    # be swapped, as a whole, for the fixture's sentinel value.
    url = 'https://www.t.co/kljkj787310#11/'
    replaced = preprocess.replace_url(url)
    self.assertEqual(self.sentinel_value, replaced)