def preprocess(text, tokenizer, preprocessor):
    """Normalize ``text`` and hand it to ``tokenizer``.

    The stages run in this order: lowercase the text, apply
    ``replace_url``, ``replace_user`` and ``replace_number``, apply the
    caller-supplied ``preprocessor``, then tokenize.

    Args:
        text: Input string; only ``.lower()`` is called on it directly.
        tokenizer: Callable applied last to the fully normalized text.
        preprocessor: Callable applied after the replacement steps and
            before tokenization.

    Returns:
        Whatever ``tokenizer`` returns for the normalized text.
    """
    lowered = text.lower()
    url_replaced = replace_url(lowered)
    user_replaced = replace_user(url_replaced)
    number_replaced = replace_number(user_replaced)
    cleaned = preprocessor(number_replaced)
    return tokenizer(cleaned)
def test_replace_at_user_does_nothing(self):
    """Text without an @-mention must come back from replace_user unchanged."""
    plain_text = 'now you can run for your life'
    result = preprocess.replace_user(plain_text)
    # The input doubles as the expected value: nothing should be replaced.
    self.assertEqual(plain_text, result)
def preprocess(text, tokenizer, preprocessor):
    """Normalize ``text`` and hand it to ``tokenizer``.

    The stages run in this order: lowercase the text, apply
    ``replace_url``, ``replace_user`` and ``replace_number``, apply the
    caller-supplied ``preprocessor``, then tokenize.

    Args:
        text: Input string; only ``.lower()`` is called on it directly.
        tokenizer: Callable applied last to the fully normalized text.
        preprocessor: Callable applied after the replacement steps and
            before tokenization.

    Returns:
        Whatever ``tokenizer`` returns for the normalized text.
    """
    lowered = text.lower()
    url_replaced = replace_url(lowered)
    user_replaced = replace_user(url_replaced)
    number_replaced = replace_number(user_replaced)
    cleaned = preprocessor(number_replaced)
    return tokenizer(cleaned)
def test_replace_at_user(self):
    """An @-mention in the text must be replaced by the literal 'user'."""
    tweet = 'huh @hotlinebling'
    wanted = 'huh user'
    result = preprocess.replace_user(tweet)
    self.assertEqual(wanted, result)