Example #1
0
    def test_remove_hashtags(self):
        # The trailing hashtag ("#TerrorRojo") must be absent from the
        # result; the remaining words are expected in lower case.
        tokens = tokenize("Mi época favorita es el #TerrorRojo")
        expected = ["mi", "época", "favorita", "es", "el"]

        self.assertEqual(tokens, expected)
Example #2
0
    def test_remove_non_alphas(self):
        # The purely numeric token ("1789") must not survive tokenization.
        sentence = "Mi año favorito es 1789"
        expected = ["mi", "año", "favorito", "es"]

        actual = tokenize(sentence)
        self.assertEqual(actual, expected)
Example #3
0
 def test_converts_to_lower_case(self):
     # Capitalised input is expected to come back in lower case.
     tokens = tokenize("Hola mundo")
     expected = ["hola", "mundo"]

     self.assertEqual(tokens, expected)
Example #4
0
    def test_reduce_length_with_both_cases(self):
        # Input: three upper-case 'A's, five lower-case 'a's, three 'h's.
        # Expected: six 'a's followed by three 'h's, all lower case.
        # NOTE(review): this looks like repeated-character reduction being
        # applied per case before lower-casing -- confirm against tokenize.
        tokens = tokenize("AAAaaaaahhh")
        expected = ["aaaaaahhh"]

        self.assertEqual(tokens, expected)
Example #5
0
    def test_reduce_length(self):
        # Runs of four identical letters ("aaaa", "ssss") are expected to
        # be shortened to three; the single-letter word "a" is kept as is.
        expected = ["holaaa", "a", "todosss"]
        actual = tokenize("holaaaa a todossss")

        self.assertEqual(actual, expected)
Example #6
0
    def test_remove_urls(self):
        # The URL-like token ("www.google.com") must be dropped entirely.
        sentence = "Por suerte no existía www.google.com"
        tokens = tokenize(sentence)

        expected = ["por", "suerte", "no", "existía"]
        self.assertEqual(tokens, expected)
Example #7
0
    def test_remove_handles(self):
        # The user handle ("@robespierre") must not appear in the result.
        expected = ["mi", "personaje", "favorito", "es"]
        tokens = tokenize("Mi personaje favorito es @robespierre")

        self.assertEqual(tokens, expected)