Exemple #1
0
 def test_dont_condense_whitespace(self):
     """Processing a dashed string must not equal processing the undashed one.

     Both inputs go through
     StringProcessor.replace_non_letters_non_numbers_with_whitespace; the
     two results are expected to differ.
     """
     replace = StringProcessor.replace_non_letters_non_numbers_with_whitespace
     with_dash = replace("new york mets - atlanta braves")
     without_dash = replace("new york mets atlanta braves")
     self.assertNotEqual(with_dash, without_dash)
Exemple #2
0
 def test_replace_non_letters_non_numbers_with_whitespace(self):
     """Check that any non-word character left after processing is a space.

     Each sample string is passed through
     StringProcessor.replace_non_letters_non_numbers_with_whitespace, and
     every non-word match found in the result must be exactly one space.
     """
     # The pattern is independent of the input, so compile it once instead
     # of on every loop iteration.
     regex = re.compile(r"(?ui)[\W]")
     strings = ["new york mets - atlanta braves", "Cães danados", "New York //// Mets $$$", "Ça va?"]
     for string in strings:
         proc_string = StringProcessor.replace_non_letters_non_numbers_with_whitespace(string)
         for expr in regex.finditer(proc_string):
             # Report the offending input so a failure is easy to trace.
             self.assertEqual(
                 expr.group(), " ",
                 "unexpected non-word character %r left in %r"
                 % (expr.group(), proc_string))
Exemple #3
0
 def test_replace_non_letters_non_numbers_with_whitespace(self):
     """Verify that processed strings contain no non-word character but space.

     Every sample is run through
     StringProcessor.replace_non_letters_non_numbers_with_whitespace; each
     non-word match in the output is then compared against a single space.
     """
     strings = ["new york mets - atlanta braves", "Cães danados",
                "New York //// Mets $$$", "Ça va?"]
     # Compiled once up front: the pattern never changes between samples.
     non_word = re.compile(r"(?ui)[\W]")
     for string in strings:
         proc_string = StringProcessor.replace_non_letters_non_numbers_with_whitespace(string)
         for match in non_word.finditer(proc_string):
             self.assertEqual(match.group(), " ")
Exemple #4
0
def full_process(s, force_ascii=False):
    """Normalise a string through the StringProcessor pipeline.

    Steps, in the order they are applied:
        -- if force_ascii is truthy, pass s through asciidammit
        -- StringProcessor.replace_non_letters_non_numbers_with_whitespace
        -- StringProcessor.to_lower_case
        -- StringProcessor.strip

    Returns the value produced by the final strip call.
    """
    # Optional ASCII coercion happens before any other processing.
    text = asciidammit(s) if force_ascii else s
    # Anything that is not a letter or a number is handed to the replacer.
    spaced = StringProcessor.replace_non_letters_non_numbers_with_whitespace(text)
    lowered = StringProcessor.to_lower_case(spaced)
    # Leading and trailing whitespace is dropped last.
    return StringProcessor.strip(lowered)
Exemple #5
0
 def test_dont_condense_whitespace(self):
     """The dashed and undashed team strings must process to different values.

     Each input is passed through
     StringProcessor.replace_non_letters_non_numbers_with_whitespace and the
     two outputs are asserted to be unequal.
     """
     inputs = ("new york mets - atlanta braves",
               "new york mets atlanta braves")
     processed = [
         StringProcessor.replace_non_letters_non_numbers_with_whitespace(text)
         for text in inputs
     ]
     self.assertNotEqual(processed[0], processed[1])