Example #1
 def test_filters_punctuation_correctly(self):
     """Tokens matching the punctuation pattern are dropped from the sentence."""
     punctuation_filter = RegexFilter(patterns=[r"\(|\)|\"|\."])
     remaining = [token.text for token in punctuation_filter.filter_words(self.sentence)]
     # Only the word and number tokens survive; all punctuation is gone.
     assert remaining == ["this", "45", "sentence", "has", "9943434", "punctuations"]
Example #2
 def test_filters_digits_correctly(self):
     """Purely numeric tokens are removed by the digit pattern."""
     digit_filter = RegexFilter(patterns=[r"\d+"])
     filtered_tokens = [token.text for token in digit_filter.filter_words(self.sentence)]
     # Words and punctuation remain; the numbers "45" and "9943434" are filtered out.
     expected = ["this", "(", "sentence", ")", "has", '"', "punctuations", '"', "."]
     assert filtered_tokens == expected
Example #3
 def test_filters_no_match_correctly(self):
     """A pattern that matches no token leaves the sentence untouched."""
     no_match_filter = RegexFilter(patterns=[r'&'])
     expected = [
         "this", "45", "(", "sentence", ")", "has", "9943434", '"',
         "punctuations", '"', '.'
     ]
     result = [token.text for token in no_match_filter.filter_words(self.sentence)]
     assert result == expected
Example #4
 def test_filters_multiple_patterns_correctly(self):
     """Multiple patterns are applied together: punctuation and digits are both removed."""
     combined_filter = RegexFilter(patterns=[r'\(|\)|\"|\.', r'[\d+]'])
     surviving = [token.text for token in combined_filter.filter_words(self.sentence)]
     assert surviving == ["this", "sentence", "has", "punctuations"]
Example #5
 def test_filters_no_match_correctly(self):
     """Filtering with a pattern absent from the sentence keeps every token."""
     unmatched = RegexFilter(patterns=[r'&'])
     observed = [token.text for token in unmatched.filter_words(self.sentence)]
     # Nothing matches '&', so the full token sequence comes back unchanged.
     assert observed == [
         "this", "45", "(", "sentence", ")", "has", "9943434", '"', "punctuations", '"', '.'
     ]
Example #6
 def test_filters_multiple_patterns_correctly(self):
     """Passing several patterns removes every token matched by any of them."""
     multi_filter = RegexFilter(patterns=[r'\(|\)|\"|\.', r'[\d+]'])
     kept = [token.text for token in multi_filter.filter_words(self.sentence)]
     expected = ["this", "sentence", "has", "punctuations"]
     assert kept == expected
Example #7
 def test_filters_punctuation_correctly(self):
     """Parentheses, quotes, and periods are stripped by the punctuation pattern."""
     strip_punct = RegexFilter(patterns=[r'\(|\)|\"|\.'])
     observed = [token.text for token in strip_punct.filter_words(self.sentence)]
     expected = ["this", "45", "sentence", "has", "9943434", "punctuations"]
     assert observed == expected