예제 #1
0
class TestRegex(unittest.TestCase):
    """Exercise TrieRegEx.regex() in matching and add/remove scenarios."""

    def setUp(self):
        """Create an empty trie and the two word lists used by the tests."""
        self.tre = TRE()
        # Each entry extends the previous one by a single letter.
        self.words = ['p', 'pe', 'pea', 'pear']
        self.more_words = [
            'orange', 'kumquat', 'tangerine', 'mandarin',
            'pomelo', 'yuzu', 'grapefruit', 'lemon',
            'lime', 'bergamot', 'citron', 'clementine',
            'satsuma', 'tangelo', 'mikan', 'tangor',
            'mint', 'peppermint', 'spearmint', 'basil',
            'cilantro', 'coriander', 'chives', 'parsley',
            'oregano', 'rosemary', 'thyme', 'scallion',
            'ginger', 'garlic', 'onion', 'galangal',
        ]

    def _bounded_pattern(self):
        """Return the trie's current regex between word-boundary anchors."""
        return rf'\b{self.tre.regex()}\b'

    def test_match_all_incrementals(self):
        """Every word of the incremental list is found as a whole word."""
        self.tre.add(*self.words)
        haystack = ' '.join(self.words)
        matches = re.findall(self._bounded_pattern(), haystack)

        matches.sort()
        self.assertEqual(matches, sorted(self.words))

    def test_does_not_match_larger_string(self):
        """A trie holding only 'p' finds nothing in the text 'pe'."""
        self.tre.add('p')
        matches = re.findall(self._bounded_pattern(), 'pe')
        self.assertEqual(matches, [])

    def test_does_not_match_substring(self):
        """A word left out of the trie is not matched inside the text."""
        _, *longer_words = self.words  # leave out 'p'
        self.tre.add(*longer_words)
        haystack = ' '.join(self.words)
        matches = re.findall(self._bounded_pattern(), haystack)
        self.assertEqual(matches, sorted(longer_words),
                         msg="'p' should not be captured")

    def test_empty_trie_returns_empty_string_regex(self):
        """regex() on a trie with no words yields the empty string."""
        empty_regex = self.tre.regex()
        self.assertEqual(empty_regex, '')

    def test_match_all_words(self):
        """Every word of the larger list is found as a whole word."""
        self.tre.add(*self.more_words)
        haystack = ' '.join(self.more_words)
        matches = re.findall(self._bounded_pattern(), haystack)
        matches.sort()
        self.assertEqual(matches, sorted(self.more_words))

    def test_match_all_words_surrounded_by_spaces(self):
        """Only words with a space on both sides are matched."""
        ordered = sorted(self.more_words)
        self.tre.add(*ordered)
        spaced_pattern = f"(?<= ){self.tre.regex()}(?= )"
        matches = re.findall(spaced_pattern, ' '.join(ordered))
        # In the joined text the first word has no space before it and
        # the last word has none after it.
        inner_words = ordered[1:-1]
        self.assertEqual(
            matches, inner_words,
            'First and last item in sorted words list should not be matched.')

    def test_added_word_reflected_in_new_regex_call(self):
        """regex() output changes after a further word is added."""
        before_add = 'p(?:e(?:ar?)?)?'
        after_add = 'p(?:e(?:a[kr]?)?)?'

        self.tre.add(*self.words)
        self.assertEqual(
            self.tre.regex(), before_add,
            msg='Setup for the real test in the next assertEqual')

        self.tre.add('peak')
        self.assertEqual(self.tre.regex(), after_add)

    def test_removed_word_reflected_in_new_regex_call(self):
        """regex() output changes after a word is removed."""
        before_remove = 'p(?:e(?:a[kr]?)?)?'
        after_remove = 'p(?:e(?:ar?)?)?'

        self.tre.add(*[*self.words, 'peak'])
        self.assertEqual(
            self.tre.regex(), before_remove,
            msg='Setup for the real test in the next assertEqual')

        self.tre.remove('peak')
        self.assertEqual(self.tre.regex(), after_remove)

    def test_multiple_adding_removing_reflected(self):
        """regex() stays correct across interleaved add/remove calls.

        Also verifies that the memoizer cache is cleared in the right
        places, so .add(), .remove(), and .regex() keep working together
        as expected.
        """
        self.tre.add(*self.words)
        self.assertEqual(
            self.tre.regex(), 'p(?:e(?:ar?)?)?',
            msg='Setup for the real test in the next assertEqual')

        # First round of single-word mutations, applied in order.
        first_round = (
            (self.tre.add, 'peak'),
            (self.tre.remove, 'pe'),
            (self.tre.add, 'river'),
            (self.tre.add, 'rich'),
            (self.tre.remove, 'pea'),
            (self.tre.remove, 'peak'),
        )
        for mutate, word in first_round:
            mutate(word)
        self.assertEqual(self.tre.regex(), '(?:ri(?:ver|ch)|p(?:ear)?)')

        # Second round, followed by re-adding the whole incremental list.
        second_round = (
            (self.tre.add, 'peak'),
            (self.tre.remove, 'peak'),
            (self.tre.remove, 'pear'),
        )
        for mutate, word in second_round:
            mutate(word)
        self.tre.add(*self.words)
        self.assertEqual(self.tre.regex(), '(?:p(?:e(?:ar?)?)?|ri(?:ver|ch))')
예제 #2
0
class TestTrieRegEx(unittest.TestCase):
    """Basic checks for each function of the trieregex.TrieRegEx class.

    More thorough tests live in separate files named after the function
    they cover.
    """

    def setUp(self):
        """Build a trie from a fixed list of seven words."""
        self.words = [
            'heart', 'healthy', 'pear', 'peach', 'lark', 'look', 'change',
        ]
        self.tre = TRE(*self.words)

    def test_add(self):
        """The internal trie holds one nested-dict path per added word."""
        # Each word's path ends in a '**' key mapped to an empty dict.
        expected_trie = {
            'c': {'h': {'a': {'n': {'g': {'e': {'**': {}}}}}}},
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'h': {
                'e': {
                    'a': {
                        'l': {'t': {'h': {'y': {'**': {}}}}},
                        'r': {'t': {'**': {}}},
                    },
                },
            },
            'p': {
                'e': {
                    'a': {
                        'c': {'h': {'**': {}}},
                        'r': {'**': {}},
                    },
                },
            },
        }
        self.assertEqual(
            self.tre._trie, expected_trie,
            msg="Words were not added to the trie (._trie) properly")

    def test_remove(self):
        """Removing two words leaves only the paths of the other five."""
        # The trie is rebuilt here even though setUp already created one.
        self.tre = TRE(*self.words)
        self.tre.remove('healthy', 'change')
        remaining_trie = {
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'h': {
                'e': {
                    'a': {
                        'r': {'t': {'**': {}}},
                    },
                },
            },
            'p': {
                'e': {
                    'a': {
                        'c': {'h': {'**': {}}},
                        'r': {'**': {}},
                    },
                },
            },
        }
        self.assertEqual(
            self.tre._trie, remaining_trie,
            msg="'healthy' and 'change' were not properly removed "
                "from the trie")

    def test_has(self):
        """has() is true for every added word and false for the others."""
        for word in self.words:
            found = self.tre.has(word)
            self.assertTrue(
                found, msg=f"'{word}' should be searchable in trie")

        # None of these strings was passed to the trie in setUp.
        for word in ('hear', 'ear', 'each', 'hang', 'ok', 'heal', 'pa'):
            found = self.tre.has(word)
            self.assertFalse(
                found, msg=f"'{word}' should not be searchable in trie")

    def test_initials_variable(self):
        """_initials maps each first letter to its number of words."""
        initial_counts = {'c': 1, 'h': 2, 'l': 2, 'p': 2}
        self.assertEqual(self.tre._initials, initial_counts)

    def test_initials(self):
        """initials() returns the first letters as an ordered list."""
        first_letters = ['c', 'h', 'l', 'p']
        self.assertEqual(self.tre.initials(), first_letters)

    def test_finals_variable(self):
        """_finals maps each last letter to its number of words."""
        final_counts = {'e': 1, 'h': 1, 'k': 2, 'r': 1, 't': 1, 'y': 1}
        self.assertEqual(self.tre._finals, final_counts)

    def test_finals(self):
        """finals() returns the last letters as an ordered list."""
        last_letters = ['e', 'h', 'k', 'r', 't', 'y']
        self.assertEqual(self.tre.finals(), last_letters)

    def test_regex(self):
        """regex() yields the expected pattern for the seven-word trie."""
        expected_regex = "(?:hea(?:lthy|rt)|l(?:ark|ook)|pea(?:ch|r)|change)"
        self.assertEqual(self.tre.regex(), expected_regex)