def test_remove_one_in_multiple_shared(self):
    # Three words share the prefix 'brand'; dropping one of them must
    # leave the branches of the other two in place.
    trie = TRE('brander', 'brandy', 'brandless')
    trie.remove('brandless')

    # "hard" check: compare against a hand-written trie literal
    remaining = {
        'b': {'r': {'a': {'n': {'d': {
            'y': {'**': {}},
            'e': {'r': {'**': {}}},
        }}}}}
    }
    self.assertEqual(
        trie._trie,
        remaining,
        "'brandless' should have been removed (hard check)",
    )

    # "soft" check: compare against a trie built without the removed word
    self.assertEqual(
        trie._trie,
        TRE('brander', 'brandy')._trie,
        "'brandless' should have been removed (soft check)",
    )
class TestHas(unittest.TestCase):
    """Membership checks through TrieRegEx.has()."""

    def setUp(self):
        # All fixture words share the prefix 'heal', so shorter words end
        # in the middle of the branches used by longer ones.
        self.words = ['heal', 'health', 'healthy', 'healthier', 'healthiest']
        self.tre = TRE(*self.words)

    def test_existing_longest_word(self):
        found = self.tre.has('healthiest')
        self.assertTrue(found)

    def test_existing_substring_word(self):
        # Stored words that are prefixes of other stored words.
        for stored in ('health', 'heal'):
            self.assertTrue(self.tre.has(stored))

    def test_nonexisting(self):
        found = self.tre.has('wound')
        self.assertFalse(found)

    def test_nonword_substring_of_existing_word(self):
        # 'he' is a prefix of every stored word but was never added itself.
        found = self.tre.has('he')
        self.assertFalse(found)

    def test_nonexisting_after_removed(self):
        """Also exercises the TrieRegEx.remove() function."""
        target = 'healthy'
        self.assertTrue(self.tre.has(target),
                        "'healthy' must first exist in trie")
        self.tre.remove(target)
        self.assertFalse(self.tre.has(target))

    def test_existing_after_added(self):
        """Also exercises the TrieRegEx.add() function."""
        target = 'settled'
        self.assertFalse(self.tre.has(target),
                         "'settled' must first NOT exist in trie")
        self.tre.add(target)
        self.assertTrue(self.tre.has(target))

    def test_empty_string(self):
        found = self.tre.has('')
        self.assertFalse(found)
class TestRegex(unittest.TestCase):
    """Checks on the pattern string produced by TrieRegEx.regex()."""

    def setUp(self):
        # Incremental words: each one is a prefix of the next.
        self.words = ['p', 'pe', 'pea', 'pear']
        self.more_words = [
            'orange', 'kumquat', 'tangerine', 'mandarin', 'pomelo', 'yuzu',
            'grapefruit', 'lemon', 'lime', 'bergamot', 'citron',
            'clementine', 'satsuma', 'tangelo', 'mikan', 'tangor', 'mint',
            'peppermint', 'spearmint', 'basil', 'cilantro', 'coriander',
            'chives', 'parsley', 'oregano', 'rosemary', 'thyme', 'scallion',
            'ginger', 'garlic', 'onion', 'galangal'
        ]
        self.tre = TRE()

    def _bounded_pattern(self):
        # Current trie regex wrapped in word-boundary anchors.
        return f'\\b{self.tre.regex()}\\b'

    def test_match_all_incrementals(self):
        self.tre.add(*self.words)
        matches = re.findall(self._bounded_pattern(), ' '.join(self.words))
        self.assertEqual(sorted(matches), sorted(self.words))

    def test_does_not_match_larger_string(self):
        self.tre.add('p')
        matches = re.findall(self._bounded_pattern(), 'pe')
        self.assertEqual(matches, [])

    def test_does_not_match_substring(self):
        stored = self.words[1:]  # leave out 'p'
        self.tre.add(*stored)
        haystack = ' '.join(self.words)
        matches = re.findall(self._bounded_pattern(), haystack)
        self.assertEqual(matches, sorted(stored),
                         "'p' should not be captured")

    def test_empty_trie_returns_empty_string_regex(self):
        self.assertEqual(self.tre.regex(), '')

    def test_match_all_words(self):
        self.tre.add(*self.more_words)
        matches = re.findall(self._bounded_pattern(),
                             ' '.join(self.more_words))
        self.assertEqual(sorted(matches), sorted(self.more_words))

    def test_match_all_words_surrounded_by_spaces(self):
        ordered = sorted(self.more_words)
        self.tre.add(*ordered)
        # Require a literal space on both sides of each match; the first
        # and last words of the joined text have a space on one side only.
        spaced_pattern = f"(?<= ){self.tre.regex()}(?= )"
        matches = re.findall(spaced_pattern, ' '.join(ordered))
        self.assertEqual(
            matches,
            ordered[1:-1],
            'First and last item in sorted words list should not be matched.')

    def test_added_word_reflected_in_new_regex_call(self):
        trie = self.tre
        trie.add(*self.words)
        self.assertEqual(trie.regex(), 'p(?:e(?:ar?)?)?',
                         'Setup for the real test in the next assertEqual')
        trie.add('peak')
        self.assertEqual(trie.regex(), 'p(?:e(?:a[kr]?)?)?')

    def test_removed_word_reflected_in_new_regex_call(self):
        trie = self.tre
        trie.add(*(self.words + ['peak']))
        self.assertEqual(trie.regex(), 'p(?:e(?:a[kr]?)?)?',
                         'Setup for the real test in the next assertEqual')
        trie.remove('peak')
        self.assertEqual(trie.regex(), 'p(?:e(?:ar?)?)?')

    def test_multiple_adding_removing_reflected(self):
        """Interleave .add(), .remove() and .regex() calls.

        This test also checks that the memoizer cache clearing is called
        in the right places so that the three methods keep returning
        results that reflect the current contents of the trie.
        """
        trie = self.tre
        trie.add(*self.words)
        self.assertEqual(trie.regex(), 'p(?:e(?:ar?)?)?',
                         'Setup for the real test in the next assertEqual')

        first_round = (
            (trie.add, 'peak'),
            (trie.remove, 'pe'),
            (trie.add, 'river'),
            (trie.add, 'rich'),
            (trie.remove, 'pea'),
            (trie.remove, 'peak'),
        )
        for operation, word in first_round:
            operation(word)
        self.assertEqual(trie.regex(), '(?:ri(?:ver|ch)|p(?:ear)?)')

        second_round = (
            (trie.add, 'peak'),
            (trie.remove, 'peak'),
            (trie.remove, 'pear'),
        )
        for operation, word in second_round:
            operation(word)
        trie.add(*self.words)
        self.assertEqual(trie.regex(), '(?:p(?:e(?:ar?)?)?|ri(?:ver|ch))')
class TestTrieRegEx(unittest.TestCase):
    """Smoke tests, one per function of the trieregex.TrieRegEx class.

    More in-depth tests are located in files bearing the function names.
    """

    def setUp(self):
        self.words = [
            'heart', 'healthy', 'pear', 'peach', 'lark', 'look', 'change'
        ]
        self.tre = TRE(*self.words)

    def test_add(self):
        # Nested dict the fixture words are expected to produce; the
        # '**' key marks the end of a stored word.
        expected = {
            'c': {'h': {'a': {'n': {'g': {'e': {'**': {}}}}}}},
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'h': {'e': {'a': {
                'l': {'t': {'h': {'y': {'**': {}}}}},
                'r': {'t': {'**': {}}},
            }}},
            'p': {'e': {'a': {
                'c': {'h': {'**': {}}},
                'r': {'**': {}},
            }}},
        }
        self.assertEqual(
            self.tre._trie, expected,
            "Words were not added to the trie (._trie) properly")

    def test_remove(self):
        # Fresh trie with the same contents as the one built in setUp.
        self.tre = TRE(*self.words)
        self.tre.remove('healthy', 'change')
        expected = {
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'p': {'e': {'a': {
                'c': {'h': {'**': {}}},
                'r': {'**': {}},
            }}},
        }
        self.assertEqual(
            self.tre._trie, expected,
            "'healthy' and 'change' were not properly removed from the trie")

    def test_has(self):
        for stored in self.words:
            self.assertTrue(self.tre.has(stored),
                            f"'{stored}' should be searchable in trie")

        # Fragments of stored words that were never added themselves.
        for absent in ('hear', 'ear', 'each', 'hang', 'ok', 'heal', 'pa'):
            self.assertFalse(self.tre.has(absent),
                             f"'{absent}' should not be searchable in trie")

    def test_initials_variable(self):
        expected = {'c': 1, 'h': 2, 'l': 2, 'p': 2}
        self.assertEqual(self.tre._initials, expected)

    def test_initials(self):
        expected = ['c', 'h', 'l', 'p']
        self.assertEqual(self.tre.initials(), expected)

    def test_finals_variable(self):
        expected = {'e': 1, 'h': 1, 'k': 2, 'r': 1, 't': 1, 'y': 1}
        self.assertEqual(self.tre._finals, expected)

    def test_finals(self):
        expected = ['e', 'h', 'k', 'r', 't', 'y']
        self.assertEqual(self.tre.finals(), expected)

    def test_regex(self):
        expected = "(?:hea(?:lthy|rt)|l(?:ark|ook)|pea(?:ch|r)|change)"
        self.assertEqual(self.tre.regex(), expected)
class TestInitials(unittest.TestCase):
    """Tests for TrieRegEx.initials() and TrieRegEx._initials"""

    # First-character counts expected for the words added in setUp.
    # Individual tests spell out only the entries that differ from this.
    BASE_COUNTS = {
        'a': 1, 't': 1, 's': 3, 'w': 2, 'f': 1, 'n': 2, 'b': 1, 'e': 1
    }

    def setUp(self):
        self.words = [
            'all', 'the', 'stars', 'we', 'steal', 'from', 'night', 'sky',
            'will', 'never', 'be', 'enough'
        ]
        self.tre = TRE(*self.words)

    def test_initials_variable(self):
        # "hard" check: hand-written counts
        self.assertEqual(self.tre._initials, self.BASE_COUNTS)

        # "soft" check: counts recomputed from the fixture words
        initials = defaultdict(int)
        for w in self.words:
            initials[w[0]] += 1
        self.assertEqual(self.tre._initials, initials)

    def test_initials(self):
        # "hard" check
        self.assertEqual(
            self.tre.initials(),
            ['a', 'b', 'e', 'f', 'n', 's', 't', 'w'])
        # "soft" check: sorted unique first characters of the fixture words
        self.assertEqual(
            self.tre.initials(),
            sorted({w[0] for w in self.words}))

    def test_add_existing_word_will_not_change_counts(self):
        self.tre.add('the')
        self.assertEqual(self.tre._initials, self.BASE_COUNTS,
                         "key-value pairs should remain the same")

    def test_add_new_word_increase_frequency(self):
        self.tre.add('spotlights')
        self.assertEqual(self.tre._initials,
                         {**self.BASE_COUNTS, 's': 4},
                         "'s' should be set to 4 (up from 3)")

    def test_add_new_initial(self):
        self.tre.add('dream')
        self.assertEqual(self.tre._initials,
                         {**self.BASE_COUNTS, 'd': 1},
                         "new key 'd' should have a value of 1")

    def test_add_new_escaped_char(self):
        self.tre.add('\nnewline')
        # The backslash is escaped in the message so a failure report
        # shows the two characters \n rather than a raw line break.
        self.assertEqual(self.tre._initials,
                         {**self.BASE_COUNTS, '\n': 1},
                         "new key '\\n' should have a value of 1")

    def test_add_new_special_char(self):
        self.tre.add('åll')
        self.assertEqual(self.tre._initials,
                         {**self.BASE_COUNTS, 'å': 1},
                         "new key 'å' should have a value of 1")

    def test_remove_word_lower_frequency(self):
        self.tre.remove('the')
        self.assertEqual(self.tre._initials,
                         {**self.BASE_COUNTS, 't': 0},
                         "'t' should have a value of 0 (down from 1)")

    def test_zero_frequency_should_not_appear(self):
        self.tre.remove('the')
        self.assertEqual(self.tre.initials(),
                         ['a', 'b', 'e', 'f', 'n', 's', 'w'],
                         "'t' should not appear in the list")

    def test_remove_nonexisting_initial_with_zero_frequency(self):
        self.tre.remove('the')    # set 't': 1 -> 't': 0
        self.tre.remove('table')  # attempt removal of nonexisting word
        self.assertEqual(self.tre._initials,
                         {**self.BASE_COUNTS, 't': 0},
                         "'t' should still have a value of 0")

    def test_remove_all(self):
        self.tre.remove(*self.words)
        # Every key is expected to stay present with its count at zero.
        self.assertEqual(self.tre._initials,
                         dict.fromkeys(self.BASE_COUNTS, 0),
                         "All keys should be set to a value of 0")
class TestRemove(unittest.TestCase):
    """Checks on the trie left behind by TrieRegEx.remove()."""

    def setUp(self):
        self.words = ['heart', 'healthy', 'pear', 'peach', 'lark', 'look',
                      'change']
        # Incremental words: each one is a prefix of the next.
        self.incrementals = ['p', 'pe', 'pea', 'pear']
        self.tre = TRE(*self.words)
        self.tre_incr = TRE(*self.incrementals)

    def _assert_trie(self, trie, literal, survivors, claim):
        # Run the paired checks used throughout this class: first a
        # "hard" check against a hand-written dict, then a "soft" check
        # against a fresh trie built from the surviving words.
        self.assertEqual(trie._trie, literal, f"{claim} (hard check)")
        self.assertEqual(trie._trie, TRE(*survivors)._trie,
                         f"{claim} (soft check)")

    def test_remove_one(self):
        self.tre.remove('healthy')
        literal = {
            'c': {'h': {'a': {'n': {'g': {'e': {'**': {}}}}}}},
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'p': {'e': {'a': {
                'c': {'h': {'**': {}}},
                'r': {'**': {}},
            }}},
        }
        survivors = [w for w in self.words if w != 'healthy']
        self._assert_trie(self.tre, literal, survivors,
                          "'healthy' should have been removed")

    def test_remove_two(self):
        self.tre.remove('healthy', 'change')
        literal = {
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'p': {'e': {'a': {
                'c': {'h': {'**': {}}},
                'r': {'**': {}},
            }}},
        }
        survivors = ('lark', 'look', 'heart', 'peach', 'pear')
        self._assert_trie(
            self.tre, literal, survivors,
            "'healthy' and 'change' should have been removed")

    def test_remove_all(self):
        self.tre.remove(*self.words)
        self.assertEqual(self.tre._trie, {}, 'Trie should be empty')

    def test_remove_second_time(self):
        # Empty the trie, refill it, then remove everything but 'pear'.
        self.tre.remove(*self.words)
        self.tre.add(*self.words)
        self.tre.remove(*[w for w in self.words if w != 'pear'])
        literal = {'p': {'e': {'a': {'r': {'**': {}}}}}}
        self._assert_trie(self.tre, literal, ('pear',),
                          "Only 'pear' should be in trie")

    def test_remove_first_in_incremental_words(self):
        self.tre_incr.remove('p')
        literal = {
            'p': {
                'e': {
                    '**': {},
                    'a': {'**': {}, 'r': {'**': {}}},
                },
            },
        }
        self._assert_trie(self.tre_incr, literal, ('pe', 'pea', 'pear'),
                          "'p' should have been removed")

    def test_remove_middle_in_incremental_words(self):
        self.tre_incr.remove('pea')
        literal = {
            'p': {
                '**': {},
                'e': {
                    '**': {},
                    'a': {'r': {'**': {}}},
                },
            },
        }
        self._assert_trie(self.tre_incr, literal, ('p', 'pe', 'pear'),
                          "'pea' should have been removed")

    def test_remove_last_in_incremental_words(self):
        self.tre_incr.remove('pear')
        literal = {
            'p': {
                '**': {},
                'e': {
                    '**': {},
                    'a': {'**': {}},
                },
            },
        }
        self._assert_trie(self.tre_incr, literal, ('p', 'pe', 'pea'),
                          "'pear' should have been removed")

    def test_remove_one_in_multiple_shared(self):
        # Three words share the prefix 'brand'; only one is removed.
        trie = TRE('brander', 'brandy', 'brandless')
        trie.remove('brandless')
        literal = {
            'b': {'r': {'a': {'n': {'d': {
                'y': {'**': {}},
                'e': {'r': {'**': {}}},
            }}}}}
        }
        self._assert_trie(trie, literal, ('brander', 'brandy'),
                          "'brandless' should have been removed")

    def test_remove_nonexisting_word(self):
        self.tre_incr.remove('riffraff')
        literal = {
            'p': {
                '**': {},
                'e': {
                    '**': {},
                    'a': {'**': {}, 'r': {'**': {}}},
                },
            },
        }
        self._assert_trie(self.tre_incr, literal, self.incrementals,
                          "Trie should remain the same")