def test_del(self):
    """Removing a key must not disturb sibling keys or longer extensions."""
    db = PrefixTrie()
    for key, value in (("00", 1), ("001", 2), ("002", 3), ("0011", 4)):
        db[key] = value
    # Deleting an absent key raises.
    with self.assertRaises(KeyError):
        del db["b"]
    # Sanity-check the state before deletion.
    self.assertEqual(db["001"], 2)
    self.assertSequenceEqual(db.longest_prefix("0015"), ("001", 2))
    del db["001"]
    # The deleted key is gone, but its longer extension survives...
    with self.assertRaises(KeyError):
        db["001"]
    self.assertEqual(db["0011"], 4)
    # ...and prefix search now falls back to the shorter match.
    self.assertSequenceEqual(db.longest_prefix("0015"), ("00", 1))
def test_prefix(self):
    """longest_prefix returns the longest stored key that prefixes the query."""
    db = PrefixTrie()
    words = ("an", "ant", "all", "allot", "alloy", "aloe", "are", "be")
    for rank, word in enumerate(words):
        db[word] = rank
    # Longest match wins; an exact key is its own longest prefix.
    self.assertSequenceEqual(db.longest_prefix("antonym"), ("ant", 1))
    self.assertSequenceEqual(db.longest_prefix("any"), ("an", 0))
    self.assertSequenceEqual(db.longest_prefix("are"), ("are", 6))
    # Queries with no stored prefix raise.
    with self.assertRaises(KeyError):
        db.longest_prefix("alsa")
    with self.assertRaises(KeyError):
        db.longest_prefix("b")
def main():
    """Benchmark trie construction over the unix lexicon and dump samples.

    Prints timings, path counts, and a handful of substring lookups to
    stdout.  Each ``print(expr)`` below parses as a print statement of one
    parenthesized expression under Python 2 and as the builtin function
    under Python 3, so the function now runs on both.
    """
    start = time.time()
    lexicon = set(get_lexicon())
    print('%0.2f sec to fetch unix lexicon, %d words' % (
        time.time() - start, len(lexicon)))
    print('avg word length: %d' % counter_avg(Counter(len(w) for w in lexicon)))

    start = time.time()
    prefix_trie = PrefixTrie(lexicon)
    print('%0.2f sec to make prefix trie, %d paths' % (
        time.time() - start, len(prefix_trie.get_paths())))

    # We'll be lazy and make a suffix trie by using reverse words and reverse
    # lookup paths
    suffix_trie = SuffixTrie(lexicon)

    start = time.time()
    substring_trie = SubstringTrie(lexicon)
    print('%0.2f sec to make substring trie, %d paths' % (
        time.time() - start, len(substring_trie.get_paths())))

    MIN_SUBSTR_LEN = 3
    substrings = []
    start = time.time()
    for word in lexicon:
        # NOTE(review): end_index never reaches len(word), so substrings that
        # include the final character (including the whole word) are never
        # tallied — confirm whether that exclusion is intended.
        for start_index in range(0, len(word) - 1):
            for end_index in range(start_index + MIN_SUBSTR_LEN, len(word)):
                substrings.append(word[start_index:end_index])
    substring_counts = Counter(substrings)
    print('%0.2f sec to tally substring counts' % (time.time() - start,))

    substring_len_counts = Counter([len(w) for w in substring_counts.keys()])
    most_common_substrs = most_common(substring_counts, n=10)
    least_common_substrs = most_common(substring_counts, n=10, least=True,
                                       min_count=10)
    print(most_common_substrs)
    print(least_common_substrs)
    print(substring_trie.fetch('aar'))
    print(substring_counts['aar'])
    print(substring_trie.fetch('ati')[:100])
    print(substring_trie.fetch('tillatio'))
    print(substring_trie.fetch('naest'))
    print(substring_trie.fetch('nctil'))
def main():
    """Benchmark trie construction over the unix lexicon and dump samples.

    NOTE(review): this file defines ``main()`` twice; this later definition
    shadows the earlier one — confirm which copy is intended and delete the
    other.  Each ``print(expr)`` below parses as a print statement of one
    parenthesized expression under Python 2 and as the builtin function
    under Python 3, so the function now runs on both.
    """
    start = time.time()
    lexicon = set(get_lexicon())
    print('%0.2f sec to fetch unix lexicon, %d words' % (
        time.time() - start, len(lexicon)))
    print('avg word length: %d' % counter_avg(Counter(len(w) for w in lexicon)))

    start = time.time()
    prefix_trie = PrefixTrie(lexicon)
    print('%0.2f sec to make prefix trie, %d paths' % (
        time.time() - start, len(prefix_trie.get_paths())))

    # We'll be lazy and make a suffix trie by using reverse words and reverse
    # lookup paths
    suffix_trie = SuffixTrie(lexicon)

    start = time.time()
    substring_trie = SubstringTrie(lexicon)
    print('%0.2f sec to make substring trie, %d paths' % (
        time.time() - start, len(substring_trie.get_paths())))

    MIN_SUBSTR_LEN = 3
    substrings = []
    start = time.time()
    for word in lexicon:
        # NOTE(review): end_index never reaches len(word), so substrings that
        # include the final character (including the whole word) are never
        # tallied — confirm whether that exclusion is intended.
        for start_index in range(0, len(word) - 1):
            for end_index in range(start_index + MIN_SUBSTR_LEN, len(word)):
                substrings.append(word[start_index:end_index])
    substring_counts = Counter(substrings)
    print('%0.2f sec to tally substring counts' % (time.time() - start,))

    substring_len_counts = Counter([len(w) for w in substring_counts.keys()])
    most_common_substrs = most_common(substring_counts, n=10)
    least_common_substrs = most_common(substring_counts, n=10, least=True,
                                       min_count=10)
    print(most_common_substrs)
    print(least_common_substrs)
    print(substring_trie.fetch('aar'))
    print(substring_counts['aar'])
    print(substring_trie.fetch('ati')[:100])
    print(substring_trie.fetch('tillatio'))
    print(substring_trie.fetch('naest'))
    print(substring_trie.fetch('nctil'))
class DB(object):
    """Longest-prefix IP lookup table backed by a PrefixTrie.

    Each network is stored under (part of) the binary string of its network
    address, so ``__getitem__`` resolves an address to the most specific
    stored network containing it.
    """

    def __init__(self, default=IPv4):
        self._db = PrefixTrie()
        # Callable that parses an address/CIDR string into an object
        # exposing .network and .netmask (defaults to IPv4).
        self._factory = default

    def __getstate__(self):
        # Only the trie is pickled; _factory is restored by __setstate__.
        return self._db

    def __setstate__(self, state):
        # Fix: __getstate__ returns a non-dict state, so without an explicit
        # __setstate__ unpickling raises (and _factory would be lost
        # regardless).  The factory falls back to the default, IPv4.
        self._db = state
        self._factory = IPv4

    def insert(self, ip, message=None):
        """Store ``(parsed_network, message)`` under the network's bit-key.

        NOTE(review): bin() emits '0b...' with no zero padding, and the
        slice [2:netmask + 1] keeps netmask - 1 characters — this looks off
        by one, and lossy for addresses with leading zero bits.  Kept
        byte-identical to the original expression; confirm before changing.
        """
        v = self._factory(ip)
        self._db[bin(v.network)[2:v.netmask + 1]] = (v, message)

    def __getitem__(self, ip):
        """Return the message of the most specific stored network for ip.

        Raises KeyError when no stored network is a prefix of ip's key.
        """
        v = self._factory(ip)
        return self._db.longest_prefix(bin(v.network)[2:v.netmask + 1])[1]

    def __delitem__(self, ip):
        """Remove the exact entry for ip's network (KeyError if absent)."""
        v = self._factory(ip)
        del self._db[bin(v.network)[2:v.netmask + 1]]

    def get(self, ip):
        """Alias for __getitem__; unlike dict.get, raises on a miss."""
        return self.__getitem__(ip)
def test_insert(self):
    """A stored value is retrievable under its exact key."""
    db = PrefixTrie()
    db["test"] = "string"
    self.assertEqual(db["test"], "string")
def __init__(self, default=IPv4):
    """Create an empty lookup table.

    default: factory parsing an address string into an object that
    exposes .network and .netmask attributes.
    """
    self._factory = default
    self._db = PrefixTrie()