Ejemplo n.º 1
0
    def test_del(self):
        """Deleting one key leaves its neighbours and prefix lookups intact."""
        trie = PrefixTrie()
        for key, value in (("00", 1), ("001", 2), ("002", 3), ("0011", 4)):
            trie[key] = value

        # Removing a key that was never stored must raise.
        with self.assertRaises(KeyError):
            del trie["b"]

        # Before the deletion "001" is present and is the longest stored
        # prefix of "0015".
        self.assertEqual(trie["001"], 2)
        self.assertSequenceEqual(trie.longest_prefix("0015"), ("001", 2))

        del trie["001"]

        # The deleted key is gone ...
        with self.assertRaises(KeyError):
            trie["001"]
        # ... but the longer key that extends it is still stored ...
        self.assertEqual(trie["0011"], 4)
        # ... and the prefix lookup now falls back to the shorter key "00".
        self.assertSequenceEqual(trie.longest_prefix("0015"), ("00", 1))
Ejemplo n.º 2
0
    def test_prefix(self):
        """``longest_prefix`` yields the longest stored key prefixing a query."""
        trie = PrefixTrie()
        words = ("an", "ant", "all", "allot", "alloy", "aloe", "are", "be")
        # Each word is stored with its position in ``words`` as the value.
        for value, word in enumerate(words):
            trie[word] = value

        # Queries for which some stored key is a prefix (or the query itself).
        expected_matches = (
            ("antonym", ("ant", 1)),
            ("any", ("an", 0)),
            ("are", ("are", 6)),
        )
        for query, match in expected_matches:
            self.assertSequenceEqual(trie.longest_prefix(query), match)

        # No stored key is a prefix of these queries, so the lookup raises.
        for query in ("alsa", "b"):
            with self.assertRaises(KeyError):
                trie.longest_prefix(query)
Ejemplo n.º 3
0
def main():
    """Time trie construction over the lexicon and print substring stats.

    Steps, in order:

    1. Fetch the lexicon with ``get_lexicon()`` and report its size and the
       average word length.
    2. Build a ``PrefixTrie``, a ``SuffixTrie`` and a ``SubstringTrie`` from
       it, reporting build time and path count for the prefix and substring
       tries.
    3. Tally every substring of at least ``MIN_SUBSTR_LEN`` characters.
    4. Print the most and least common substrings and a few sample
       ``substring_trie.fetch`` lookups.

    Everything is written to stdout; nothing is returned.
    """
    # NOTE: every print below takes a single parenthesised argument, which
    # produces the same output as a Python 2 print statement and is also
    # valid as a Python 3 function call.
    start = time.time()
    lexicon = set(get_lexicon())
    print('%0.2f sec to fetch unix lexicon, %d words' % (
        time.time() - start, len(lexicon)))
    print('avg word length: %d' % counter_avg(
        Counter(len(w) for w in lexicon)))

    start = time.time()
    prefix_trie = PrefixTrie(lexicon)
    print('%0.2f sec to make prefix trie, %d paths' % (
        time.time() - start, len(prefix_trie.get_paths())))

    # We'll be lazy and make a suffix trie by using reverse words and reverse
    # lookup paths
    # NOTE(review): the suffix trie is built but never queried below.
    suffix_trie = SuffixTrie(lexicon)

    start = time.time()
    substring_trie = SubstringTrie(lexicon)
    print('%0.2f sec to make substring trie, %d paths' % (
        time.time() - start, len(substring_trie.get_paths())))

    MIN_SUBSTR_LEN = 3
    substring_counts = Counter()
    start = time.time()
    for word in lexicon:
        word_len = len(word)
        # Words shorter than MIN_SUBSTR_LEN give an empty range here.
        for start_index in range(word_len - MIN_SUBSTR_LEN + 1):
            # end_index is an *exclusive* slice bound, so it has to reach
            # word_len itself; stopping one short would drop every substring
            # that ends on the word's last character (and the whole word).
            for end_index in range(start_index + MIN_SUBSTR_LEN,
                                   word_len + 1):
                substring_counts[word[start_index:end_index]] += 1
    print('%0.2f sec to tally substring counts' % (time.time() - start, ))

    most_common_substrs = most_common(substring_counts, n=10)
    least_common_substrs = most_common(substring_counts,
                                       n=10,
                                       least=True,
                                       min_count=10)
    print(most_common_substrs)
    print(least_common_substrs)

    # Spot checks: compare what the trie returns with the tallied count.
    print(substring_trie.fetch('aar'))
    print(substring_counts['aar'])
    print(substring_trie.fetch('ati')[:100])
    print(substring_trie.fetch('tillatio'))
    print(substring_trie.fetch('naest'))
    print(substring_trie.fetch('nctil'))
Ejemplo n.º 4
0
def main():
  """Time trie construction over the lexicon and print substring stats.

  Fetches the lexicon with ``get_lexicon()``, builds prefix, suffix and
  substring tries from it (reporting build time and path count for the prefix
  and substring tries), tallies every substring of at least
  ``MIN_SUBSTR_LEN`` characters, then prints the most/least common substrings
  and a few sample ``substring_trie.fetch`` lookups.

  Everything is written to stdout; nothing is returned.
  """
  # NOTE: each print takes one parenthesised argument, so the output matches
  # the Python 2 print statement while also parsing under Python 3.
  start = time.time()
  lexicon = set(get_lexicon())
  print('%0.2f sec to fetch unix lexicon, %d words' % (
    time.time() - start, len(lexicon)))
  print('avg word length: %d' % counter_avg(Counter(len(w) for w in lexicon)))

  start = time.time()
  prefix_trie = PrefixTrie(lexicon)
  print('%0.2f sec to make prefix trie, %d paths' % (
    time.time() - start, len(prefix_trie.get_paths())))

  # We'll be lazy and make a suffix trie by using reverse words and reverse
  # lookup paths
  # NOTE(review): the suffix trie is built but never queried below.
  suffix_trie = SuffixTrie(lexicon)

  start = time.time()
  substring_trie = SubstringTrie(lexicon)
  print('%0.2f sec to make substring trie, %d paths' % (
    time.time() - start, len(substring_trie.get_paths())))

  MIN_SUBSTR_LEN = 3
  substring_counts = Counter()
  start = time.time()
  for word in lexicon:
    word_len = len(word)
    # Words shorter than MIN_SUBSTR_LEN give an empty range here.
    for start_index in range(word_len - MIN_SUBSTR_LEN + 1):
      # end_index is an *exclusive* slice bound: it must run up to word_len
      # inclusive, otherwise substrings ending on the last character of the
      # word (including the word itself) are never counted.
      for end_index in range(start_index + MIN_SUBSTR_LEN, word_len + 1):
        substring_counts[word[start_index:end_index]] += 1
  print('%0.2f sec to tally substring counts' % (time.time() - start,))

  most_common_substrs = most_common(substring_counts, n=10)
  least_common_substrs = most_common(substring_counts, n=10, least=True,
                                     min_count=10)
  print(most_common_substrs)
  print(least_common_substrs)

  # Spot checks: compare what the trie returns with the tallied count.
  print(substring_trie.fetch('aar'))
  print(substring_counts['aar'])
  print(substring_trie.fetch('ati')[:100])
  print(substring_trie.fetch('tillatio'))
  print(substring_trie.fetch('naest'))
  print(substring_trie.fetch('nctil'))
Ejemplo n.º 5
0
class DB(object):
    """Address store backed by a ``PrefixTrie`` keyed on binary strings.

    Each public operation turns its ``ip`` argument into an address object
    with the configured factory and derives the trie key from that object's
    ``network`` and ``netmask`` attributes.
    """

    def __init__(self, default=IPv4):
        """Create an empty store; ``default`` is the address factory."""
        self._db = PrefixTrie()
        self._factory = default

    def __getstate__(self):
        """Return the underlying trie as the pickled state.

        NOTE(review): no ``__setstate__`` is defined in this class and the
        factory is not part of the state -- confirm how instances are
        restored.
        """
        return self._db

    def _resolve(self, ip):
        """Return ``(address, key)`` for *ip*.

        ``key`` is ``bin(address.network)`` without its ``0b`` prefix, cut
        off at string index ``address.netmask + 1``.
        """
        # presumably ``network`` is an integer and ``netmask`` a prefix
        # length; verify against the factory type.
        address = self._factory(ip)
        bits = bin(address.network)
        return address, bits[2:address.netmask + 1]

    def insert(self, ip, message=None):
        """Store ``(address, message)`` under the key derived from *ip*."""
        address, key = self._resolve(ip)
        self._db[key] = (address, message)

    def __getitem__(self, ip):
        """Return the value stored under the longest prefix matching *ip*.

        Only the second element of the trie's ``longest_prefix`` result is
        returned; any exception raised by that lookup propagates.
        """
        _, key = self._resolve(ip)
        match = self._db.longest_prefix(key)
        return match[1]

    def __delitem__(self, ip):
        """Delete the entry stored under exactly the key derived from *ip*."""
        _, key = self._resolve(ip)
        del self._db[key]

    def get(self, ip):
        """Same as ``self[ip]``; takes no default and does not catch errors."""
        return self.__getitem__(ip)
Ejemplo n.º 6
0
class DB(object):
    """Address store backed by a ``PrefixTrie`` keyed on binary strings.

    The key for an ``ip`` argument is built from the ``network`` and
    ``netmask`` attributes of the object the configured factory returns.
    """

    def __init__(self, default=IPv4):
        """Create an empty store; ``default`` is the address factory."""
        self._db = PrefixTrie()
        self._factory = default

    def __getstate__(self):
        """Return the underlying trie as the pickled state.

        NOTE(review): no ``__setstate__`` is defined in this class and the
        factory is not part of the state -- confirm how instances are
        restored.
        """
        return self._db

    def insert(self, ip, message=None):
        """Store ``(address, message)`` under the key derived from *ip*."""
        address = self._factory(ip)
        # Key: binary digits of ``network`` (``0b`` prefix dropped), cut off
        # at string index ``netmask + 1``.
        key = bin(address.network)[2:address.netmask + 1]
        self._db[key] = (address, message)

    def __getitem__(self, ip):
        """Return the value stored under the longest prefix matching *ip*.

        Only the second element of the trie's ``longest_prefix`` result is
        returned; any exception raised by that lookup propagates.
        """
        address = self._factory(ip)
        key = bin(address.network)[2:address.netmask + 1]
        found = self._db.longest_prefix(key)
        return found[1]

    def __delitem__(self, ip):
        """Delete the entry stored under exactly the key derived from *ip*."""
        address = self._factory(ip)
        key = bin(address.network)[2:address.netmask + 1]
        del self._db[key]

    def get(self, ip):
        """Same as ``self[ip]``; takes no default and does not catch errors."""
        return self.__getitem__(ip)
Ejemplo n.º 7
0
 def test_insert(self):
     """A value stored under a key is read back under the same key."""
     expected = "string"
     trie = PrefixTrie()
     trie["test"] = expected
     self.assertEqual(trie["test"], expected)
Ejemplo n.º 8
0
 def __init__(self, default=IPv4):
     """Start with an empty ``PrefixTrie`` and keep ``default`` as the factory.

     ``default`` is stored unchanged on ``self._factory``.
     """
     self._db, self._factory = PrefixTrie(), default
Ejemplo n.º 9
0
 def __init__(self, default=IPv4):
     """Initialise the instance with a fresh trie and the given factory.

     A new ``PrefixTrie`` is bound to ``self._db`` and ``default`` is bound,
     as passed, to ``self._factory``.
     """
     self._db, self._factory = PrefixTrie(), default