def test_trie():
    """Exercise basic assignment, lookup, membership and deletion on a trie.

    Keys that differ only by case ('foo' and 'Foo') are expected to be
    stored independently, and looking up a missing key must raise KeyError.
    """
    trie = datrie.new(alphabet=string.printable)
    assert trie.is_dirty()

    assert 'foo' not in trie
    assert 'Foo' not in trie

    trie['foo'] = '5'
    assert 'foo' in trie
    assert trie['foo'] == '5'

    trie['Foo'] = 10
    assert trie['Foo'] == 10
    assert trie['foo'] == '5'

    # Deleting one key must leave the differently-cased key untouched.
    del trie['foo']
    assert 'foo' not in trie
    assert 'Foo' in trie
    assert trie['Foo'] == 10

    try:
        trie['bar']
    except KeyError:
        pass
    else:
        # Raised explicitly (not via ``assert``) so the failure is reported
        # whenever the lookup unexpectedly succeeds.
        raise AssertionError("KeyError not raised")
def _trie(self):
    """Build and return a lowercase-alphabet trie with five fixed entries."""
    fixture = (
        ('foo', 10),
        ('bar', 20),
        ('foobar', 30),
        ('foovar', 40),
        ('foobarzartic', None),
    )
    result = datrie.new(string.ascii_lowercase)
    for key, value in fixture:
        result[key] = value
    return result
def test_setdefault():
    """Check that ``setdefault`` keeps returning the first value stored for a key."""
    trie = datrie.new(string.ascii_lowercase)

    # (key, default passed in, value expected back), executed in this order.
    calls = [
        ('foo', 5, 5),
        ('foo', 4, 5),
        ('foo', 5, 5),
        ('bar', 'vasia', 'vasia'),
        ('bar', 3, 'vasia'),
        ('bar', 7, 'vasia'),
    ]
    for key, default, expected in calls:
        assert trie.setdefault(key, default) == expected
def test_trie_items():
    """Check the lists returned by ``items()``, ``keys()`` and ``values()``.

    Entries are inserted as foo, bar, foobar; the results are expected in
    the order bar, foo, foobar.
    """
    trie = datrie.new(string.ascii_lowercase)
    for key, value in (('foo', 10), ('bar', 'foo'), ('foobar', 30)):
        trie[key] = value

    expected_items = [('bar', 'foo'), ('foo', 10), ('foobar', 30)]
    expected_keys = ['bar', 'foo', 'foobar']
    expected_values = ['foo', 10, 30]

    assert trie.items() == expected_items
    assert trie.keys() == expected_keys
    assert trie.values() == expected_values
def test_trie_ascii():
    """Store and read back values under short ASCII-letter keys."""
    entries = [('x', 1), ('y', 'foo'), ('xx', 2)]

    trie = datrie.new(string.ascii_letters)
    for key, value in entries:
        trie[key] = value

    # Read back only after every insert, in the same key order.
    for key, value in entries:
        assert trie[key] == value
def longest_match_datrie(search):
    """Return the longest trie key reported for ``search``, or '' if none.

    The trie is built on first use from the module-level ``hosts`` and cached
    on the function attribute ``longest_match_datrie.trie``, which must
    already exist (set to None) before the first call. Both ``search`` and
    every entry of ``hosts`` are decoded as ASCII before use.
    """
    cached = longest_match_datrie.trie
    if cached is None:
        import datrie

        cached = datrie.new(alphabet=string.printable)
        # Publish the trie before filling it, as the cache attribute.
        longest_match_datrie.trie = cached
        for url in hosts:
            cached[url.decode('ascii')] = 1

    # NOTE(review): keys(prefix) presumably lists the stored keys that start
    # with the given prefix -- confirm against the datrie documentation.
    matches = cached.keys(search.decode('ascii'))
    if not matches:
        return ''
    return max(matches, key=len)
def test_trie_unicode():
    """Store and read back values under Cyrillic keys."""
    # The alphabet is given as a character range: lowercase Russian letters.
    trie = datrie.new(ranges=[('а', 'я')])

    entries = [('а', 1), ('б', 2), ('аб', 'vasia')]
    for key, value in entries:
        trie[key] = value

    # Read back only after every insert, in the same key order.
    for key, value in entries:
        assert trie[key] == value
def create_trie():
    """Build a trie containing every word from ``words100k()``, each mapped to 1.

    The alphabet passed to ``datrie.new`` is whatever ``_alphabet`` returns
    for the word collection.
    """
    corpus = words100k()
    # Disabled alternative: pass explicit ranges instead of a derived alphabet:
    #   datrie.new(ranges=[("'", "'"), ('A', 'z'), ('А', 'я')])
    result = datrie.new(_alphabet(corpus))
    for entry in corpus:
        result[entry] = 1
    return result
def test_trie_save_load():
    """Save a trie to a temporary file and check a reloaded copy has the same values.

    The temporary file is always removed, even when an assertion fails.
    """
    import os  # local import: needed only for temp-file cleanup

    fd, fname = tempfile.mkstemp()
    # mkstemp returns an already-open descriptor; close it right away so it
    # is not leaked (the trie is written through the file name instead).
    os.close(fd)
    try:
        trie = datrie.new(alphabet=string.printable)
        trie['foobar'] = 1
        trie['foovar'] = 2
        trie['baz'] = 3
        trie['fo'] = 4
        trie['Foo'] = 'vasia'
        trie.save(fname)
        # Drop the original so the checks below can only see loaded data.
        del trie

        trie2 = datrie.Trie.load(fname)
        assert trie2['foobar'] == 1
        assert trie2['baz'] == 3
        assert trie2['fo'] == 4
        assert trie2['foovar'] == 2
        assert trie2['Foo'] == 'vasia'
    finally:
        os.remove(fname)
def test_trie_fuzzy():
    """Insert up to 1000 random words and check each is found with its stored index.

    Words are 2-10 characters long, drawn from upper-cased Cyrillic letters
    plus lowercase ASCII letters; duplicates are discarded before insertion.
    """
    russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя'
    alphabet = russian.upper() + string.ascii_lowercase

    def random_word():
        # Pick the length first, then the characters.
        length = random.randint(2, 10)
        return "".join(random.choice(alphabet) for _ in range(length))

    words = list({random_word() for _ in range(1000)})

    trie = datrie.new(alphabet)
    indexed_words = list(enumerate(words))
    for position, word in indexed_words:
        trie[word] = position

    # Verify in a different order than the insertion order.
    random.shuffle(indexed_words)
    for position, word in indexed_words:
        assert word in trie, word
        assert trie[word] == position, (word, position)
from data_utils import Vocabulary, Dataset
import datrie
import string

# Script: load the vocabulary file, index its tokens in a trie keyed by token
# text (value = token id), print the entries under the prefix "pre", and save
# the trie to disk.
vocab = Vocabulary.from_file("1b_word_vocab.txt")

# Build the vocab trie.
trie = datrie.new(string.ascii_lowercase)
vocab_size = 100001
for i in range(vocab_size):
    word = vocab.get_token(i)
    # Skip tokens starting with '<' (presumably special markers -- confirm
    # against the vocabulary file). Empty tokens are skipped too, instead of
    # failing on word[0].
    if not word or word[0] == '<':
        continue
    # Disabled filter:
    # if pattern.match(word) == None:
    #     continue
    trie[word] = i

for key in trie.keys(u"pre"):
    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print("%s %s" % (key, trie[key]))

trie.save("data/vocab_trie")
assert u"china" in trie
def _trie(self):
    """Build a lowercase-alphabet trie mapping each of ``self.WORDS`` to its 1-based position."""
    result = datrie.new(string.ascii_lowercase)
    for offset, word in enumerate(self.WORDS):
        # Stored positions start at 1, not 0.
        result[word] = offset + 1
    return result
def test_trie_len():
    """Check that ``len(trie)`` equals the number of distinct keys inserted."""
    keys = ['foo', 'f', 'faa', 'bar', 'foobar']
    expected_size = len(keys)

    trie = datrie.new(string.ascii_lowercase)
    for key in keys:
        trie[key] = None

    assert len(trie) == expected_size