コード例 #1
0
ファイル: test_trie.py プロジェクト: marklr/datrie
def test_trie():
    """Exercise item assignment, lookup, membership and deletion on a trie.

    The assertions treat ``'foo'`` and ``'Foo'`` as distinct keys: both can be
    stored with different values, and deleting one leaves the other intact.
    Looking up a key that was never stored must raise ``KeyError``.
    """
    trie = datrie.new(alphabet=string.printable)
    # The test expects a freshly created trie to report itself as dirty.
    assert trie.is_dirty()

    assert 'foo' not in trie
    assert 'Foo' not in trie

    trie['foo'] = '5'
    assert 'foo' in trie
    assert trie['foo'] == '5'

    # A second key differing only in case, holding a value of another type.
    trie['Foo'] = 10
    assert trie['Foo'] == 10
    assert trie['foo'] == '5'
    del trie['foo']

    assert 'foo' not in trie
    assert 'Foo' in trie
    assert trie['Foo'] == 10

    try:
        trie['bar']
    except KeyError:
        pass
    else:
        # Raised explicitly (rather than via ``assert``) so the check still
        # fires when Python runs with assertions disabled.
        raise AssertionError("KeyError not raised")
コード例 #2
0
ファイル: test_trie.py プロジェクト: marklr/datrie
 def _trie(self):
     """Return a lowercase-ASCII trie pre-filled with a fixed set of keys."""
     fixture = (
         ('foo', 10),
         ('bar', 20),
         ('foobar', 30),
         ('foovar', 40),
         ('foobarzartic', None),
     )
     result = datrie.new(string.ascii_lowercase)
     # Insert in the listed order.
     for key, value in fixture:
         result[key] = value
     return result
コード例 #3
0
ファイル: test_trie.py プロジェクト: marklr/datrie
def test_setdefault():
    """Check that ``setdefault`` keeps returning the first value stored for a key."""
    trie = datrie.new(string.ascii_lowercase)
    # (key, default passed in, value expected back) -- applied in this order.
    steps = [
        ('foo', 5, 5),
        ('foo', 4, 5),
        ('foo', 5, 5),
        ('bar', 'vasia', 'vasia'),
        ('bar', 3, 'vasia'),
        ('bar', 7, 'vasia'),
    ]
    for key, default, expected in steps:
        assert trie.setdefault(key, default) == expected
コード例 #4
0
ファイル: test_trie.py プロジェクト: marklr/datrie
def test_trie_items():
    """Check ``items()``, ``keys()`` and ``values()`` against one expected listing."""
    trie = datrie.new(string.ascii_lowercase)
    for key, value in (('foo', 10), ('bar', 'foo'), ('foobar', 30)):
        trie[key] = value

    # Expected order differs from insertion order: 'bar' comes first.
    expected_items = [('bar', 'foo'), ('foo', 10), ('foobar', 30)]
    assert trie.items() == expected_items
    assert trie.keys() == [key for key, _ in expected_items]
    assert trie.values() == [value for _, value in expected_items]
コード例 #5
0
ファイル: test_trie.py プロジェクト: marklr/datrie
def test_trie_ascii():
    """Store a few short ASCII keys and read each value back."""
    trie = datrie.new(string.ascii_letters)
    entries = [('x', 1), ('y', 'foo'), ('xx', 2)]

    for key, value in entries:
        trie[key] = value

    # Verify only after every key has been inserted.
    for key, value in entries:
        assert trie[key] == value
コード例 #6
0
def longest_match_datrie(search):
    """Return the longest key reported by the cached trie for ``search``.

    The trie is built on first use from the module-level ``hosts`` and cached
    on the function attribute ``longest_match_datrie.trie``.  Both ``search``
    and every entry of ``hosts`` are decoded as ASCII before use.  Returns
    ``''`` when ``trie.keys(...)`` yields nothing.
    """
    cached = longest_match_datrie.trie
    if cached is None:
        import datrie
        cached = datrie.new(alphabet=string.printable)
        # Publish the trie on the function before filling it.
        longest_match_datrie.trie = cached
        for url in hosts:
            cached[url.decode('ascii')] = 1

    candidates = cached.keys(search.decode('ascii'))
    if not candidates:
        return ''
    return max(candidates, key=len)
コード例 #7
0
ファイル: test_trie.py プロジェクト: marklr/datrie
def test_trie_unicode():
    """Store and read back keys made of lowercase Russian characters."""
    # The alphabet is given as a single character range.
    trie = datrie.new(ranges=[('а', 'я')])
    entries = [('а', 1), ('б', 2), ('аб', 'vasia')]

    for key, value in entries:
        trie[key] = value

    for key, value in entries:
        assert trie[key] == value
コード例 #8
0
ファイル: speed.py プロジェクト: marklr/datrie
def create_trie():
    """Build a trie that maps every word from ``words100k()`` to ``1``."""
    vocabulary = words100k()
    # The alphabet is derived from the word list via ``_alphabet`` rather
    # than from hard-coded character ranges.
    result = datrie.new(_alphabet(vocabulary))
    for entry in vocabulary:
        result[entry] = 1
    return result
コード例 #9
0
ファイル: test_trie.py プロジェクト: marklr/datrie
def test_trie_save_load():
    """Round-trip a trie through ``save`` / ``Trie.load`` and verify its contents.

    The trie is written to a temporary file, dropped, loaded back from that
    file, and every stored key is checked against its original value.  The
    temporary file is removed whether or not the assertions pass.
    """
    import os  # local import: only the temp-file cleanup needs it

    fd, fname = tempfile.mkstemp()
    # mkstemp() returns an already-open descriptor.  The trie is saved by file
    # name, so the descriptor is not needed; close it so it does not leak.
    os.close(fd)
    try:
        trie = datrie.new(alphabet=string.printable)
        trie['foobar'] = 1
        trie['foovar'] = 2
        trie['baz'] = 3
        trie['fo'] = 4
        trie['Foo'] = 'vasia'
        trie.save(fname)
        # Drop the original so the checks below can only use the loaded copy.
        del trie

        trie2 = datrie.Trie.load(fname)
        assert trie2['foobar'] == 1
        assert trie2['baz'] == 3
        assert trie2['fo'] == 4
        assert trie2['foovar'] == 2
        assert trie2['Foo'] == 'vasia'
    finally:
        # mkstemp() never deletes the file itself.
        os.remove(fname)
コード例 #10
0
ファイル: test_trie.py プロジェクト: marklr/datrie
def test_trie_fuzzy():
    """Store randomly generated words and verify each one can be read back.

    Up to 1000 distinct words of 2-10 characters are drawn from uppercase
    Russian letters plus lowercase ASCII, stored with their list index as the
    value, and then looked up again in shuffled order.
    """
    russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя'
    alphabet = russian.upper() + string.ascii_lowercase

    # A set removes any duplicate words the generator happens to produce.
    unique_words = set()
    for _ in range(1000):
        length = random.randint(2, 10)
        unique_words.add(
            "".join(random.choice(alphabet) for _ in range(length))
        )
    words = list(unique_words)

    trie = datrie.new(alphabet)

    pairs = list(enumerate(words))
    for position, term in pairs:
        trie[term] = position

    # Look the words up in a different order than they were inserted.
    random.shuffle(pairs)
    for position, term in pairs:
        assert term in trie, term
        assert trie[term] == position, (term, position)
コード例 #11
0
from data_utils import Vocabulary, Dataset
import datrie, string
vocab = Vocabulary.from_file("1b_word_vocab.txt")
# Build a trie that maps each vocabulary word to its index in the vocabulary.
trie = datrie.new(string.ascii_lowercase)
vocab_size = 100001
cnt = 0  # not used in the code shown here; kept in case later code reads it
for i in range(vocab_size):
    word = vocab.get_token(i)
    # Skip tokens that start with '<' (presumably special markers such as
    # sentence or unknown-word tags -- TODO confirm against the vocab file).
    if word[0] == '<':
        continue
    # A regex filter (``pattern.match(word)``) was applied here at some point
    # and is currently disabled.
    trie[word] = i

# Print every key returned for the prefix "pre" together with its stored index.
for key in trie.keys(u"pre"):
    # Single-argument form: prints "key value" on both Python 2 and Python 3.
    # The former ``print key, value`` statement is a SyntaxError on Python 3.
    print("%s %s" % (key, trie[key]))
trie.save("data/vocab_trie")
assert u"china" in trie
コード例 #12
0
ファイル: test_trie.py プロジェクト: marklr/datrie
 def _trie(self):
     """Return a lowercase-ASCII trie mapping each of ``self.WORDS`` to its 1-based position."""
     result = datrie.new(string.ascii_lowercase)
     for position, entry in enumerate(self.WORDS, start=1):
         result[entry] = position
     return result
コード例 #13
0
ファイル: test_trie.py プロジェクト: marklr/datrie
def test_trie_len():
    """Check that ``len(trie)`` equals the number of distinct keys inserted."""
    expected_keys = ['foo', 'f', 'faa', 'bar', 'foobar']
    trie = datrie.new(string.ascii_lowercase)
    # The stored value is irrelevant here; only the key count is checked.
    for key in expected_keys:
        trie[key] = None
    assert len(trie) == len(expected_keys)