def test_startswith(self):
    """``startswith('ab')`` yields exactly the stored keys that begin with 'ab'.

    The keys are every three-letter combination over 'abc'. The expected
    sequence keeps the order in which those combinations were generated.
    """
    all_keys = list(map(''.join, itertools.product('abc', repeat=3)))
    prefix_set = PrefixSet()
    # Insert back-to-front so a matching result order cannot simply mirror
    # the insertion order.
    for word in all_keys[::-1]:
        prefix_set.add(word)
    expected = list(filter(lambda word: word.startswith('ab'), all_keys))
    found = list(prefix_set.startswith('ab'))
    self.assertSequenceEqual(expected, found)
def insert_search_delete(self, keys):
    """Round-trip *keys* through a fresh PrefixSet: add, look up, discard.

    Args:
        keys: Re-iterable collection of keys; it is walked several times.

    Checks that the set length equals the number of distinct keys after
    insertion, that every key is a member, and that after discarding them
    all the set is empty, contains none of them, and its ``_root`` has
    length zero.
    """
    trie = PrefixSet()

    # Phase 1: insert everything and verify size and membership.
    for item in keys:
        trie.add(item)
    self.assertEqual(len(trie), len(set(keys)))
    for item in keys:
        self.assertIn(item, trie)

    # Phase 2: discard everything and verify nothing is left behind.
    for item in keys:
        trie.discard(item)
    self.assertEqual(len(trie), 0)
    for item in keys:
        self.assertFalse(item in trie)
    # The internal root is inspected to confirm it ends up with length zero.
    self.assertEqual(len(trie._root), 0)
def test_pickle(self):
    """A PrefixSet holding one key can be dumped with the highest pickle protocol."""
    prefix_set = PrefixSet()
    prefix_set.add('a')
    protocol = pickle.HIGHEST_PROTOCOL
    # Only serialization is exercised; the payload is not loaded back.
    pickle.dumps(prefix_set, protocol)
def test_reversed(self):
    """``reversed()`` over the set yields the keys in the opposite of their generated order."""
    all_keys = list(map(''.join, itertools.product('abc', repeat=3)))
    prefix_set = PrefixSet()
    for word in all_keys:
        prefix_set.add(word)
    expected = list(reversed(all_keys))
    produced = list(reversed(prefix_set))
    self.assertSequenceEqual(expected, produced)
def test_init_iterable(self):
    """A key supplied through the constructor's iterable is a member of the set."""
    initial_keys = ['a']
    seeded = PrefixSet(initial_keys)
    self.assertIn('a', seeded)
def test_invalid_key(self):
    """Adding a non-string key (the integer 0) must raise TypeError.

    The check goes through ``PrefixSet.add`` rather than
    ``operator.setitem(pd, 0)``: ``setitem`` takes three arguments, so a
    two-argument call raises TypeError for its arity before the set is
    touched, and the test would pass regardless of how PrefixSet treats
    the key.
    """
    pd = PrefixSet()
    # NOTE(review): assumes PrefixSet.add rejects non-string keys with
    # TypeError -- confirm against the prefixtree implementation.
    self.assertRaises(TypeError, pd.add, 0)
def test_commonprefix_full(self):
    """``commonprefix`` of a stored key with itself equals the whole key."""
    stored = PrefixSet(['abcd'])
    shared = stored.commonprefix('abcd')
    self.assertEqual('abcd', shared)
def test_iter_post_el(self):
    """Iterating the set after removing one of its keys completes without raising."""
    members = PrefixSet(['a', 'b', 'c'])
    members.remove('b')
    # The resulting list is discarded; exhausting the iteration is the check.
    list(members)
def test_commonprefix_half(self):
    """``commonprefix('abef')`` against a set holding 'abcd' is expected to equal ``b'ab'``."""
    stored = PrefixSet(['abcd'])
    shared = stored.commonprefix('abef')
    self.assertEqual(b'ab', shared)
def test_commonprefix_empty(self):
    """``commonprefix('efgh')`` against a set holding 'abcd' is expected to equal ``b''``."""
    stored = PrefixSet(['abcd'])
    shared = stored.commonprefix('efgh')
    self.assertEqual(b'', shared)
def test_sort_order(self):
    """Iteration yields keys in the listed order even when inserted in reverse."""
    ordered = ['', 'a', 'aa', 'ab', 'b', 'ba']
    prefix_set = PrefixSet()
    # Add back-to-front so the expected order differs from insertion order.
    for word in ordered[::-1]:
        prefix_set.add(word)
    produced = list(iter(prefix_set))
    self.assertSequenceEqual(ordered, produced)
def test_startswith_empty(self):
    """``startswith`` with a prefix no stored key begins with yields nothing."""
    prefix_set = PrefixSet()
    prefix_set.add('a')
    matches = list(prefix_set.startswith('b'))
    self.assertSequenceEqual([], matches)
# -*- coding: utf-8 -*-
"""Build ``places.txt`` from the double-quoted strings found in ``areas.json``.

Every double-quoted token in ``../dictionaries/areas.json`` is added to a
PrefixSet and written to ``../dictionaries/places.txt`` as
``<name> 30000 ns``. Afterwards the set is checked against the prefix
"大连" and every key starting with it is printed.
"""
import ast
import json
import os
import re

from prefixtree import PrefixSet

file = os.path.join(os.path.dirname(__file__), "../dictionaries/areas.json")
places = os.path.join(os.path.dirname(__file__), "../dictionaries/places.txt")

ps = PrefixSet()

# NOTE(review): both files are opened with the platform default encoding.
# The data is expected to contain non-ASCII place names, so confirm the
# default is UTF-8 wherever this script runs.
with open(file, 'r') as f, \
        open(places, 'w') as out:
    content = f.read()
    # Every double-quoted, non-empty token in the file is collected; the
    # file is scanned as text, not parsed as JSON.
    spaces = re.findall('"[^"]+"', content)
    for s in spaces:
        # Evaluate the quoted token as a Python string literal to strip the
        # surrounding quotes.
        space = ast.literal_eval(s)
        ps.add(space)
        # Example of the line format: 北京 34488 ns
        out.write("%s 30000 ns\n" % space)

assert "大连" not in ps

# Materialize the matches once. Asserting on the object returned by
# startswith() directly would always pass if it is a lazy iterator, because
# iterator objects are truthy even when they yield nothing.
matches = list(ps.startswith("大连"))
assert matches
for x in matches:
    print(x)