def load_state(env, alloc):
    db = env.db
    state = SecureTrie(Trie(db, BLANK_ROOT))
    count = 0
    databaseLog.debug("Loading state from snapshot")
    for addr in alloc:
        databaseLog.debug("[%d] loading account %s", count, addr)
        account = alloc[addr]
        acct = Account.blank_account(db, env.config['ACCOUNT_INITIAL_NONCE'])
        if len(account['storage']) > 0:
            t = SecureTrie(Trie(db, BLANK_ROOT))
            c = 0
            for k in account['storage']:
                v = account['storage'][k]
                enckey = zpad(decode_hex(k), 32)
                t.update(enckey, decode_hex(v))
                c += 1
                # Every 1,000 storage writes, flush the batch if it has
                # grown too large.
                if c % 1000 == 0 and len(db.db_service.uncommitted) > 50000:
                    databaseLog.debug("%d uncommitted. committing...",
                                      len(db.db_service.uncommitted))
                    db.commit()
            acct.storage = t.root_hash
        if account['nonce']:
            acct.nonce = int(account['nonce'])
        if account['balance']:
            acct.balance = int(account['balance'])
        state.update(decode_hex(addr), rlp.encode(acct))
        count += 1
    db.commit()
    return state
def classifier(self, test_data0, test_data1):
    test_set1 = test_data0
    test_set2 = test_data1
    rules_tree1 = Trie(test_set1)
    rules_tree2 = Trie(test_set2)
    rules_tree1.trie_for_rules()
    rules_tree2.trie_for_rules()
    imp_rules1 = rules_tree1.important_rules_selection(0.0001)
    imp_rules2 = rules_tree2.important_rules_selection(0.0001)
    inf1 = RulesImportance(imp_rules1, rules_tree1, rules_tree2, 1.2)
    inf2 = RulesImportance(imp_rules2, rules_tree2, rules_tree1, 1.2)
    classifier = Classification(inf1, inf2)
    all_data = {}
    temp_object = _from_list_to_dataset(all_data, test_data0, 0, 1)
    n_0 = len(test_data0) + 1
    all_data = _from_list_to_dataset(temp_object, test_data1, 1, n_0)
    results = []
    print("Dataset length: %d" % len(all_data))
    for obj_id in all_data:
        data = all_data[obj_id]["data"]
        status = all_data[obj_id]["status"]
        results.append((self._classify_object(data), status))
    return results
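# `_from_list_to_dataset` is referenced above but not shown in this snippet.
# A minimal hypothetical sketch (an assumption, not the original helper) that
# is consistent with how it is called: merge a list of samples into `dataset`,
# keyed by a running integer id starting at `start_id` and tagged with the
# class label `status`.
def _from_list_to_dataset(dataset, data_list, status, start_id):
    for offset, data in enumerate(data_list):
        dataset[start_id + offset] = {"data": data, "status": status}
    return dataset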
def __init__(self):
    # Entity linking and disambiguation via ES + word2vec
    self.es_helper = es_helper()
    self.w2v = self.loadw2v(cur_dir + "/data/w2v_qa_8w.bigram")
    # Load custom dictionaries
    # jieba.load_userdict(cur_dir + "/../config/vocab/word.vocab")
    self.insur_kw = Trie()
    self.scop_kw = Trie()
    self.clauses_kw = Trie()
    self.type_kw = Trie()
    with open(cur_dir + "/../config/vocab/insurance.vocab", 'r') as fr:
        for line in fr:
            word = line.strip()
            self.insur_kw.add(word)
    with open(cur_dir + "/../config/vocab/scope.vocab", 'r') as fr:
        for line in fr:
            word = line.strip()
            self.scop_kw.add(word)
    # with open(cur_dir + "/../config/vocab/clauses.vocab", 'r') as fr:
    #     for line in fr:
    #         word = line.strip()
    #         # print(word)
    #         self.clauses_kw.add(word)
    with open(cur_dir + "/../config/vocab/type.vocab", 'r') as fr:
        for line in fr:
            word = line.strip()
            self.type_kw.add(word)
def __init__(self, doc, n_gram=5, min_len=2, custom_dict_path=None):
    sentences = process(doc)
    self.corpus = seg_with_custom_dict(sentences, custom_dict_path)
    VisualWord.corpus = self.corpus
    self.n_gram = n_gram
    self.min_len = min_len
    candidates = self.gen_candidates(self.corpus)
    self.total = len(candidates)
    WordInfo.total = self.total
    self.prefix_trie = Trie()
    self.suffix_trie = Trie(reverse=True)
    self.prefix_trie.build(candidates)
    self.suffix_trie.build(candidates)
    self.words_info = list(set(
        self.calculate(candidate) for candidate in candidates
        if self.min_len <= len(candidate) <= self.n_gram))
    avg = np.array([w.score * w.count for w in self.words_info]).mean()
    self.words_info = [w for w in self.words_info if w.score * w.count > avg]
    self.filter_sub_words()
    self.words_info.sort(key=lambda w: w.score * w.count, reverse=True)
    self.real_words = [wi.real_word for wi in self.words_info]
def setUpClass(cls):
    #           root
    #          /    \
    #         /      \
    #       a *       b
    #      /   \     /
    #     d    n *  a
    #     |   / \  /|\
    #    d* d*  y* g*  t*  y*
    #              /\    \
    #             e  s*   h*
    #            /
    #           l*
    # asterisk denotes a word
    cls._trie = Trie()
    cls._trie.insert("a")
    cls._trie.insert("add")
    cls._trie.insert("an")
    cls._trie.insert("and")
    cls._trie.insert("any")
    cls._trie.insert("bagel")
    cls._trie.insert("bag")
    cls._trie.insert("bags")
    cls._trie.insert("bat")
    cls._trie.insert("bath")
    cls._trie.insert("bay")
    cls._trie_length = 11  # magic number, the number of words in the trie
def filter_sub_words(self):
    vwords = [wi.vword for wi in self.words_info]
    pre_trie = Trie()
    suf_trie = Trie(reverse=True)
    pre_trie.build(vwords)
    suf_trie.build(vwords)
    words_info = set()
    for wi in self.words_info:
        vword = wi.vword
        left = self.get_word_count(vword, prefix_search=True, suffix=True)
        right = self.get_word_count(vword, prefix_search=True, suffix=False)
        is_suf = len(left) > 0
        is_pre = len(right) > 0
        if not is_suf and not is_pre:
            words_info.add(wi)
            pre = ('住所地', '住所', '住')
            suf = ('分行', '公司')
            for p in pre:
                if wi.real_word.startswith(p):
                    wi.real_word = wi.real_word.replace(p, '')
                    break
            for s in suf:
                idx = wi.real_word.find(s)
                if idx != -1:
                    idx += len(s)
                    wi.real_word = wi.real_word[0:idx]
                    break
    self.words_info = list(words_info)
def func():
    # Wait until tags and aliases have finished loading.
    while not tags or not all_alias:
        time.sleep(0.1)
    cmd_trie, tag_trie = Trie(), Trie()
    cmd_trie.add(cfg.cmds)
    cmd_trie.add(all_alias['cmd'].keys())
    tag_trie.add(tags.keys())
    global tries
    tries = {'cmd': cmd_trie, 'tag': tag_trie}
def test(self):
    trie = Trie()
    assert trie.size == 0
    assert repr(trie) == "Trie({})"
    assert trie.words == []

    trie.add('cat')
    assert trie.size == 1
    assert repr(trie) == "Trie({'c': {'a': {'t': {'end': True}}}})"
    assert trie.words == ['cat']
    trie.add('cat')
    assert trie.size == 1
    assert trie.words == ['cat']
    assert 'cat' in trie
    assert 'ca' not in trie
    assert 'catalog' not in trie

    trie.add('dog')
    assert trie.size == 2
    assert trie.words == ['cat', 'dog']
    assert 'dog' in trie
    assert trie.is_prefix('ca') is True

    trie.add('deer')
    trie.add('pan')
    trie.add('panda')
    assert trie.size == 5
    assert trie.words == ['cat', 'dog', 'deer', 'pan', 'panda']
    assert 'd' not in trie
    assert 'pan' in trie
    assert 'pand' not in trie
    assert trie.is_prefix('d') is True
    assert trie.is_prefix('do') is True
    assert trie.is_prefix('pa') is True
    assert trie.is_prefix('pan') is True
    assert trie.is_prefix('pand') is True
    assert trie.is_prefix('panda') is True
    assert trie.is_prefix('da') is False
    assert trie.is_prefix('pana') is False

    trie.add('do')
    assert trie.prefix_words('pa') == ['pan', 'panda']
    assert trie.prefix_words('d') == ['do', 'dog', 'deer']
    assert trie.prefix_words('do') == ['do', 'dog']
    assert trie.prefix_words('cat') == ['cat']
    assert trie.prefix_words('pap') == []

    trie = Trie(['cat', 'dog', 'do', 'deer', 'pan', 'panda'])
    print('\n', trie)
def test_init(self):
    t1 = Trie()
    base1 = t1.get_base_node()
    self.assertFalse(base1.isend)

    t2 = Trie('testfile1.txt')  # contains python, javascript, java
    base2 = t2.get_base_node()
    self.assertFalse(base2.isend)
    self.assertEqual(base2.children[3], None)
    self.assertTrue(
        isinstance(base2.children[ord('j') - ord('a')], TrieNode))
    self.assertTrue(
        isinstance(base2.children[ord('p') - ord('a')], TrieNode))
def createTrie(dictionary_file):
    with open(dictionary_file, 'r') as fin:
        content = fin.readlines()
    trie = Trie()
    for line in content:
        head = trie
        word = line.rstrip()
        for letter in word:
            if not head[letter]:
                head.addBranch(Trie(letter))
            head = head[letter]
        head.setWordEnd()
    return trie
def __init__(self, db, chainid, height, apphash):
    self.db = db
    self.chain_id = chainid
    self.last_block_height = height
    self.last_block_hash = apphash
    self.storage = StateTrie(Trie(self.db, apphash))
def to_dict(self):
    state = self.state.to_dict(True)
    nstate = {}
    for s in state:
        t = Trie('statedb', state[s][STORAGE_INDEX])
        o = [0] * ACCT_RLP_LENGTH
        o[NONCE_INDEX] = decode_int(state[s][NONCE_INDEX])
        o[BALANCE_INDEX] = decode_int(state[s][BALANCE_INDEX])
        o[CODE_INDEX] = state[s][CODE_INDEX]
        td = t.to_dict(True)
        o[STORAGE_INDEX] = {k: decode_int(td[k]) for k in td}
        nstate[s.encode('hex')] = o
    return {
        "number": self.number,
        "prevhash": self.prevhash,
        "uncles_root": self.uncles_root,
        "coinbase": self.coinbase,
        "state": nstate,
        "transactions_root": self.transactions_root,
        "difficulty": self.difficulty,
        "timestamp": self.timestamp,
        "extradata": self.extradata,
        "nonce": self.nonce
    }
def _save_trie(rsc_dir, entries):
    """
    Save the trie.
    Args:
        rsc_dir: target resource directory
        entries: list of entries
    """
    trie = Trie()
    total_tag_nums = 0
    for entry in entries:
        val = total_tag_nums
        val += 1    # indices start at 1, not 0
        val *= 2    # even values mark exact word (eojeol) matches
        val += 1 if entry.is_pfx else 0    # odd values mark prefix-match patterns
        trie.insert(entry.word, val)
        total_tag_nums += len(entry.tag_nums)
    trie.save(f'{rsc_dir}/preanal.tri')
    val_file = f'{rsc_dir}/preanal.val'
    with open(val_file, 'wb') as fout:
        # Indices start at 1, so write one dummy record at the front.
        fout.write(struct.pack('H', 0))
        for idx, entry in enumerate(entries, start=1):
            logging.debug('%d: %s: %s: %s', idx, entry.word, entry.tag_outs,
                          entry.tag_nums)
            fout.write(struct.pack('H' * len(entry.tag_nums), *entry.tag_nums))
    logging.info('value saved: %s', val_file)
    logging.info('total entries: %d', len(entries))
    logging.info('expected size: %d',
                 (sum([len(e.tag_nums) for e in entries]) + 1)
                 * struct.Struct('H').size)
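# For reference, a minimal sketch (an assumption derived from the packing
# above, not part of the original module) of how a stored value maps back to
# its 1-based offset and prefix flag:
def decode_val(val):
    is_pfx = bool(val % 2)   # odd values are prefix-match patterns
    idx = val // 2           # 1-based offset into the value file
    return idx, is_pfx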
def test_should_autocomplete(client):
    trie = Trie()
    trie.insert_words(['Facebook', 'Facebook Lite', 'Faca'])
    api.trie = trie
    response = client.get(url_for('autocomplete'), query_string={'q': 'face'})
    assert response.status_code == 200
    assert response.json['options'] == ['Facebook', 'Facebook Lite']
def test_grid_bigger_grid(self):
    words = 'you shall know a word by the company it keeps'.split()
    dictionary = Trie(words)
    nrow = ncol = 6
    grid = LetterGrid(''.join(words), nrow=nrow, ncol=ncol)
    self.assertCountEqual({'you', 'know', 'a', 'word', 'the', 'keeps'},
                          grid.find_words_from_dict(dictionary))
def find_words(board: List[List[str]], words: List[str]) -> List[str]:
    """
    Given a 2D board and a list of words from the dictionary, find all words
    in the board.

    :see https://leetcode.com/problems/word-search-ii/
    :param board:
    :param words:
    :return:
    """
    if not board or not words:
        return []
    trie = Trie()
    for word in words:
        trie.insert(word)
    exists = set()
    row, col = len(board), len(board[0])
    for ri in range(row):
        for ci in range(col):
            _dfs(board, ri, ci, '', trie, exists)
    return list(exists)
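# `_dfs` is referenced above but not included in this snippet. A hypothetical
# sketch of such a helper, assuming the Trie exposes `starts_with` (prefix
# query) and `search` (exact match) methods; the real helper may differ.
def _dfs(board, ri, ci, path, trie, exists):
    # Stop when out of bounds or the cell is already used on this path.
    if ri < 0 or ri >= len(board) or ci < 0 or ci >= len(board[0]):
        return
    ch = board[ri][ci]
    if ch == '#':
        return
    path += ch
    if not trie.starts_with(path):   # assumed prefix-query method
        return
    if trie.search(path):            # assumed exact-match method
        exists.add(path)
    board[ri][ci] = '#'              # mark the cell as visited
    for dr, dc in ((1, 0), (-1, 0), (0, 1), (0, -1)):
        _dfs(board, ri + dr, ci + dc, path, trie, exists)
    board[ri][ci] = ch               # restore the cell on backtrack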
def build_receipt_proof(w3, txn_hash):
    receipt_trie = Trie(db={})
    receipt = w3.eth.getTransactionReceipt(txn_hash)
    block = w3.eth.getBlock(receipt.blockHash)
    for i, tr in enumerate(block.transactions):
        path = rlp.encode(i)
        sibling_receipt = w3.eth.getTransactionReceipt(tr.hex())
        value = get_rlp_receipt(sibling_receipt)
        receipt_trie.set(path, value)
        if i == receipt.transactionIndex:
            rlp_txn_receipt = value  # We are interested in this txn
    txn_path = rlp.encode(receipt.transactionIndex)
    parent_nodes = []
    t = receipt_trie
    parent_nodes.append(t.root_node)
    node = t.root_node
    nibs = nibbles.bytes_to_nibbles(txn_path)
    for nib in nibs:
        if len(node) == 2:
            # Leaf node. We are done.
            break
        next_node = rlp.decode(t.db[node[nib]])
        parent_nodes.append(next_node)
        node = next_node
    rlp_parent_nodes = rlp.encode(parent_nodes)
    print('Calculated hash = %s' %
          HexBytes(w3.sha3(rlp.encode(t.root_node))).hex())
    print('Receipts root = %s' % HexBytes(block.receiptsRoot).hex())
    return rlp_txn_receipt, receipt.blockHash, txn_path, rlp_parent_nodes
def from_corpus(cls, corpus):
    trie = Trie()
    number_of_docs = 0
    for docID, document in enumerate(corpus):
        number_of_docs += 1
        tokens = process(document.description)
        for index, token in enumerate(tokens):
            # Create the term at which each rotation of the word will point.
            term = Term(token, docID, index)
            token = token + "$"
            # Rotate the word for the wildcard index.
            for i in token:
                trie.insert(token, term)
                token = token[1:] + token[0]
        if docID % 1000 == 0 and docID != 0:
            print(str(docID), end='...')
        # Enable this to limit the dimension of the index for testing:
        # if docID % 20000 == 0:
        #     break
    idx = cls()
    idx._number_of_docs = number_of_docs
    idx._trie = trie
    return idx
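# The rotation loop above builds a permuterm-style index: every rotation of
# "token$" is inserted pointing at the same Term, so any wildcard query can be
# rotated to put the "*" at the end. As a standalone illustration (not part of
# the original class), these are the rotations generated for "cat":
token = "cat" + "$"
rotations = []
for _ in token:
    rotations.append(token)
    token = token[1:] + token[0]
assert rotations == ['cat$', 'at$c', 't$ca', '$cat']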
def test_largestCommonPrefix(self):
    string_list = ["hello", "hellow", "hel"]
    trie = Trie()
    for word in string_list:
        trie.build(word)
    longest, words = trie.wordsWihtLargestCommonPrefix()
    print(longest)
    print(words)
    self.assertEqual(longest == 5, True)
    result = ["hello", "hellow"]
    self.assertEqual(all([word in result for word in words]), True)

    string_list = ["ai", "aii", "aiii", "aiiii", "aiiiii", "aiiiiii"]
    for word in string_list:
        trie.build(word)
    longest, words = trie.wordsWihtLargestCommonPrefix()
    self.assertEqual(longest == 6, True)
    result = ["aiiiii", "aiiiiii"]
    print(words)
    self.assertEqual(all([word in result for word in words]), True)
def test_deprecated_trie():
    with pytest.warns(DeprecationWarning):
        trie = Trie(db={})
        trie[b'foo'] = b'bar'
        assert b'foo' in trie
        assert trie[b'foo'] == b'bar'
def main2():
    dictionary = ['geeks', 'uiz', 'quiz', 'gee', 'geek']
    root = Trie()
    for word in dictionary:
        root.insert(word)
    boggle = ['giz', 'uek', 'qse']
    find_words(boggle, root)
def friendTrie(self):
    tr = Trie()
    users = list(self.graph.keys())
    for user in users:
        tr.insert(user)
    pprint.pprint(tr.trie)
    return tr
def test_trie(actions, args, expecteds):
    trie = Trie()
    actuals = []
    for action, arg in zip(actions, args):
        actual = getattr(trie, action)(*arg)
        actuals.append(actual)
    assert actuals == expecteds
def main():
    dictionary = Trie('data/dictionary.txt')
    intro()
    replay = True
    ghost = Ghost(dictionary)
    while replay:
        print('-' * 60)
        print("Let's start!\nYou go first.")
        ghost.human_play()
        while True:
            ghost.computer_play()
            if ghost.check_result(ghost.curr_word, COMP):
                break
            print("------------------------")
            print("Your turn\nCurrent string:", ghost.curr_word)
            ghost.human_play()
            if ghost.check_result(ghost.curr_word, HUMAN):
                break
        # Enter nothing, or anything starting with 'n', to quit.
        ans = input('Do you want to play another game? (Y/N) ').lower().strip()
        if not ans or ans[0] == 'n':
            print('Thanks for playing!')
            replay = False
        ghost.reset()
def __init__(self):
    insurance_data = cur_dir + "/data/insurance_data.csv"
    w2v_data = cur_dir + "/data/w2v.txt"
    self.ins_kw = Trie()
    self.invertIndex = self._build_invertIndex(insurance_data)
    self.w2v = self._build_w2vec(w2v_data)
    self.ins2v = self._build_ins2vec(insurance_data)
def set_trie(self, filename):
    """Set up a new trie object."""
    data = self.read_file(filename)
    if isinstance(data, list):
        self.trie = Trie()
        for word in data:
            self.trie.insert(word)
def build_cost_trie(data_file_path: str) -> Trie:
    """
    Given a path to a data file, return a trie mapping each phone-number
    prefix to its cost.

    Params:
        data_file_path: path to a file of (phone number prefix, cost) lines
    """
    # Get number prefixes and costs from the file.
    # lines = numbers_and_costs_from(data_file_path)
    trie = Trie()
    # Iterate through the phone-number prefixes, inserting them into the
    # trie; each line contains a cost and a phone number.
    stopwatch.mark("Tokenizing input")
    with open(data_file_path, 'r') as f:
        lines = (l.split(',') for l in f.readlines())
    stopwatch.mark("Building cost trie")
    for num, cost in lines:
        trie.insert(num[1:], float(cost))
    return trie
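# For illustration only (the file layout is an assumption not confirmed by
# the snippet): if each line looks like "+1512,0.04", the num[1:] slice keys
# the trie on digits only by dropping the leading character.
num, cost = "+1512,0.04".split(',')
assert num[1:] == "1512"
assert float(cost) == 0.04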
class TestMediumSet(unittest.TestCase):
    trie = Trie()
    with open("25000_words.txt") as f_hdl:
        words = [entry.replace("\n", "") for entry in f_hdl.readlines()]
    trie.add_words(words)

    def test_prefix(self):
        expected_result = {'matches': ['zygote'], 'next_chars': ['o']}
        self.assertEqual(self.trie.from_prefix("zyg"), expected_result)

    def test_wrong_prefix(self):
        expected_result = {'matches': [], 'next_chars': []}
        self.assertEqual(self.trie.from_prefix("zzzz"), expected_result)

    def test_nonstr_prefix(self):
        self.assertRaises(TypeError, self.trie.from_prefix, ["zyg", "z"])

    def test_none_prefix(self):
        self.assertRaises(ValueError, self.trie.from_prefix, None)

    def test_word_exists(self):
        self.assertTrue(self.trie.word_exists("ZYGOTE"))

    def test_word_exists_force_case(self):
        self.assertFalse(self.trie.word_exists("ZYGOTE", ignore_case=False))

    def test_nonstr_word_exists(self):
        self.assertRaises(TypeError, self.trie.word_exists, ["zyg", "z"])

    def test_none_word_exists(self):
        self.assertRaises(ValueError, self.trie.word_exists, None)
def test_init(self):
    tree = Trie()
    assert tree.size == 0
    assert tree.is_empty() is True
    tree.insert('hello')
    assert tree.search('hello') == 'hello'
    assert tree.search('state') is None
def test_1(self):
    newTree = Trie()
    newTree.insert("hello")
    self.assertTrue(newTree.search("hello"))
    self.assertTrue(newTree.startsWith("hel"))
    self.assertFalse(newTree.search("bye"))
    self.assertFalse(newTree.startsWith("b"))