def __insert(self, start_node, word):
    """Insert ``word`` beneath ``start_node``, splitting a child when needed.

    Each iteration looks up the child keyed by the first character of the
    remaining text. A missing child gets a new word node holding the whole
    remainder; an exact match is flagged as a word; otherwise the common
    part is consumed and the walk continues one level down.

    :param start_node: node from which the insertion starts.
    :param word: text still to be inserted.
    """
    while word:
        child = start_node.get_child(word[0])
        if not child:
            # Nothing stored under this character: store the remainder here.
            leaf = TrieNode(word)
            leaf.is_word = True
            start_node.set_child(word[0], leaf)
            return

        child_data = child.get_data()
        if child_data == word:
            # The child already holds exactly the remaining text.
            if not child.is_word:
                child.is_word = True
            return

        # NOTE(review): presumably the length of the shared prefix of the
        # two strings -- confirm against find_last_common_idx.
        idx = find_last_common_idx(child_data, word)
        if idx <= len(word) and idx != len(child_data):
            # Only part of the child's data matches: split the child so the
            # shared part gets its own node and the old child hangs below it.
            shared = TrieNode(child_data[:idx])
            child.data = child_data[idx:]
            shared.set_child(child_data[idx], child)
            # Re-attach the new intermediate node to the parent.
            start_node.set_child(child_data[0], shared)
            child = shared

        start_node = child
        word = word[idx:]

    if word == "":
        # The text was consumed exactly at this node, so it ends a word.
        start_node.is_word = True
def test_is_branch(self):
    """A node reports ``is_branch()`` only once a child is attached."""
    # Freshly created node: no children yet.
    node = TrieNode(0.04)
    assert node.is_branch() is False

    # Attach a child in slot 2.
    child = TrieNode(0.5)
    node.children[2] = child
    assert node.is_branch() is True
def test_height(self):
    """``height()`` grows with depth, not with the number of siblings."""
    # A node without children has height 0.
    root = TrieNode(0.04)
    assert root.height() == 0

    # One child below the root.
    child = TrieNode(0.5)
    root.children[2] = child
    assert root.height() == 1

    # A grandchild adds a second level.
    child.children[4] = TrieNode(0.6)
    assert root.height() == 2

    # A second grandchild on the same level leaves the height unchanged.
    child.children[3] = TrieNode(0.6)
    assert root.height() == 2
def setUp(self):
    """Create the fixture root and insert the five key/value pairs."""
    self.root = TrieNode()
    entries = (
        ('ab', 'ab'),
        ('bc', 'bc'),
        ('cd', 'cd'),
        ('bcde', 'bcde'),
        ('bcdf', 'teste'),
    )
    for key, value in entries:
        self.root.insert(key, value)
def rhyme(N, words):
    """Return ``N`` minus the number of words left unpaired in a suffix trie.

    Every word is reversed before being added to the trie, so words that
    share an ending share a path from the root.

    :param N: total number of words.
    :param words: iterable of words.
    :return: ``N`` minus the unpaired count computed at the root.
    """
    root = TrieNode('*')
    for word in words:
        add(root, word[::-1])

    def calc_minimum_num_of_unpaired_words(node):
        """
        Calculates the minimum number of unpaired words for each node.

        :param node: The current node.
        :return: Number of unpaired words at that node.
        """
        if len(node.children) == 0:
            # Leaf reached: exactly one word ends here.
            return 1

        unpaired = 0
        for child in node.children:
            unpaired += calc_minimum_num_of_unpaired_words(child)

        if node.word_finished:
            # A word also ends at this inner node.
            unpaired += 1

        if node.char != '*' and unpaired >= 2:
            # With two or more words available below a non-root node,
            # two of them can be paired here and removed from the count.
            unpaired = unpaired - 2
        return unpaired

    return N - calc_minimum_num_of_unpaired_words(root)
def build(self, data):
    """Build a trie of chunk lists from the text file at ``data``.

    The file is read line by line, handed to ``VSSChunkMiner.build`` to get
    a word-indexed collection of chunk lists, and every non-empty word is
    stored in a ``Trie`` with its chunks appended to the node's ``content``.

    :param data: path of the file to read.
    :return: the populated ``Trie``.
    """
    # Read all lines up front; the chunk miner receives the complete list.
    with open(data, "r") as source:
        lines = source.readlines()

    # Word-indexed chunk lists.
    chunker = VSSChunkMiner()
    words = chunker.build(lines)

    trie = Trie()
    for word in words:
        if word == '':
            continue
        target = trie.getSubtree(word)
        if target is None:
            # First occurrence of this word: create and register its node.
            target = TrieNode()
            trie.addSubtree(word, target)
        else:
            # Existing subtree: chunks are collected on its root node.
            target = target.root
        for item in words[word]:
            target.content.append(item)
    return trie
def test_should_get_word_from_parent_trie_node():
    """Chaining child nodes under 'f' lets the root spell 'facebook'."""
    root = TrieNode('f')

    # Append one child per remaining letter, always below the previous one.
    tail = root
    for letter in 'acebook':
        tail = tail.add_child_node(letter)
    tail.set_as_last_node()

    letters = list(root.get_word('f'))
    assert ''.join(letters) == 'facebook'
def test_should_add_child_node_to_an_existent_node():
    """Adding 'b' as a last node registers it on the parent."""
    parent = TrieNode('a')
    child = parent.add_child_node('b', True)

    # The parent now knows about the child ...
    assert parent.has_children()
    assert parent.get_child_node('b') is not None

    # ... and the returned child carries the expected state.
    assert child.char == 'b'
    assert child.is_last_node
    assert child.has_children() is False
def __init__(self, filename):
    """Index the words of ``SRTChunker(filename)`` in a new ``Trie``.

    For every non-empty word a fresh ``TrieNode`` is registered through
    ``addSubtree`` and the chunker's entry for that word is appended to the
    node's ``content``.

    :param filename: passed unchanged to ``SRTChunker``.
    """
    chunker = SRTChunker(filename)
    self.trie = Trie()
    for word in chunker.words:
        if word == '':
            continue
        node = TrieNode()
        self.trie.addSubtree(word, node)
        node.content.append(chunker.words[word])
def test_init(self):
    """A new node has no children and no cost; children accept item assignment."""
    data = "ABC"
    trie_node = TrieNode()

    # Fresh node: empty children container and unset cost.
    assert not trie_node.children
    assert len(trie_node.children) == 0
    # Identity checks, so an object that merely compares equal to None
    # (or to True below) cannot satisfy the assertion.
    assert trie_node.cost is None

    trie_node.children[data] = True
    assert trie_node.children[data] is True
def test_set_isend(self):
    """``set_isend`` defaults to True and honours an explicit flag."""
    mynode = TrieNode()
    self.assertFalse(mynode.isend)

    # (arguments passed to set_isend, expected value of isend afterwards)
    steps = (
        ((), True),
        ((False,), False),
        ((True,), True),
        ((), True),
    )
    for args, expected in steps:
        mynode.set_isend(*args)
        if expected:
            self.assertTrue(mynode.isend)
        else:
            self.assertFalse(mynode.isend)
def insert(self, word):
    """Insert ``word`` into the trie.

    An empty root gets the word stored directly as its first child;
    otherwise insertion is delegated to ``__insert`` starting at the root.

    :param word: non-empty string to insert (``str`` subclasses accepted).
    :raises AssertionError: if ``word`` is not a string or is empty.
    """
    # isinstance (rather than an exact type comparison) so that str
    # subclasses are accepted; the exception type and messages are
    # unchanged for callers that rely on them.
    assert isinstance(word, str), "You can insert String objects only!!"
    assert len(word) > 0, "You can't insert any empty String!!"

    if self.root.children == {}:
        # First word in the trie: hang it straight off the root.
        new_node = TrieNode(word)
        new_node.is_word = True
        self.root.set_child(word[0], new_node)
    else:
        start_node = self.root
        self.__insert(start_node, word)
def load_corpus(corpus, case_sensitive=False):
    """Builds a Trie from a list of strings.

    Each word is inserted with itself as the stored value; unless
    ``case_sensitive`` is set, the lookup key is the lower-cased word.

    :param corpus: iterable of strings.
    :param case_sensitive: keep the original casing of the keys when true.
    :return: the root ``TrieNode``.
    """
    root = TrieNode()
    for word in corpus:
        key = word if case_sensitive else word.lower()
        root.insert(key, word)
    return root
def setUp(self):
    """Create two random strings with a shared prefix and add them to a trie.

    ``not_string`` shares the same prefix but has its own random ending and
    is deliberately not added.
    """
    alphabet = string.ascii_letters + string.digits

    def random_token():
        # 16 random alphanumeric characters.
        return ''.join(random.choices(alphabet, k=16))

    self.common_prefix = random_token()
    self.ending_1 = random_token()
    self.ending_2 = random_token()

    self.string_1 = self.common_prefix + self.ending_1
    self.string_2 = self.common_prefix + self.ending_2
    self.not_string = self.common_prefix + random_token()

    self.root = TrieNode("")
    self.trie = Trie(self.root)
    for sentence in (self.string_1, self.string_2):
        self.trie.add_sentence(self.root, sentence)
def hill_climb(DS, a, M, N, option=1):
    """
    The Hill Climb algorithm from the paper (the main algorithm).

    :param DS: <SD,COMPS,OBS>. SD - the rules that define the connection
        between the components. COMPS - the components of the model.
        OBS - the inputs and outputs.
    :param a: The observation
    :param M: Climb restart limit
    :param N: number of tries
    :param option: 1 if we want to use our data structure, 2 if we want to
        use the Trie
    :return: list of diagnoses, ``[[]]`` as soon as an improved diagnosis
        with an empty second element is found, or ``None`` when ``option``
        is unknown or the result store is empty
    """
    SD, COMPS, OBS = DS[0], DS[1], DS[2]

    # Pick the container that accumulates the diagnoses.
    if option == 1:
        results = DiagnosisData()
    elif option == 2:
        results = TrieNode("*")
    else:
        return

    tries = 0
    while tries < N:
        diagnosis = random_diagnosis(SD, a)

        # Keep improving until M consecutive attempts have failed.
        failures = 0
        while failures < M:
            # NOTE (from the original author): should be improved_diagnosis(w.p)
            candidate = improved_diagnosis(diagnosis)
            if doesnt_entail_false(SD, a, candidate):
                if len(candidate[1]) == 0:
                    return [[]]
                diagnosis = candidate
                failures = 0
            else:
                failures += 1

        sub_diagnoses = results.search_sub_diagnosis(diagnosis[1])
        if not is_subsumed(sub_diagnoses):
            add_to_trie(results, diagnosis[1])
            remove_subsumed(results, sub_diagnoses)
        tries += 1

    # An empty store means no solution was found.
    if option == 1:
        if results.index == 0:
            return None
    elif option == 2:
        if len(results.children) == 0:
            return None
    return convert_trie_to_set_of_components(results)
def main():
    """Count n-grams of growing length in the input and print the top ones.

    The input lines are lower-cased and split once. N-grams of length 1, 2,
    3, ... are then added to a ``TrieNode`` until a whole pass adds up to a
    falsy total from ``add_phrase``. Finally the top entries are collected
    in a ``MaxHeap`` and printed one per line.
    """
    args = set_up_parser().parse_args()
    tokens = [SPLITTER.split(line.lower()) for line in args.infile]
    filter_func = partial(token_filter, allow_punc=args.allow_punc)

    freqs = TrieNode()
    ngram_length = 1
    dupes = 1
    while dupes:
        dupes = 0
        for line in tokens:
            window = sliding_window_no_whitespace(line, ngram_length, filter_func)
            for ngram in window:
                dupes += freqs.add_phrase(ngram)
        ngram_length += 1

    heap = MaxHeap(size=args.top)
    freqs.find_top(args.top, heap, min_length=args.min_length)
    print(*heap.largest(), sep='\n')
def test_add_child(self):
    """``add_child`` fills slots 0, 2 and 4, keeps existing nodes, rejects 'ben'."""
    mynode = TrieNode()
    for key in (0, 'C', 'e'):
        mynode.add_child(key)

    # Slots 0, 2 and 4 hold nodes; the slots in between stay empty.
    for slot in (0, 2, 4):
        self.assertTrue(isinstance(mynode.children[slot], TrieNode))
    for slot in (1, 3):
        self.assertEqual(mynode.children[slot], None)

    # Adding to an occupied slot must leave the stored node in place.
    savenode = mynode.children[2]
    mynode.add_child(2)
    self.assertEqual(savenode, mynode.children[2])

    with self.assertRaises(ValueError):
        mynode.add_child('ben')
def build(self, data):
    """Build a trie from a word-indexed collection of chunk lists.

    Every non-empty word in ``data`` is stored in a ``Trie`` and the items
    of ``data[word]`` are appended to the ``content`` of that word's node.

    :param data: mapping-like object, iterated for its words and indexed by
        word to obtain the items to store.
    :return: the populated ``Trie``.
    """
    words = data

    trie = Trie()
    for word in words:
        if word == '':
            continue
        target = trie.getSubtree(word)
        if target is None:
            # First occurrence of this word: create and register its node.
            target = TrieNode()
            trie.addSubtree(word, target)
        else:
            # Existing subtree: items are collected on its root node.
            target = target.root
        for item in words[word]:
            target.content.append(item)
    return trie
def add(root, word: str):
    """
    Adding a word in the trie structure

    The counter of ``root`` and of every already existing node on the
    word's path is incremented; missing nodes are created with
    ``TrieNode(char)``. The last node reached is marked as a word end.
    """
    node = root
    node.counter += 1
    for char in word:
        if char in node.children:
            # Known character: count the visit and descend.
            existing = node.children[char]
            existing.counter += 1
            node = existing
        else:
            # Unknown character: create the child and descend into it.
            created = TrieNode(char)
            node.children[char] = created
            node = created
    # Whole word consumed: mark the final node as the end of a word.
    node.word_finished = True
def test_no_label(self):
    """Constructing a node without a label must raise ``TypeError``."""
    with self.assertRaises(TypeError):
        TrieNode()
def test_should_get_child_node():
    """A child added under 'b' can be fetched back by that character."""
    parent = TrieNode('a')
    parent.add_child_node('b')

    fetched = parent.get_child_node('b')
    assert fetched.char == 'b'
def test_should_get_or_add_child_node():
    """``get_or_add_child_node`` creates once and then returns the same child."""
    parent = TrieNode('a')
    first = parent.get_or_add_child_node('b')

    # The child now exists on the parent ...
    assert parent.get_child_node('b') is not None
    # ... and a second call hands back an equal node.
    second = parent.get_or_add_child_node('b')
    assert second == first
def test_should_create_trie_node():
    """A new node stores its char, is not a last node and has no children."""
    node = TrieNode('a')

    assert node.char == 'a'
    assert node.is_last_node is False
    assert node.has_children() is False
def test_should_trie_node_has_children_be_false():
    """``has_children()`` is False on a node that never received a child."""
    childless = TrieNode('a')
    has_children = childless.has_children()
    assert has_children is False
def test_init(self):
    """``isend`` is falsy by default and truthy when ``True`` is passed in."""
    default_node = TrieNode()
    self.assertFalse(default_node.isend)

    end_node = TrieNode(True)
    self.assertTrue(end_node.isend)
def test_should_set_trie_node_as_the_least_node():
    """``set_as_last_node()`` turns ``is_last_node`` truthy."""
    node = TrieNode('a')
    node.set_as_last_node()

    assert node.is_last_node
def test_should_trie_node_has_a_child_node():
    """After adding 'b', looking it up no longer yields ``None``."""
    parent = TrieNode('a')
    parent.add_child_node('b')

    found = parent.get_child_node('b')
    assert found is not None
def test_max_suggestions_negative(self):
    """A negative ``max_suggestions`` must raise ``ValueError``."""
    empty_label_node = TrieNode(label='')
    negative_limit = -5
    with self.assertRaises(ValueError):
        empty_label_node.generate_suggestions(max_suggestions=negative_limit)
def test_init(self):
    """A new node has ten child slots and the first two are empty."""
    node = TrieNode(0.04)

    assert len(node.children) == 10
    for slot in (0, 1):
        assert node.children[slot] is None
def test_max_suggestions_float(self):
    """A float ``max_suggestions`` must raise ``TypeError``."""
    empty_label_node = TrieNode(label='')
    float_limit = 5.0
    with self.assertRaises(TypeError):
        empty_label_node.generate_suggestions(max_suggestions=float_limit)