コード例 #1
0
 def setUp(self):
     """Create the trie fixture used by the tests: a root with five entries."""
     self.root = TrieNode()
     # (key, value) pairs handed to insert(), in insertion order.
     entries = (
         ('ab', 'ab'),
         ('bc', 'bc'),
         ('cd', 'cd'),
         ('bcde', 'bcde'),
         ('bcdf', 'teste'),
     )
     for key, value in entries:
         self.root.insert(key, value)
コード例 #2
0
def test_should_add_child_node_to_an_existent_node():
    """add_child_node returns the new child and registers it on the parent."""
    parent = TrieNode('a')
    child = parent.add_child_node('b', True)

    # The parent now knows about the child ...
    assert parent.has_children()
    assert parent.get_child_node('b') is not None

    # ... and the returned child carries the requested state.
    assert child.char == 'b'
    assert child.is_last_node
    assert child.has_children() is False
コード例 #3
0
 def test_init(self):
     """A fresh TrieNode has empty children and no cost; children accepts item assignment."""
     data = "ABC"
     trie_node = TrieNode()
     # Empty container: falsy and zero-length.
     assert not trie_node.children
     assert len(trie_node.children) == 0
     # Singletons are compared by identity, not equality.
     assert trie_node.cost is None
     trie_node.children[data] = True
     assert trie_node.children[data] is True
コード例 #4
0
def test_should_get_word_from_parent_trie_node():
    """get_word('f') on the head of an f-a-c-e-b-o-o-k chain yields 'facebook'."""
    head = TrieNode('f')

    # Hang one child per remaining letter under the previous node.
    tail = head
    for letter in 'acebook':
        tail = tail.add_child_node(letter)
    tail.set_as_last_node()

    pieces = head.get_word('f')
    assert ''.join(pieces) == 'facebook'
コード例 #5
0
 def test_set_isend(self):
     """set_isend() with no argument sets the flag; an explicit bool overrides it."""
     node = TrieNode()
     self.assertFalse(node.isend)

     # (arguments passed to set_isend, flag value expected afterwards)
     steps = (
         ((), True),
         ((False,), False),
         ((True,), True),
         ((), True),
     )
     for call_args, expected in steps:
         node.set_isend(*call_args)
         if expected:
             self.assertTrue(node.isend)
         else:
             self.assertFalse(node.isend)
コード例 #6
0
    def insert(self, word):
        """Insert ``word`` into the trie.

        When the root has children the work is delegated to ``__insert``
        starting at the root; otherwise a single node holding the whole word
        is attached to the root under the word's first character.

        :param word: non-empty ``str`` to store.
        :raises AssertionError: if ``word`` is not a ``str`` or is empty.
        """
        assert type(word) == str, "You can insert String objects only!!"
        assert len(word) > 0, "You can't insert any empty String!!"

        if not (self.root.children == {}):
            self.__insert(self.root, word)
            return

        # Empty trie: the first word becomes one node holding all of it.
        first_node = TrieNode(word)
        first_node.is_word = True
        self.root.set_child(word[0], first_node)
コード例 #7
0
 def test_height(self):
     """height() is 0 without children and counts the levels below the node."""
     root = TrieNode(0.04)
     assert root.height() == 0

     # A single child gives height 1.
     root.children[2] = TrieNode(0.5)
     assert root.height() == 1

     # The first grandchild raises the height to 2; a sibling grandchild
     # on the same level leaves it at 2.
     child = root.children[2]
     for slot in (4, 3):
         child.children[slot] = TrieNode(0.6)
         assert root.height() == 2
コード例 #8
0
def rhyme(N, words):
    """Return ``N`` minus the minimum number of unpaired words.

    Every word is reversed and added to a trie rooted at a ``'*'`` sentinel
    node, so that common suffixes become common prefixes. The trie is then
    walked bottom-up to count the words that cannot be paired.

    :param N: Total number of words.
    :param words: Iterable of words (strings).
    :return: ``N`` minus the unpaired count computed at the root.
    """
    root = TrieNode('*')

    for w in words:
        add(root, w[::-1])

    def child_nodes(node):
        """Return the child *nodes* of ``node``, whatever the container type."""
        children = node.children
        # `add` stores children in a dict keyed by character. Iterating a
        # dict yields its keys (plain strings), so the values are needed.
        # A plain sequence of nodes is passed through unchanged.
        if isinstance(children, dict):
            return children.values()
        return children

    def calc_minimum_num_of_unpaired_words(node):
        """
        Calculates the minimum number of unpaired words for each node.
        :param node: The current node.
        :return: Number of unpaired words at that node.
        """
        if len(node.children) == 0:
            # Leaf: a word ends here.
            return 1

        r = sum(calc_minimum_num_of_unpaired_words(c)
                for c in child_nodes(node))
        if node.word_finished:
            # A word also finishes at this inner node.
            r += 1
        if node.char != '*' and r >= 2:
            # Two of the words below a non-root node can be paired with
            # each other and removed from the unpaired count.
            r = r - 2
        return r

    fv_root = calc_minimum_num_of_unpaired_words(root)
    result = N - fv_root
    return result
コード例 #9
0
ファイル: exampleplugins.py プロジェクト: JBarnden/SiaVid
    def build(self, data):
        """Build a trie of word-indexed chunks from the file at ``data``.

        The file is read line by line, the lines are handed to a
        ``VSSChunkMiner`` whose ``build`` result is indexed by word, and
        every non-empty word gets a trie node whose ``content`` list receives
        that word's items.

        :param data: path of the file to open for reading.
        :return: the populated ``Trie``.
        """
        # Read the actual lines for chunking.
        with open(data, "r") as source:
            lines = source.readlines()

        # Word-indexed chunk lists.
        chunker = VSSChunkMiner()
        words = chunker.build(lines)

        # Build a trie from the chunk lists.
        trie = Trie()
        for word in words:
            if word == '':
                continue

            target = trie.getSubtree(word)
            if target is None:
                # No subtree yet for this word: attach a fresh node.
                target = TrieNode()
                trie.addSubtree(word, target)
            else:
                # Existing subtree: append to its root node.
                target = target.root

            for item in words[word]:
                target.content.append(item)

        return trie
コード例 #10
0
def main():
    """Count n-grams of growing length from the input and print the top ones.

    Lines of ``args.infile`` are lower-cased and split with ``SPLITTER``.
    N-grams of length 1, 2, 3, ... are fed to a ``TrieNode`` until a complete
    pass over the input leaves the ``add_phrase`` tally at zero. The top
    ``args.top`` entries are then collected in a ``MaxHeap`` and printed one
    per line.
    """
    args = set_up_parser().parse_args()
    tokens = [SPLITTER.split(line.lower()) for line in args.infile]
    filter_func = partial(token_filter, allow_punc=args.allow_punc)
    freqs = TrieNode()

    ngram_length = 0
    while True:
        ngram_length += 1
        dupes = 0
        for line in tokens:
            windows = sliding_window_no_whitespace(line, ngram_length,
                                                   filter_func)
            for ngram in windows:
                dupes += freqs.add_phrase(ngram)
        if not dupes:
            # Nothing was tallied at this length: stop growing the n-grams.
            break

    heap = MaxHeap(size=args.top)
    freqs.find_top(args.top, heap, min_length=args.min_length)
    print(*heap.largest(), sep='\n')
コード例 #11
0
ファイル: TrieMiner.py プロジェクト: JBarnden/SiaVid
 def __init__(self, filename):
     """Chunk ``filename`` with ``SRTChunker`` and index its words in ``self.trie``.

     Each non-empty word gets a new ``TrieNode`` added as a subtree; the
     value stored for the word in ``chunker.words`` is appended to that
     node's ``content`` as a single element.
     """
     chunker = SRTChunker(filename)
     self.trie = Trie()
     for word in chunker.words:
         if word == '':
             continue
         node = TrieNode()
         self.trie.addSubtree(word, node)
         node.content.append(chunker.words[word])
コード例 #12
0
 def __insert(self, start_node, word):
     """Insert ``word`` below ``start_node``, splitting existing nodes as needed.

     Each node stores a string (``get_data()`` / ``.data``) and is registered
     on its parent under the first character of that string. The loop
     descends one child per iteration, consuming the matched part of
     ``word``, until the word is used up or a new node is attached.

     :param start_node: node whose children are searched first.
     :param word: remaining text to insert; the loop ends when it is empty.
     """
     while (word):
         ch = word[0]
         child = start_node.get_child(ch)
         if not child:
             # No child under this first character: store the whole
             # remainder in one new node and stop.
             new_node = TrieNode(word)
             new_node.is_word = True
             start_node.set_child(word[0], new_node)
             return
         else:
             child_data = child.get_data()
             # The child holds exactly the remaining word: just mark it.
             if child_data == word:
                 if not child.is_word:
                     child.is_word = True
                 return
             # NOTE(review): find_last_common_idx is defined elsewhere;
             # it is presumably the length of the common prefix of the two
             # strings -- confirm, since the slicing below relies on it.
             idx = find_last_common_idx(child_data, word)
             # Only part of the child's data matches the word.
             if idx <= len(word) and idx != len(child_data):
                 # Split the child: new_node keeps the first idx characters,
                 # the old child keeps the rest and hangs below new_node.
                 new_node = TrieNode(child_data[:idx])
                 child.data = child_data[idx:]
                 new_node.set_child(child_data[idx], child)
                 # new_node replaces the child in start_node, under the same
                 # first character.
                 start_node.set_child(child_data[0], new_node)
                 child = new_node
             # Descend and drop the part of the word consumed so far.
             start_node = child
             word = word[idx:]
             if word == "":
                 # The word ended exactly at this node.
                 start_node.is_word = True
コード例 #13
0
    def test_add_child(self):
        """add_child fills the expected slots, keeps existing children, rejects 'ben'."""
        node = TrieNode()
        for key in (0, 'C', 'e'):
            node.add_child(key)

        # Slots 0, 2 and 4 are populated; 1 and 3 stay empty.
        for slot in (0, 2, 4):
            self.assertTrue(isinstance(node.children[slot], TrieNode))
        for slot in (1, 3):
            self.assertEqual(node.children[slot], None)

        # Adding an already-present child must not replace it.
        existing = node.children[2]
        node.add_child(2)
        self.assertEqual(existing, node.children[2])

        self.assertRaises(ValueError, node.add_child, 'ben')
コード例 #14
0
 def test_is_branch(self):
     """is_branch() is False for a childless node and True once a child is attached."""
     subject = TrieNode(0.04)
     assert subject.is_branch() is False

     # Attach a child in slot 2.
     subject.children[2] = TrieNode(0.5)
     assert subject.is_branch() is True
コード例 #15
0
ファイル: search.py プロジェクト: angles-n-daemons/hare
def recursive_edit_distance_search(
    allowed_error: int,
    search_string: str,
    node: TrieNode,
    row: List[int],
) -> "tuple[List[MatchResult], int]":
    """Search the subtree at ``node`` for entries within ``allowed_error``.

    ``row`` is the distance row computed for the parent node. A new row is
    derived for ``node.char`` against every character of ``search_string``.
    If the smallest value in the new row is within ``allowed_error``, a
    ``MatchResult`` is emitted when ``node.in_vocab`` is set, and the search
    recurses into every child with the new row.

    :param allowed_error: largest distance still counted as a match.
    :param search_string: text being matched.
    :param node: trie node to evaluate.
    :param row: distance row of the parent; needs ``len(search_string) + 1``
        entries.
    :return: ``(matches, visited_node_count)``. The annotation is a string so
        it is not evaluated at definition time.
    """
    new_row = [row[0] + 1]
    min_dist = row[0] + 1
    min_index = 0

    for i, char in enumerate(search_string):
        # NOTE(review): the mismatch cost is added to the minimum of all
        # three neighbours. Classic Levenshtein charges it only on row[i]
        # and a fixed 1 on the other two -- confirm this cheaper recurrence
        # is intended.
        modifier = 0 if node.char == char else 1
        score = min(row[i], row[i + 1], new_row[i]) + modifier

        # Strict '<' keeps the earliest position with the smallest score.
        if score < min_dist:
            min_dist = score
            min_index = i + 1

        new_row.append(score)

    values = []
    node_count = 1
    if min_dist > allowed_error:
        # Over the threshold: no match here and no descent into children.
        return values, node_count

    if node.in_vocab:
        # Record where in the search string the best match ends.
        values.append(
            MatchResult(
                length=min_index,
                value=node.value(),
                matching=search_string[0:min_index],
                distance=min_dist,
            ))

    for child in node.children.values():
        new_values, new_node_count = recursive_edit_distance_search(
            allowed_error,
            search_string,
            child,
            new_row,
        )
        values += new_values
        node_count += new_node_count

    return values, node_count
コード例 #16
0
    def setUp(self):
        """Build a trie holding two random sentences that share a 16-char prefix.

        ``not_string`` has the same prefix with a different random ending
        and is not added to the trie.
        """
        alphabet = string.ascii_letters + string.digits

        def random_chunk():
            # 16 random alphanumeric characters.
            return ''.join(random.choices(alphabet, k=16))

        self.common_prefix = random_chunk()
        self.ending_1 = random_chunk()
        self.ending_2 = random_chunk()
        self.string_1 = self.common_prefix + self.ending_1
        self.string_2 = self.common_prefix + self.ending_2
        self.not_string = self.common_prefix + random_chunk()

        self.root = TrieNode("")
        self.trie = Trie(self.root)
        for sentence in (self.string_1, self.string_2):
            self.trie.add_sentence(self.root, sentence)
コード例 #17
0
ファイル: SAFARI.py プロジェクト: guys79/SAFARI
def hill_climb(DS, a, M, N, option=1):
    """
    The Hill Climb algorithm from the paper (the main algorithm).

    :param DS: <SD,COMPS,OBS>. SD - the rules that define the connection between the components.
                COMPS - the components of the model. OBS - the inputs and outputs.
    :param a: The observation
    :param M: Climb restart limit
    :param N: number of tries
    :param option: 1 to use DiagnosisData as the store, 2 to use the Trie
    :return: list of diagnoses; ``[[]]`` as soon as an improved diagnosis has
             an empty second element; None when the store ends up empty or
             ``option`` is neither 1 nor 2
    """
    SD, COMPS, OBS = DS[0], DS[1], DS[2]

    if option not in (1, 2):
        return None
    R = DiagnosisData() if option == 1 else TrieNode("*")

    n = 0
    while n < N:
        w = random_diagnosis(SD, a)

        # Climb until M consecutive candidates have been rejected.
        m = 0
        while m < M:
            w_tag = improved_diagnosis(w)  # should be improved_diagnosis(w.p)
            if not doesnt_entail_false(SD, a, w_tag):
                m += 1
                continue
            if len(w_tag[1]) == 0:
                return [[]]
            # Accepted: continue from the improved diagnosis, counter reset.
            w = w_tag
            m = 0

        sub_diagnoses = R.search_sub_diagnosis(w[1])
        if not is_subsumed(sub_diagnoses):
            add_to_trie(R, w[1])
            remove_subsumed(R, sub_diagnoses)

        n += 1

    # The emptiness check depends on which store was chosen.
    if option == 1:
        store_is_empty = R.index == 0  # No solution
    else:
        store_is_empty = len(R.children) == 0
    if store_is_empty:
        return None
    return convert_trie_to_set_of_components(R)
コード例 #18
0
def load_corpus(corpus, case_sensitive=False):
    """Build a trie from an iterable of strings and return its root.

    Each word is inserted with itself as the value. The key is the word
    unchanged when ``case_sensitive`` is true, otherwise its lower-cased form.
    """
    trie_root = TrieNode()

    for entry in corpus:
        key = entry if case_sensitive else entry.lower()
        trie_root.insert(key, entry)

    return trie_root
コード例 #19
0
ファイル: exampleplugins.py プロジェクト: JBarnden/SiaVid
    def build(self, data):
        """Build a trie from a word-indexed mapping of chunk lists.

        Every non-empty word in ``data`` gets a trie node whose ``content``
        list receives that word's items.

        :param data: mapping from word to an iterable of items.
        :return: the populated ``Trie``.
        """
        words = data

        # Build a trie from the chunk lists.
        trie = Trie()
        for word in words:
            if word == '':
                continue

            target = trie.getSubtree(word)
            if target is None:
                # No subtree yet for this word: attach a fresh node.
                target = TrieNode()
                trie.addSubtree(word, target)
            else:
                # Existing subtree: append to its root node.
                target = target.root

            for item in words[word]:
                target.content.append(item)

        return trie
コード例 #20
0
def add(root, word: str):
    """
    Add ``word`` to the trie rooted at ``root``.

    The root's counter and the counter of every already-existing node on the
    path are incremented; missing nodes are created with ``TrieNode(char)``.
    The final node is flagged with ``word_finished = True``.
    """
    current = root
    current.counter += 1
    for symbol in word:
        if symbol in current.children:
            # Existing edge: count the visit and follow it.
            following = current.children[symbol]
            following.counter += 1
        else:
            # Missing edge: create the child and hook it in.
            following = TrieNode(symbol)
            current.children[symbol] = following
        current = following
    # End of the word reached.
    current.word_finished = True
コード例 #21
0
def test_should_set_trie_node_as_the_least_node():
    """set_as_last_node() makes is_last_node truthy."""
    node = TrieNode('a')
    node.set_as_last_node()
    assert node.is_last_node
コード例 #22
0
 def test_max_suggestions_negative(self):
     """A negative integer max_suggestions must raise ValueError."""
     root = TrieNode(label='')
     self.assertRaises(
         ValueError, root.generate_suggestions, max_suggestions=-5)
コード例 #23
0
def test_should_trie_node_has_a_child_node():
    """A child added under 'b' can be fetched again with get_child_node('b')."""
    parent = TrieNode('a')
    parent.add_child_node('b')
    assert parent.get_child_node('b') is not None
コード例 #24
0
def test_should_trie_node_has_children_be_false():
    """A freshly created node reports has_children() as False."""
    lonely_node = TrieNode('a')
    assert lonely_node.has_children() is False
コード例 #25
0
 def test_max_suggestions_float(self):
     """A float max_suggestions must raise TypeError."""
     root = TrieNode(label='')
     self.assertRaises(
         TypeError, root.generate_suggestions, max_suggestions=5.0)
コード例 #26
0
 def test_init(self):
     """A new node has ten child slots, and the first two checked here are None."""
     fresh = TrieNode(0.04)
     assert len(fresh.children) == 10
     for slot in (0, 1):
         assert fresh.children[slot] is None
コード例 #27
0
def test_should_get_child_node():
    """get_child_node('b') returns the node whose char is 'b'."""
    parent = TrieNode('a')
    parent.add_child_node('b')
    fetched = parent.get_child_node('b')
    assert fetched.char == 'b'
コード例 #28
0
def test_should_get_or_add_child_node():
    """get_or_add_child_node creates the child once and returns an equal node afterwards."""
    parent = TrieNode('a')
    created = parent.get_or_add_child_node('b')
    # The first call registered the child on the parent ...
    assert parent.get_child_node('b') is not None
    # ... and a second call compares equal to the node created first.
    assert parent.get_or_add_child_node('b') == created
コード例 #29
0
    def test_init(self):
        """isend is falsy by default and truthy when True is passed to the constructor."""
        default_node = TrieNode()
        self.assertFalse(default_node.isend)

        end_node = TrieNode(True)
        self.assertTrue(end_node.isend)
コード例 #30
0
def test_should_create_trie_node():
    """A new node keeps its char, is not a last node and has no children."""
    node = TrieNode('a')
    assert node.char == 'a'
    assert node.is_last_node is False
    assert node.has_children() is False