def createExampleToReport():
    ''' Builds a small hand-made example tree, preprocesses it for LCA
        queries and prints the result of root.fancyprintLCA().

        Shape of the tree (children listed left to right):
            root   -> inner2, inner8
            inner2 -> 3, inner4
            inner4 -> 5, 6, 7
            inner8 -> 9, 10
    '''
    root = Node(aId="root")

    left_inner = Node(aId="inner2")
    right_inner = Node(aId="inner8")

    root.add_child(left_inner)

    # left subtree: a single leaf followed by a nested inner node
    first_leaf = Node(aId=3)
    left_inner.add_child(first_leaf)

    nested_inner = Node(aId="inner4")
    left_inner.add_child(nested_inner)

    nested_leaves = [Node(aId=leaf_id) for leaf_id in (5, 6, 7)]
    for leaf in nested_leaves:
        nested_inner.add_child(leaf)

    # right subtree: two leaves directly below inner8
    root.add_child(right_inner)
    right_leaves = [Node(aId=leaf_id) for leaf_id in (9, 10)]
    for leaf in right_leaves:
        right_inner.add_child(leaf)

    solver = lca.LCA()
    solver.preprocess(root)

    print(root.fancyprintLCA())
def test_tree_one():
    ''' Checks LCA queries on the tree from createTreeOne() by comparing the
        id of the returned ancestor for a handful of node pairs.
    '''
    root, leaves = createTreeOne()

    solver = lca.LCA()
    solver.preprocess(root)

    # (index of first node, index of second node, expected ancestor id)
    expected_ancestors = (
        (0, 1, "inner1"),
        (2, 3, "inner2"),
        (0, 3, "root"),
        (3, 0, "root"),
        (4, 5, "root"),
    )
    for first, second, ancestor_id in expected_ancestors:
        assert solver.query(leaves[first], leaves[second]).id == ancestor_id
def test_tree_two():
    ''' Checks LCA queries on the tree from createTreeTwo() by comparing the
        PREORDER attribute of the returned ancestor for several node pairs.
    '''
    root, leaves = createTreeTwo()
    solver = lca.LCA()
    solver.preprocess(root)

    # (index of first node, index of second node, expected PREORDER)
    expected_preorders = (
        (7, 8, 5),
        (0, 1, 2),
        (1, 3, 1),
        (1, 4, 1),
        (1, 5, 1),
        (0, 5, 1),
        (4, 5, 8),
    )
    for first, second, preorder in expected_preorders:
        assert solver.query(leaves[first], leaves[second]).PREORDER == preorder
def check_correctness(string):
    ''' Asserts that the preprocessed LCA structure agrees with
        farach.naive_lca on every ordered pair of leaves of the suffix tree
        constructed for string.

        Parameters:
            string: the input handed to farach.str2int before the tree is
                    constructed

        Raises AssertionError on the first pair for which the two answers
        differ.
    '''
    encoded = farach.str2int(string)
    constructed_tree = farach.construct_suffix_tree(encoded)

    # map the id of every node whose id does not contain 'inner' to the node
    id2node = {}

    def register(node):
        if 'inner' not in str(node.id):
            id2node[node.id] = node

    constructed_tree.traverse(register)

    # the method has to be called; a bare attribute reference is a no-op
    constructed_tree.update_leaf_list()
    leaflist = constructed_tree.leaflist
    lca_al = lca.LCA()
    lca_al.preprocess(constructed_tree)

    for i in leaflist:
        for j in leaflist:
            assert farach.naive_lca(i, j, constructed_tree,
                                    id2node) == lca_al.query(i, j)
def test_tree_four():
    ''' Checks LCA queries on the suffix tree of 'mississippi' by comparing
        the string form of the leaflist of the returned ancestor.
    '''
    string = farach.str2int('mississippi')
    constructed_tree = farach.construct_suffix_tree(string)
    # the method has to be called; a bare attribute reference is a no-op
    constructed_tree.update_leaf_list()
    leaflist = constructed_tree.leaflist

    lca_al = lca.LCA()
    lca_al.preprocess(constructed_tree)

    # ((index of first leaf, index of second leaf), expected str(leaflist))
    expectations = (
        ((1, 2), "[node2, node5]"),
        ((0, 2),
         "[node1, node2, node5, node8, node11, node4, node7, node3, node6, node10, node9, node12]"),
        ((6, 8), "[node4, node7, node3, node6]"),
        ((9, 10), "[node10, node9]"),
    )
    for (first, second), expected in expectations:
        ancestor = lca_al.query(leaflist[first], leaflist[second])
        assert str(ancestor.leaflist) == expected
def test_tree_three():
    ''' Checks LCA queries on the suffix tree of '12121' by comparing the
        string form of the leaflist of the returned ancestor.
    '''
    string = farach.str2int('12121')
    constructed_tree = farach.construct_suffix_tree(string)
    # the method has to be called; a bare attribute reference is a no-op
    constructed_tree.update_leaf_list()
    leaflist = constructed_tree.leaflist
    lca_al = lca.LCA()
    lca_al.preprocess(constructed_tree)

    # ((index of first leaf, index of second leaf), expected str(leaflist))
    expectations = (
        ((0, 1), "[node1, node3]"),
        ((2, 3), "[node1, node3, node5, node2, node4, node6]"),
        ((0, 2), "[node1, node3, node5]"),
        ((3, 4), "[node2, node4]"),
        ((0, 5), "[node1, node3, node5, node2, node4, node6]"),
    )
    for (first, second), expected in expectations:
        ancestor = lca_al.query(leaflist[first], leaflist[second])
        assert str(ancestor.leaflist) == expected
def compute_lcp_tree(t_overmerged):
    ''' Augments every, to the algorithm relevant, node in t_overmerged with
        an attribute, node.suffix_link, pointing to the node representing
        the string of the current node minus first character, and then
        stores an lcp_depth attribute on the nodes reachable through those
        links.

        Parameters:
            t_overmerged: root of the overmerged tree; nodes that carry an
                          lca_even attribute are expected to carry lca_odd
                          as well

        Returns None; the tree is modified in place.

        Intended running time: O(n), which relies on lca queries being
        answered in constant time by fast_lca.LCA (not verifiable from this
        function alone).
    '''
    # every node with an lca_even attribute contributes one
    # (lca_even, lca_odd) pair whose lca needs a suffix link
    lca_nodepairs = []

    def collect_pair(node):
        if hasattr(node, 'lca_even'):
            lca_nodepairs.append((node.lca_even, node.lca_odd))

    t_overmerged.traverse(collect_pair)

    # map the id of every node whose id does not contain 'inner' to the node
    id2node = {}

    def register(node):
        if 'inner' not in str(node.id):
            id2node[node.id] = node

    t_overmerged.traverse(register)

    # ---------------------------------------
    # CREATE LCP TREE
    # ---------------------------------------
    lca_f = fast_lca.LCA()
    lca_f.preprocess(t_overmerged)
    for node1, node2 in lca_nodepairs:
        # the lca is looked up through the preprocessed structure instead
        # of naive_lca, as needed for the O(n) bound (cf. [Ht84])
        ancestor = lca_f.query(id2node[node1.id], id2node[node2.id])

        if (ancestor.id == 'root' or node1.id + 1 not in id2node
                or node2.id + 1 not in id2node):
            # we cannot create a suffix link from root as it is undefined;
            # the pair is also skipped when either following suffix has no
            # node in the tree
            continue
        node1_next = id2node[node1.id + 1]
        node2_next = id2node[node2.id + 1]

        ancestor.suffix_link = lca_f.query(node1_next, node2_next)

    # ---------------------------------------
    # ADD LCP DEPTH TO ALL NODES USING A SINGLE BFS
    # ---------------------------------------
    def lcp_depth(node):
        if hasattr(node, 'lcp_depth'):
            # we already computed this node as a result of computing an
            # earlier node with a suffix link to this node, no need to
            # repeat the computation
            return node.lcp_depth

        if hasattr(node, 'suffix_link'):
            target = node.suffix_link
            if hasattr(target, 'lcp_depth'):
                node.lcp_depth = target.lcp_depth + 1
            else:
                # our suffix link is to a node for which we have not yet
                # computed the lcp depth; do so, return it to here and
                # continue the bfs. The target is simply skipped when the
                # bfs reaches it later
                node.lcp_depth = lcp_depth(target) + 1
            return node.lcp_depth

        # nodes without a suffix link get no lcp_depth
        return None

    t_overmerged.lcp_depth = 0
    t_overmerged.bfs(lcp_depth)
def T_even(t_odd, inputstr):
    ''' Builds the tree of even suffixes of inputstr from the tree of odd
        suffixes t_odd.

        Steps, matching the numbered comments in the body:
        (i)   order the even suffixes by radix sorting pairs of
              (character in front of an odd suffix, that odd suffix), the
              odd suffixes being read off the leaves of t_odd in dfs order
        (ii)  compute the lcp of every two adjacent even suffixes using
              lca queries on t_odd
        (iii) insert the even suffixes, in sorted order, into a new tree

        Parameters:
            t_odd:    root of the tree of odd suffixes; leaf ids are used
                      as 1-indexed suffix positions
            inputstr: indexable input for which int(inputstr[k]) is valid
                      at the positions read in step (i)

        Returns the root of the new tree, after update_leaf_list() has been
        called on it.

        Raises IndexError when there is no even suffix at all, as step
        (iii) unconditionally starts from the first one.
    '''
    S = inputstr
    n = len(S)
    # (i)
    # find the lexicographical ordering of the even suffixes
    leaflist = []

    def get_leafs(node):
        if node.is_leaf():
            leaflist.append(node)

    t_odd.dfs(get_leafs)

    odd_suffix_ordering = [node.id for node in leaflist]

    # even_suffixes is a list of tuples (x[2i], suffix[2i + 1]) to radix sort
    even_suffixes = [(int(S[suffix - 2]), suffix)
                     for suffix in odd_suffix_ordering if suffix != 1]

    radixsort.sort(even_suffixes, 0)

    even_suffixes = [tup[1] - 1 for tup in even_suffixes]
    # in case S is of even length, n % 2 == 0, the even suffix at pos n
    # is the last one in the sorted list, as it starts with character '$'
    # which, by definition, is ranked as |alphabet| + 1, i.e. last character
    # We need to add this one specifically, as it is not found by counting
    # all odd suffixes down by one
    # e.g.: if the inputstr is of length 4, then odd suffixes are 1 and 3
    #       if we only count even suffixes as odd suffixes prefixed with
    #       a character, we will never capture 4, as 5 is not an odd suffix
    #       hence why we need to manually add it as the last one as it is '$'
    if n % 2 == 0:
        even_suffixes.append(n)

    # (ii)
    # compute lcp for adjacent even suffixes

    lca_f = fast_lca.LCA()
    lca_f.preprocess(t_odd)

    # map the id of every node of t_odd whose id does not contain 'inner'
    # to the node
    odd_id2node = {}

    def register(node):
        if 'inner' not in str(node.id):
            odd_id2node[node.id] = node

    t_odd.traverse(register)

    lcp = {}
    for i, j in zip(even_suffixes, even_suffixes[1:]):
        curr_lcp = 0

        if (S[i - 1] == S[j - 1] and i < n and j < n):
            if j + 1 in odd_id2node and i + 1 in odd_id2node:
                # equal first characters: the lcp is one more than the
                # string length of the lca of the two following odd suffixes
                lca_parent = lca_f.query(odd_id2node[i + 1],
                                         odd_id2node[j + 1])
                curr_lcp = lca_parent.str_length + 1
            else:
                curr_lcp = 1

        lcp[(i, j)] = curr_lcp

    # (iii)
    # construct T_even using information from (i) and (ii)
    root = Node(aId='root')
    fst_suf = even_suffixes[0]
    fst_suf_len = n - fst_suf + 1  # S[fst_suf - 1:]

    node_fst_suf = Node(fst_suf_len, fst_suf)
    root.add_child(node_fst_suf)
    # leaves inserted so far, keyed by their suffix position
    even_id2node = {fst_suf: node_fst_suf}

    for i in range(1, len(even_suffixes)):
        prev_suf = even_suffixes[i - 1]
        curr_suf = even_suffixes[i]
        curr_lcp = lcp[(prev_suf, curr_suf)]
        prev_lcp = None
        if i > 1:
            prevprev_suf = even_suffixes[i - 2]
            prev_lcp = lcp[(prevprev_suf, prev_suf)]

        if curr_lcp == 0:
            # nothing in common with the previous suffix: new child of root
            curr_suf_len = n - curr_suf + 1
            new_node = Node(curr_suf_len, curr_suf)
            root.add_child(new_node)
            even_id2node[curr_suf] = new_node
        else:
            if prev_lcp:

                prev_node = even_id2node[prev_suf]

                # we need to append the new node to somewhere on the
                # path from root to the parent of the prev_node.
                # This might involve following a lot of nodes'
                # parentEdges to find the spot
                # TODO: is it O(n)???
                remaining_until_insertion = prev_lcp - curr_lcp

                possible_insertion_node = prev_node.parent

                while remaining_until_insertion > 0:
                    # run up through parentEdges until
                    # remaining_until_insertion is 0
                    len_of_edge = (
                        possible_insertion_node.str_length
                        - possible_insertion_node.parent.str_length)

                    remaining_until_insertion -= len_of_edge
                    possible_insertion_node = possible_insertion_node.parent

                # possible_insertion_node is now the spot at which we
                # should place curr_suf
                # we need to pop the rightmost child of the
                # possible_insertion_node as we need to insert an inner
                # node with this child and our new_node as children in
                # place of this rightmost child, if
                # remaining_until_insertion is negative and not exactly
                # 0, in which case we can just add_child(new_node)

                if remaining_until_insertion == 0:
                    len_newnode = n - curr_suf + 1
                    new_node = Node(len_newnode, curr_suf)
                    even_id2node[curr_suf] = new_node
                    possible_insertion_node.add_child(new_node)
                else:
                    child_of_insertion_node = (
                        possible_insertion_node.children.pop())
                    split_idx = abs(remaining_until_insertion)
                    inner_parentEdge_len = (
                        child_of_insertion_node.parent.str_length + split_idx)
                    innernode = Node(inner_parentEdge_len, 'inner')
                    len_newnode = n - curr_suf + 1
                    new_node = Node(len_newnode, curr_suf)

                    possible_insertion_node.add_child(innernode)

                    innernode.add_child(child_of_insertion_node)
                    innernode.add_child(new_node)

                    even_id2node[curr_suf] = new_node

            else:
                # prev_lcp is None or 0: an inner node of length curr_lcp
                # takes the place of prev_node below prev_node's parent
                innernode_len = curr_lcp
                innernode = Node(innernode_len, 'inner')

                new_node_len = n - curr_suf + 1
                new_node = Node(new_node_len, curr_suf)

                even_id2node[curr_suf] = new_node
                prev_node = even_id2node[prev_suf]

                # prev_node is the rightmost child of its parent; replace
                # it there by the new inner node
                prev_node.parent.children[-1] = innernode
                innernode.parent = prev_node.parent

                # important! prev_node must be added before new_node to
                # keep lexicographic ordering of children
                innernode.add_child(prev_node)
                innernode.add_child(new_node)

    t_even = root
    t_even.update_leaf_list()

    return t_even