def ukkonen(text, n):
    """Build a suffix tree for ``text`` online, one text position at a time.

    A ``'$'`` terminator is appended to ``text``. The tree starts as a root
    with a single leaf labelled ``text[1:]``; positions ``2 .. n + 1`` are
    then processed in increasing order.
    NOTE(review): the indexing suggests ``text[0]`` is a padding symbol and
    ``n`` is the length of the real text -- confirm against the callers.

    Args:
        text: the input text (the terminator is added here).
        n: bound of the main loop, which runs over ``range(2, n + 2)``.

    Returns:
        A pair ``(root, S)``: the root ``trie.TrieNode`` and the dictionary
        ``S`` mapping nodes to nodes (presumably suffix links), seeded with
        ``S[root] = root``.
    """
    text = text + '$'
    root, leaf = trie.TrieNode(''), trie.TrieNode(text[1:])
    root.add_child(leaf)
    # ``head`` is the current node and ``shift`` the number of symbols already
    # matched on the edge below it (presumably the "active point" -- confirm).
    S, head, shift = {root: root}, root, 0
    for i in range(2, n + 2):
        # invariant: S[v] is defined for all v != head(i - 1)
        child = head.children.get(text[i - shift])
        # text[i] cannot be matched at offset ``shift`` of the edge below head:
        # new leaves have to be created before the position can advance.
        if (child is None or shift >= len(child.label)
                or text[i] != child.label[shift]):
            previous_head = None
            while shift > 0 or text[i] not in head.children:
                # Split the edge when standing in the middle of it, otherwise
                # attach the new leaf directly to ``head``.
                v = (break_node(head, head.children[text[i - shift]], shift)
                     if shift > 0 else head)
                v.add_child(trie.TrieNode(text[i:]))
                # S[root] is root itself, so at the root the pending part is
                # shortened by one symbol instead.
                if head == root:
                    shift -= 1
                # The node handled in the previous round gets linked to the
                # one handled in this round.
                if previous_head is not None:
                    S[previous_head] = v
                previous_head, head = v, S[head]
                # Re-locate the pending ``shift`` symbols below the new head.
                if shift > 0:
                    head, shift = fast_find(head,
                                            text[i - shift:i],
                                            split=False)
            # Close the chain: the last handled node links to the final head.
            if previous_head is not None:
                S[previous_head] = head
            child = head.children.get(text[i - shift]) if shift >= 0 else None
        # Matching text[i] consumes the whole edge: descend to the child and
        # start at its beginning; otherwise move one symbol further down.
        if child is not None and len(child.label) == shift + 1:
            head, shift = head.children[text[i - shift]], 0
        else:
            shift += 1
    return root, S
Beispiel #2
0
def _get_suffix_tree(t):
    if len(t) == 1:
        return [1], [0]
    root, leaf = trie.TrieNode(''), trie.TrieNode(t[0:])
    root.node_type, leaf.node_type = NODETYPE.NONE, NODETYPE.NONE
    root.add_child(leaf)
    To, A_To, LCP_To = _get_odd_suffix_tree(t)
    Te, A_Te, LCP_Te = _get_even_suffix_tree(t, To, A_To)

    Tovermerged = _get_faulty_merge(To, Te, len(t))
    Tovermerged.odd_even_pairs = {}
    _initialise_odd_even_pairs_recurse(Tovermerged, len(t),
                                       Tovermerged.odd_even_pairs)
    LCA = lca.LCA(Tovermerged)
    Tovermerged.d_links = {
        k: LCA.query(o + 1, e + 1)
        for k, (o, e) in Tovermerged.odd_even_pairs.items()
        if o and e and k != len(t) + 1 and o < len(t) and e < len(t)
    }
    Tovermerged.depth_in_d_tree = {root: 0}
    for pair in Tovermerged.odd_even_pairs:
        _compute_depth_in_d_tree(pair, Tovermerged.depth_in_d_tree,
                                 Tovermerged.d_links)

    return _merge_suffix_arrays(A_To, A_Te, LCP_To, LCP_Te, LCA, t,
                                Tovermerged.depth_in_d_tree)
def from_suffix_array_and_lcp(SA, LCP, text, n):
  """Build a suffix tree from a suffix array and its LCP array.

  Suffixes are inserted in the order given by ``SA`` (1-based start
  positions, since ``text[a - 1:]`` is used). For each suffix the walk starts
  at the previously inserted node and climbs until the node depth is at most
  the LCP value shared with the previous suffix; an edge is split with
  ``break_node`` when no node exists at exactly that depth.

  Args:
    SA: suffix array with 1-based positions.
    LCP: ``LCP[k]`` is used together with ``SA[k + 1]``.
    text: the indexed text.
    n: base for node indices; the root gets ``n + 2`` and a node
      representing a suffix gets ``n + 2 - len(suffix)``.

  Returns:
    The root node of the constructed tree.
  """
  root = trie.TrieNode('')
  first_leaf = trie.TrieNode(text[SA[0] - 1:])
  root.set_depth(0)
  root.add_child(first_leaf)
  first_leaf.set_depth(len(first_leaf.label))
  root.index = n + 2
  first_leaf.index = n + 2 - first_leaf.depth

  fresh_index = root.index + 1
  previous = first_leaf
  for start, shared in zip(SA[1:], LCP):
    suffix = text[start - 1:]

    # Climb from the last inserted node to depth <= shared prefix length.
    anchor = previous
    while anchor.depth > shared:
      anchor = anchor.parent

    if anchor.depth != shared:
      # No node at depth ``shared``: split the edge to the rightmost child.
      rightmost = max(anchor.children.items())[1]
      middle = break_node(anchor, rightmost, shared - anchor.depth)
      middle.set_depth(shared)
      tail = suffix[shared:]
      if tail:
        inserted = trie.TrieNode(tail)
        middle.add_child(inserted)
        # Only a split node that gains a child receives a fresh index.
        middle.index = fresh_index
        fresh_index += 1
      else:
        inserted = middle
    elif anchor.depth == len(suffix):
      # The suffix ends exactly at an existing node.
      inserted = anchor
    else:
      inserted = trie.TrieNode(suffix[anchor.depth:])
      anchor.add_child(inserted)

    inserted.index = n + 2 - len(suffix)
    inserted.set_depth(len(suffix))
    previous = inserted
  return root
def naive(text, n):
    """Build a suffix tree by inserting every suffix from the root.

    A ``'$'`` terminator is appended to ``text``; the tree starts with a leaf
    labelled ``text[1:]`` and the suffixes starting at positions
    ``2 .. n + 1`` are then inserted one by one with ``slow_find``.
    NOTE(review): ``slow_find`` presumably returns the node where matching
    stopped and the number of unmatched symbols -- confirm.

    Returns:
        The root node of the tree.
    """
    text = text + '$'
    root = trie.TrieNode('')
    root.add_child(trie.TrieNode(text[1:]))
    for start in range(2, n + 2):
        attach_at, unmatched = slow_find(root, text[start:])
        # The last ``unmatched`` symbols of the text become the leaf label.
        attach_at.add_child(trie.TrieNode(text[-unmatched:]))
    return root
def get_suffix_tree(text):
  """Build the suffix tree, suffix array and LCP array of ``text``.

  For a single-element ``text`` a root with one leaf is returned together
  with ``[1]`` and ``[0]``. Otherwise the odd and even trees are built and
  merged with ``get_faulty_merge``, the merged tree is annotated (odd/even
  pairs, d-links, depths in the d-tree), the suffix and LCP arrays are merged
  and the final tree is rebuilt from them.

  Returns:
    A triple ``(tree_root, suffix_array, lcp_array)``.
  """
  root = trie.TrieNode('')
  root.add_child(trie.TrieNode(text[0:]))
  if len(text) == 1:
    return root, [1], [0]

  odd_tree, odd_sa, odd_lcp = get_odd_suffix_tree(text)
  even_tree, even_sa, even_lcp = get_even_suffix_tree(text, odd_tree, odd_sa)

  overmerged = get_faulty_merge(odd_tree, even_tree, text)
  initialise_odd_even_pairs(overmerged, len(text))

  lca_oracle = LCA()
  lca_oracle.preprocess(overmerged)
  compute_d_links(lca_oracle, len(text), overmerged)
  compute_depths_in_d_tree(overmerged)

  merged_sa, merged_lcp = merge_suffix_arrays(
      odd_sa, even_sa, odd_lcp, even_lcp, lca_oracle, text,
      overmerged.depth_in_d_tree)
  tree = suffix_and_lcp_array_to_tree(merged_sa, merged_lcp, text)
  return tree, merged_sa, merged_lcp
def suffix_and_lcp_array_to_tree(A, LCP, text):
  """Build a suffix tree from suffix array ``A`` and LCP array ``LCP``.

  Suffixes are inserted in the order given by ``A`` (1-based start positions,
  since ``text[A[i] - 1:]`` is used). ``LCP[i - 1]`` is used together with
  ``A[i]``. A node representing a suffix gets index
  ``len(text) - len(suffix) + 1``; the root gets ``len(text) + 1`` and split
  nodes that gain a child get consecutive indices after that.

  Returns:
    The root node of the constructed tree.
  """
  text_len = len(text)
  count = len(A)

  root = trie.TrieNode('')
  first_leaf = trie.TrieNode(text[A[0] - 1:])
  root.set_depth(0)
  root.add_child(first_leaf)
  first_leaf.set_depth(len(first_leaf.label))
  first_leaf.index = text_len + 1 - first_leaf.depth
  root.index = text_len + 1
  next_index = text_len + 2

  previous = first_leaf
  for position in range(1, count):
    suffix = text[A[position] - 1:]
    lcp = LCP[position - 1]

    # Climb from the last inserted node until depth <= lcp.
    anchor = previous
    while anchor.depth > lcp:
      anchor = anchor.parent

    if anchor.depth != lcp:
      # No node at depth ``lcp``: split the edge to the rightmost child.
      rightmost = sorted(anchor.children.items())[-1][1]
      middle = suffix_tree.break_node(anchor, rightmost, lcp - anchor.depth)
      middle.set_depth(lcp)
      tail = suffix[lcp:]
      if tail:
        inserted = trie.TrieNode(tail)
        middle.add_child(inserted)
        middle.index = next_index
        next_index += 1
      else:
        inserted = middle
    elif anchor.depth == len(suffix):
      # The suffix ends exactly at an existing node.
      inserted = anchor
    else:
      inserted = trie.TrieNode(suffix[anchor.depth:])
      anchor.add_child(inserted)

    inserted.index = text_len - len(suffix) + 1
    inserted.set_depth(len(suffix))
    previous = inserted
  return root
def mccreight(text, n):
    """Build a suffix tree for ``text``, inserting suffixes from longest to shortest.

    A ``'$'`` terminator is appended to ``text``. The tree starts as a root
    with a single leaf labelled ``text[1:]``; the main loop then runs
    ``n`` times, each round recording ``S[head]`` and attaching one new leaf.
    NOTE(review): the indexing suggests ``text[0]`` is a padding symbol and
    ``n`` is the length of the real text -- confirm against the callers.

    Args:
        text: the input text (the terminator is added here).
        n: number of rounds of the main loop (``range(2, n + 2)``).

    Returns:
        A pair ``(root, S)``: the root ``trie.TrieNode`` and the dictionary
        ``S`` mapping nodes to nodes (presumably suffix links).
    """
    text = text + '$'
    root, leaf = trie.TrieNode(''), trie.TrieNode(text[1:])
    root.add_child(leaf)
    S, head = {}, root
    for _ in range(2, n + 2):
        # invariant: S[v] is defined for all v != head(i - 1)
        # ``leaf`` is the leaf attached in the previous round (initially the
        # only leaf), ``head`` is the node it was attached to.
        if head == root:
            # exception 1: tree with a single leaf
            beta, gamma, v = '', head.children[leaf.label[0]].label[1:], root
        else:
            if head.parent == root:
                # exception 2: head.parent is the root
                beta = head.parent.children[head.label[0]].label[1:]
            else:
                beta = head.parent.children[head.label[0]].label
            gamma = head.children[leaf.label[0]].label
            # Re-locate ``beta`` below the link target of head's parent.
            # NOTE: ``_`` rebinds the (unused) loop variable here.
            v, _ = fast_find(S[head.parent], beta, split=True)
        S[head] = v
        # Continue matching ``gamma`` from ``v``; the last ``remaining``
        # symbols of the text become the label of the new leaf.
        head, remaining = slow_find(v, gamma)
        leaf = trie.TrieNode(text[-remaining:])
        head.add_child(leaf)
    return root, S
def weiner(text, n):
    """Build a suffix tree for ``text``, processing positions from right to left.

    A ``'$'`` terminator is appended to ``text`` and positions
    ``n + 1`` down to ``1`` are processed; each round attaches one leaf.
    NOTE(review): the indexing suggests ``text[0]`` is a padding symbol and
    ``n`` is the length of the real text -- confirm against the callers.

    Args:
        text: the input text (the terminator is added here).
        n: the loop runs over ``range(n + 1, 0, -1)``.

    Returns:
        A pair ``(root, link)``: the root ``trie.TrieNode`` and the
        dictionary ``link`` keyed by ``(node, symbol)`` tuples.
    """
    text = text + '$'
    root = trie.TrieNode('')
    link, head = {(root, ''): root}, root
    for i in range(n + 1, 0, -1):
        # invariant: link[v][c] = u for u and v such that word(u) = c word(v)
        # (stored here as link[(v, c)])
        v, depth = head, n + 2
        # Climb from the previous leaf until a node with a link for text[i]
        # is found or the root is reached; ``depth`` shrinks by each edge
        # label that is climbed over.
        while v != root and link.get((v, text[i])) is None:
            v, depth = v.parent, depth - len(v.label)
        u = link.get((v, text[i]))
        if u is None or text[depth] in u.children:
            if u is None:
                u, remaining = slow_find(root, text[depth - 1:])
            else:
                u, remaining = slow_find(u, text[depth:])
            # Move ``v`` down by the part that ``slow_find`` matched, then
            # recompute ``depth`` from the unmatched remainder.
            v, _ = fast_find(v, text[depth:-remaining], False)
            depth = len(text) - remaining
            if u != root:
                link[(v, text[i])] = u
        leaf = trie.TrieNode(text[depth:])
        u.add_child(leaf)
        head = leaf
    return root, link
Beispiel #9
0
def _get_faulty_merge_recurse(node, odd, even, n):
    """Merge the subtrees below ``odd`` and ``even`` into ``node``, recursively.

    The children of both nodes are walked in sorted key order. A key that
    occurs on one side only is copied as an ODD or EVEN node. For a key that
    occurs on both sides only the label lengths are compared (the labels
    themselves are not), so the result may merge more than it should:
    the longer edge is split with ``sufftree.break_node`` at the length of
    the shorter one, and equal lengths produce a single BOTH node.

    Side effects: children are added to ``node``; ``odd`` / ``even`` subtrees
    are modified when an edge is split.

    Args:
        node: node of the output tree that receives the merged children.
        odd: node of the odd tree whose children are merged.
        even: node of the even tree whose children are merged.
        n: passed through to ``_create_node``; the output root (the node
            without a parent) gets index ``n + 1``.
    """
    if node.parent is None:
        node.index = n + 1
    # Lists sorted in the order: odd, even, output
    o_children = sorted(odd.children.items())
    e_children = sorted(even.children.items())
    i, j = 0, 0
    while i < len(o_children) or j < len(e_children):
        # Current candidates; (None, None) once a side is exhausted.
        o_char, o_child = o_children[i] if i < len(o_children) else (None,
                                                                     None)
        e_char, e_child = e_children[j] if j < len(e_children) else (None,
                                                                     None)

        # Childless stand-in for the missing side when a subtree exists in
        # one tree only.
        empty_node = trie.TrieNode('')
        empty_node.node_type = NODETYPE.NONE
        if i == len(o_children):
            # Odd side exhausted: copy the even child.
            new_node = _create_node(node, e_child, NODETYPE.EVEN, n)
            _get_faulty_merge_recurse(new_node, empty_node, e_child, n)
            j += 1
        elif j == len(e_children) or o_char < e_char:
            # Even side exhausted, or the odd key comes first.
            new_node = _create_node(node, o_child, NODETYPE.ODD, n)
            _get_faulty_merge_recurse(new_node, o_child, empty_node, n)
            i += 1
        elif o_char == e_char:
            if len(e_child.label) > len(o_child.label):
                # Even edge is longer: split it at the odd label length and
                # continue with the split node on the even side.
                new_node = _create_node(node, o_child, NODETYPE.ODD, n)
                split_node = sufftree.break_node(even, e_child,
                                                 len(o_child.label))
                split_node.node_type = NODETYPE.NONE
                _get_faulty_merge_recurse(new_node, o_child, split_node, n)
            elif len(e_child.label) < len(o_child.label):
                # Odd edge is longer: the symmetric case.
                new_node = _create_node(node, e_child, NODETYPE.EVEN, n)
                split_node = sufftree.break_node(odd, o_child,
                                                 len(e_child.label))
                split_node.node_type = NODETYPE.NONE
                _get_faulty_merge_recurse(new_node, split_node, e_child, n)
            else:
                # Equal lengths: one BOTH node; the even child is the
                # template unless its index exceeds n.
                new_node = _create_node(
                    node, o_child if e_child.index > n else e_child,
                    NODETYPE.BOTH, n)
                _get_faulty_merge_recurse(new_node, o_child, e_child, n)
            i, j = i + 1, j + 1
        else:
            # The even key comes first.
            new_node = _create_node(node, e_child, NODETYPE.EVEN, n)
            _get_faulty_merge_recurse(new_node, empty_node, e_child, n)
            j += 1
def get_faulty_merge(To, Te, text):
  """Merge the odd tree ``To`` and the even tree ``Te`` into a new tree.

  Resets the ``next_index`` counter kept on ``get_faulty_merge_recurse`` to
  ``len(text) + 2`` and delegates the actual work to that function.

  Returns:
    The root of the merged tree.
  """
  merged_root = trie.TrieNode('')
  # The counter lives on the recursive function object and must be reset
  # before every merge.
  get_faulty_merge_recurse.next_index = len(text) + 2
  get_faulty_merge_recurse(merged_root, To, Te, text)
  return merged_root
def get_faulty_merge_recurse(node, odd, even, text):
  """Merge the subtrees below ``odd`` and ``even`` into ``node``, recursively.

  The children of both nodes are walked in sorted key order. A key that
  occurs on one side only is copied and marked with an ``odd`` or ``even``
  attribute set to True. For a key that occurs on both sides only the label
  lengths are compared (the labels themselves are not): the longer edge is
  split with ``suffix_tree.break_node`` at the length of the shorter one, and
  equal lengths produce one node marked both ``odd`` and ``even``.

  Side effects: children are added to ``node``; the ``odd`` / ``even``
  subtrees are modified when an edge is split; the function attribute
  ``get_faulty_merge_recurse.next_index`` is advanced and must be initialised
  by the caller before the first call.

  Args:
    node: node of the output tree that receives the merged children.
    odd: node of the odd tree whose children are merged.
    even: node of the even tree whose children are merged.
    text: the indexed text; only its length is used here.
  """
  def set_index(new_nd, old_nd):
    # Indices up to len(text) are copied from the source node; any other
    # node gets the next fresh index from the shared counter.
    n = len(text)
    if old_nd.index <= n:
      new_nd.index = old_nd.index
    else:
      new_nd.index = get_faulty_merge_recurse.next_index
      get_faulty_merge_recurse.next_index += 1

  # The output root (the node without a parent) gets index len(text) + 1.
  if node.parent is None:
    node.index = len(text) + 1
  i = 0
  j = 0
  # The two `sorted` calls below are not linear-time; they are written this
  # way for clarity.
  # Farach's algorithm works on child lists that are already sorted in the
  # odd, even and resulting tree, but the current API represents the set of
  # children as a dictionary, hence the calls to `sorted`; with lists instead
  # of dicts they could simply be omitted.
  o_children = sorted(odd.children.items())
  e_children = sorted(even.children.items())
  while i < len(o_children) or j < len(e_children):
    # Current candidates; they stay None once a side is exhausted.
    o_child = e_child = o_char = e_char = None
    if i < len(o_children):
      o_child = o_children[i][1]
      o_char = o_children[i][0]
    if j < len(e_children):
      e_child = e_children[j][1]
      e_char = e_children[j][0]

    if i == len(o_children):
      # Odd side exhausted: copy the even child.
      new_node = trie.TrieNode(e_child.label)
      set_index(new_node, e_child)
      node.add_child(new_node)
      setattr(new_node, "even", True)
      get_faulty_merge_recurse(new_node, trie.TrieNode(""), e_child, text)
      j += 1
      continue
    if j == len(e_children):
      # Even side exhausted: copy the odd child.
      new_node = trie.TrieNode(o_child.label)
      set_index(new_node, o_child)
      node.add_child(new_node)
      setattr(new_node, "odd", True)
      get_faulty_merge_recurse(new_node, o_child, trie.TrieNode(""), text)
      i += 1
      continue

    if o_char == e_char:
      o_len = len(o_child.label)
      e_len = len(e_child.label)
      if o_len != e_len:
        # The new node takes the shorter label and the mark of its side.
        odd_shorter, short_label = (True, o_child.label) if e_len > o_len else (
            False, e_child.label)
        new_node = trie.TrieNode(short_label)
        node.add_child(new_node)
        if odd_shorter:
          setattr(new_node, "odd", True)
          set_index(new_node, o_child)
        else:
          setattr(new_node, "even", True)
          set_index(new_node, e_child)
        # add a new artificial node in the even or odd tree to recurse
        # properly
        if odd_shorter:
          split_node = suffix_tree.break_node(even, e_child, o_len)
          get_faulty_merge_recurse(new_node, o_child, split_node, text)
        else:
          split_node = suffix_tree.break_node(odd, o_child, e_len)
          get_faulty_merge_recurse(new_node, split_node, e_child, text)
      else:  # o_len == e_len and o_char == e_char
        # One node for both sides; the index comes from the even child when
        # its index is at most len(text), otherwise from the odd child.
        new_node = trie.TrieNode(o_child.label)
        set_index(new_node, e_child if e_child.index <= len(text) else o_child)
        node.add_child(new_node)
        setattr(new_node, "odd", True)
        setattr(new_node, "even", True)
        get_faulty_merge_recurse(new_node, o_child, e_child, text)
      i += 1
      j += 1
    else:
      if o_char < e_char:
        # The odd key comes first: copy the odd child.
        new_node = trie.TrieNode(o_child.label)
        set_index(new_node, o_child)
        node.add_child(new_node)
        get_faulty_merge_recurse(new_node, o_child, trie.TrieNode(""), text)
        i += 1
        setattr(new_node, "odd", True)
      else:
        # The even key comes first: copy the even child.
        new_node = trie.TrieNode(e_child.label)
        set_index(new_node, e_child)
        node.add_child(new_node)
        get_faulty_merge_recurse(new_node, trie.TrieNode(""), e_child, text)
        j += 1
        setattr(new_node, "even", True)
def break_node(parent, child, index):
    """Split the edge leading to ``child`` after ``index`` label symbols.

    A new node labelled with the first ``index`` symbols of ``child.label``
    is created; ``child`` keeps the rest of its label and becomes a child of
    the new node, which in turn is added as a child of ``parent``.
    NOTE(review): this relies on ``parent.add_child`` replacing the old
    entry for ``child`` -- confirm in ``trie.TrieNode``.

    Returns:
        The newly created intermediate node.
    """
    upper_label, lower_label = child.label[:index], child.label[index:]
    middle = trie.TrieNode(upper_label)
    child.label = lower_label
    middle.add_child(child)
    parent.add_child(middle)
    return middle
Beispiel #13
0
def _get_faulty_merge(To, Te, n):
    """Merge the odd tree ``To`` and the even tree ``Te`` into a new tree.

    Creates a root of type ``NODETYPE.NONE``, resets the ``next_index``
    counter kept on ``_get_faulty_merge_recurse`` to ``n + 2`` and delegates
    the actual work to that function.

    Returns:
        The root of the merged tree.
    """
    merged_root = trie.TrieNode('')
    merged_root.node_type = NODETYPE.NONE
    # The counter lives on the recursive function object and must be reset
    # before every merge.
    _get_faulty_merge_recurse.next_index = n + 2
    _get_faulty_merge_recurse(merged_root, To, Te, n)
    return merged_root
Beispiel #14
0
def _create_node(parent, other, node_type, n):
    """Create a copy of ``other`` (same label) and attach it under ``parent``.

    The copy gets ``node_type`` and an index assigned by ``_set_index``
    before it is added to ``parent``.

    Returns:
        The newly created node.
    """
    clone = trie.TrieNode(other.label)
    clone.node_type = node_type
    _set_index(clone, other, n)
    parent.add_child(clone)
    return clone