def test_repr(self):
     st = SuffixTree("t")
     output = '\tStart \tEnd \tSuf \tFirst \tLast \tString\n\t0 \t1 \t-1 \t0 \t0 \tt\n'
     import pdb
     pdb.set_trace()
     self.assertEqual(st.__repr__(), output)
Esempio n. 2
0
 def test_repr(self):
     st = SuffixTree("t")
     output = '\tStart \tEnd \tSuf \tFirst \tLast \tString\n\t0 \t1 \t-1 \t0 \t0 \tt\n'
     import pdb;pdb.set_trace()
     self.assertEqual(st.__repr__(), output)
 def test_repr(self):
     st = SuffixTree("t")
     output = '\tStart \tEnd \tSuf \tFirst \tLast \tString\n\t0 \t1 \t-1 \t0 \t0 \t\nt\n'
     self.assertEqual(st.__repr__(), output)
class SuffixClassifier(object):

    def __init__(self, D_plus, D_minus, alphabet):
        self.word = '1'.join(D_plus) + '1' + '0'.join(D_minus) + '0$'
        self.suffixTree = SuffixTree(self.word)
        self.alphabet = alphabet | {'0', '1', '$'}
        self.edges = self.suffixTree.edges
        if self.__has_edges(0): # bug fix
            self.suffixTree.nodes[0].suffix_node = 0

    def print_suffix_tree(self, printDirectDebug = True):
        print(self.word)
        self.print_tree_part('0', 0)
        print()
        if printDirectDebug:
            print(self.suffixTree.__repr__())

    def print_tree_part(self, prefix, nodeId):
        found = False
        first = True
        for e in self.suffixTree.edges.values():
            if e.source_node_index == nodeId:
                found = True
                text = self.suffixTree.string[e.first_char_index:e.last_char_index+1]
                if first:
                    first = False
                else:
                    prefix = (' ' * (len(prefix) - 1)) + '\\'
                data_node = self.__print_data_node(e.source_node_index)
                link = "%s%s--'%s'--%d" % (prefix, data_node, text, e.dest_node_index)
                if not self.__has_edges(e.dest_node_index):
                    link += self.__print_data_node(e.dest_node_index)
                self.print_tree_part(link, e.dest_node_index)
        if found == False:
            print(('\n' if prefix[0] in ['0','\\'] else '') + prefix)

    def __print_data_node(self, nodeId):
        if hasattr(self.suffixTree.nodes[nodeId], 'negative'):
            node = self.suffixTree.nodes[nodeId]
            return "(%d,%d)" % (node.negative, node.positive)

        return ""

    def truncate_tree(self):
        self.nodes = {} # convert to dictionary because we need node
                   # at specific index after remove any elements before
        for i in range(len(self.suffixTree.nodes)):
            u = self.suffixTree.nodes[i]
            u.positive = 0 #n_+
            u.negative = 0 #n_-
            self.nodes[i] = u

        self.__truncate(0)

    def __truncate(self, u: int):
        for v, l in self.__traverse(u):
            if not any(c in l for c in ['0','1','$']): # case (a)
                self.__truncate(v)
            elif l[0] in ['0', '1']: # (b) or (c)
                leafs = self.__remove_with_edge(u, v, l)
                self.__increment(u, l[0], leafs)
            elif l[0] == '$': #(d)
                self.edges.pop((u, l[0]))
                self.nodes.pop(v) # not remove u!
            else:
                e = self.edges[(u, l[0])]
                sub = e.first_char_index
                for c in l:
                    if c in ['0', '1']: # (e) or (f)
                        leafs = self.__remove_from_node(v)
                        self.__increment(v, c, leafs)
                        break
                    sub += 1
                e.last_char_index = sub - 1

    def __remove_from_node(self, u: int): #remove subtree from node
        if self.nodes[u].suffix_node == -1: #speedup, leaf
            return 1
        leafs = 0
        for v, l in self.__traverse(u):
            leafs += self.__remove_from_node(v)
            self.edges.pop((u, l[0]))
            self.nodes.pop(v)

        self.nodes[u].suffix_node = -1
        return leafs

    def __remove_with_edge(self, u: int, v: int, l: Label): #remove subtree from edge
        leafs = self.__remove_from_node(v)
        self.edges.pop((u, l[0]))
        self.nodes.pop(v)

        #check if u is not leaf now
        if not self.__has_edges(u):
            self.nodes[u].suffix_node = -1

        return leafs

    def recalculate(self, u):
        if self.nodes[u].suffix_node == -1:
            return self.nodes[u].negative, self.nodes[u].positive
        negative = 0
        positive = 0

        for v, l in self.__traverse(u):
            neg, pos = self.recalculate(v)
            negative += neg
            positive += pos
        self.__increment(u, '0', negative)
        self.__increment(u, '1', positive)
        return self.nodes[u].negative, self.nodes[u].positive

    def __increment(self, u: int, type: str, leafs):
        if type == '0':
            self.nodes[u].negative += leafs
        else:
            self.nodes[u].positive += leafs

    def __has_edges(self, u: int):
        return any(self.__traverse(u))

    def __traverse(self, u):
        for c in self.alphabet:
            if (u, c) in self.edges:
                e = self.edges[(u, c)]
                l = Label(self, e.first_char_index, e.last_char_index)
                yield (e.dest_node_index, l)
Esempio n. 5
0
class SuffixClassifier(object):
    def __init__(self, D_plus, D_minus, alphabet):
        self.word = '1'.join(D_plus) + '1' + '0'.join(D_minus) + '0$'
        self.suffixTree = SuffixTree(self.word)
        self.alphabet = alphabet | {'0', '1', '$'}
        self.edges = self.suffixTree.edges
        if self.__has_edges(0):  # bug fix
            self.suffixTree.nodes[0].suffix_node = 0

    def print_suffix_tree(self, printDirectDebug=True):
        print(self.word)
        self.print_tree_part('0', 0)
        print()
        if printDirectDebug:
            print(self.suffixTree.__repr__())

    def print_tree_part(self, prefix, nodeId):
        found = False
        first = True
        for e in self.suffixTree.edges.values():
            if e.source_node_index == nodeId:
                found = True
                text = self.suffixTree.string[e.first_char_index:e.
                                              last_char_index + 1]
                if first:
                    first = False
                else:
                    prefix = (' ' * (len(prefix) - 1)) + '\\'
                data_node = self.__print_data_node(e.source_node_index)
                link = "%s%s--'%s'--%d" % (prefix, data_node, text,
                                           e.dest_node_index)
                if not self.__has_edges(e.dest_node_index):
                    link += self.__print_data_node(e.dest_node_index)
                self.print_tree_part(link, e.dest_node_index)
        if found == False:
            print(('\n' if prefix[0] in ['0', '\\'] else '') + prefix)

    def __print_data_node(self, nodeId):
        if hasattr(self.suffixTree.nodes[nodeId], 'negative'):
            node = self.suffixTree.nodes[nodeId]
            return "(%d,%d)" % (node.negative, node.positive)

        return ""

    def truncate_tree(self):
        self.nodes = {}  # convert to dictionary because we need node
        # at specific index after remove any elements before
        for i in range(len(self.suffixTree.nodes)):
            u = self.suffixTree.nodes[i]
            u.positive = 0  #n_+
            u.negative = 0  #n_-
            self.nodes[i] = u

        self.__truncate(0)

    def __truncate(self, u: int):
        for v, l in self.__traverse(u):
            if not any(c in l for c in ['0', '1', '$']):  # case (a)
                self.__truncate(v)
            elif l[0] in ['0', '1']:  # (b) or (c)
                leafs = self.__remove_with_edge(u, v, l)
                self.__increment(u, l[0], leafs)
            elif l[0] == '$':  #(d)
                self.edges.pop((u, l[0]))
                self.nodes.pop(v)  # not remove u!
            else:
                e = self.edges[(u, l[0])]
                sub = e.first_char_index
                for c in l:
                    if c in ['0', '1']:  # (e) or (f)
                        leafs = self.__remove_from_node(v)
                        self.__increment(v, c, leafs)
                        break
                    sub += 1
                e.last_char_index = sub - 1

    def __remove_from_node(self, u: int):  #remove subtree from node
        if self.nodes[u].suffix_node == -1:  #speedup, leaf
            return 1
        leafs = 0
        for v, l in self.__traverse(u):
            leafs += self.__remove_from_node(v)
            self.edges.pop((u, l[0]))
            self.nodes.pop(v)

        self.nodes[u].suffix_node = -1
        return leafs

    def __remove_with_edge(self, u: int, v: int,
                           l: Label):  #remove subtree from edge
        leafs = self.__remove_from_node(v)
        self.edges.pop((u, l[0]))
        self.nodes.pop(v)

        #check if u is not leaf now
        if not self.__has_edges(u):
            self.nodes[u].suffix_node = -1

        return leafs

    def recalculate(self, u):
        if self.nodes[u].suffix_node == -1:
            return self.nodes[u].negative, self.nodes[u].positive
        negative = 0
        positive = 0

        for v, l in self.__traverse(u):
            neg, pos = self.recalculate(v)
            negative += neg
            positive += pos
        self.__increment(u, '0', negative)
        self.__increment(u, '1', positive)
        return self.nodes[u].negative, self.nodes[u].positive

    def __increment(self, u: int, type: str, leafs):
        if type == '0':
            self.nodes[u].negative += leafs
        else:
            self.nodes[u].positive += leafs

    def __has_edges(self, u: int):
        return any(self.__traverse(u))

    def __traverse(self, u):
        for c in self.alphabet:
            if (u, c) in self.edges:
                e = self.edges[(u, c)]
                l = Label(self, e.first_char_index, e.last_char_index)
                yield (e.dest_node_index, l)