Esempio n. 1
0
def build_tree1(tup3):
    """Build a treelib ``Tree`` from a root name and a list of paths.

    Args:
        tup3: a 2-item sequence ``(root, path_list)``. ``root`` becomes the
            identifier of the root node (tagged ``'Root'``); ``path_list``
            is a list of paths, each a list of string segments.

    Returns:
        The populated ``Tree``. The segment at row ``i``, column ``j`` is
        stored as a node tagged with the segment text and identified by
        ``segment + "_" + str(i) + str(j)``. Column 0 hangs off the root and
        every later column hangs off the previous column of the same row.
    """
    root_id = tup3[0]
    tree = Tree()
    tree.create_node('Root', root_id)

    for i, path in enumerate(tup3[1]):
        for j, segment in enumerate(path):
            if segment == []:
                continue

            node_id = segment + "_" + str(i) + str(j)
            if j == 0:
                parent_id = root_id
            else:
                # Nodes are registered under "<tag>_<row><col>", so the
                # parent has to be addressed by that identifier as well.
                parent_id = path[j - 1] + "_" + str(i) + str(j - 1)

            # NOTE(review): the children identifiers carry the row/column
            # suffix, so this test on the bare segment looks like it can
            # never match -- confirm whether de-duplication by tag was
            # the intent.
            if segment not in tree.is_branch(parent_id):
                # Fixed: the identifier used to be built from an undefined
                # name (``count``) and the parent was the bare tag, which
                # is not a node identifier in this tree.
                tree.create_node(segment, node_id, parent=parent_id)

    return tree
def nexus_from_dictionary(output_fname, md, show_tree=False):
    """
    Write the contents of a dictionary to a nexus (HDF5) file.

    Args:
        output_fname: name of the file to write; an existing file is
            overwritten. '.nx' is appended unless the name already ends
            with '.h5', 'hdf' or '.nx';
        md: the dictionary that holds the data; its keys are '/'-separated
            paths from which the hierarchy tree is built;
        show_tree: when True, print the hierarchy tree before writing;
    Return:
        None

    """
    # nothing to do for an empty dictionary
    if not md:
        return

    nx_tree = Tree()
    nx_tree.create_node('root', '/')

    for key in md:
        # drop whatever precedes the first '/' (an empty string for
        # keys that start with '/')
        parts = key.split('/')[1:]
        parent = '/'
        for depth, name in enumerate(parts, start=1):
            idf = '/' + '/'.join(parts[:depth])
            # add the branch only when the parent does not have it yet
            if idf not in nx_tree.is_branch(parent):
                nx_tree.create_node(name, idf, parent=parent)
            # the node just handled is the parent of the next component
            parent = idf

    if show_tree:
        nx_tree.show()

    # names shorter than three characters can never carry a known suffix
    if not output_fname.endswith(('.h5', 'hdf', '.nx')):
        output_fname += '.nx'

    with h5py.File(output_fname, 'w') as f:
        create_nx_worker(f, nx_tree, '/', md)
Esempio n. 3
0
def FpGrowth(fName):
    """Build a prefix tree from the transactions of *fName* and print it.

    The transactions are taken from the module-level ``globOriginalList``
    (presumably filled by ``readFile`` -- verify against that helper).
    Every transaction is re-ordered by descending item priority and then
    inserted into a treelib ``Tree`` rooted at the node ``"root"``.

    Args:
        fName: name of the input file, passed straight to ``readFile``.

    Returns:
        None. The resulting tree is printed with ``tree.show()``.
    """
    readFile(fName)
    Cone = getSizeOneItemSet(globOriginalList)
    priorityDict = priorityDic(Cone)
    tree = Tree()
    tree.create_node("{}", "root")

    # Running integer used as identifier for nodes whose item already
    # exists elsewhere in the tree.
    counter = 0
    for transaction in globOriginalList:
        priorities = {}
        for element in transaction:
            priorities[element] = priorityDict.get(element)

        # Sort once per transaction. This used to happen inside the loop
        # above, which left the sorted list unbound (or stale from the
        # previous transaction) whenever a transaction was empty.
        ranked = sorted(priorities.items(), key=operator.itemgetter(1))
        ranked.reverse()

        # Walk down from the root, creating nodes where needed.
        root = "root"
        for item, _priority in ranked:
            if not tree.contains(item):
                # First occurrence anywhere: the item is its own identifier.
                tree.create_node(item, item, root, 0)
                root = item
            elif item in tree.is_branch(root):
                # Already a child of the current node: just descend.
                root = item
            else:
                # Known item, but not below the current node: add a
                # duplicate node identified by the running counter.
                tree.create_node(item, counter, root, 0)
                root = counter
                counter += 1
    tree.show()
Esempio n. 4
0
# Walk the whole tree depth-first and print the tag of every node.
print(sep + "All family members in DEPTH mode:")
for node in tree.expand_tree(mode=Tree.DEPTH):
    print(tree[node].tag)

# Show the tree with identifiers visible, filtering on the "diane" node.
print(sep + "All family members without Diane sub-family:")
tree.show(idhidden=False, filter=lambda member: member.identifier != "diane")
# Earlier loop-based variant:
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print tree[node].tag

# Show only the subtree that starts at "diane".
print(sep + "Let me introduce Diane family only:")
sub_t = tree.subtree("diane")
sub_t.show()

# Print the tags of the direct children of "diane".
print(sep + "Children of Diane")
for child in tree.is_branch("diane"):
    print(tree[child].tag)

# Build a small three-node tree and paste it below "jill".
print(sep + "OOhh~ new members join Jill's family:")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
for tag, identifier in (("n2", 2), ("n3", 3)):
    new_tree.create_node(tag, identifier, parent=1)
tree.paste("jill", new_tree)
tree.show()

# Remove the pasted nodes again through their root (identifier 1).
print(sep + "They leave after a while:")
tree.remove_node(1)
tree.show()

print(sep + "Now Jill moves to live with Grand-x-father Harry:")
Esempio n. 5
0
# Walk the whole tree depth-first and print the tag of every node.
# print() with a single argument behaves the same on Python 2 and 3,
# whereas the former print statements were a SyntaxError on Python 3.
for node in tree.expand_tree(mode=Tree.DEPTH):
    print(tree[node].tag)
print('\n')

print("#"*4 + "All family members without Diane sub-family")
# Current treelib passes the Node object to the filter callback, so the
# comparison has to go through its identifier; comparing the object itself
# with 'diane' is always unequal and filters nothing.
# NOTE(review): very old treelib releases passed the node id instead --
# confirm the installed version.
for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
    print(tree[node].tag)
print('\n')

# Show only the subtree that starts at 'diane'.
print("#"*4 + "Let me introduce Diane family only")
sub_t = tree.subtree('diane')
sub_t.show()
print('\n')

# Print the list of identifiers of the direct children of 'diane'.
print("#"*4 + "Children of Diane")
print(tree.is_branch('diane'))
print('\n')

# Build a small three-node tree and paste it below 'jill'.
print("#"*4 + "OOhh~ new members enter Jill's family")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste('jill', new_tree)
tree.show()
print('\n')

# Remove the pasted nodes again through their root (identifier 1).
print("#"*4 + "We are sorry they are gone accidently :(")
tree.remove_node(1)
tree.show()
print('\n')
Esempio n. 6
0
class StepParse:
    """Extract the assembly structure of a STEP file as a tree and a lattice.

    ``load_step`` fills the reference tables from the file. ``create_tree``
    turns them into a treelib ``Tree`` and then calls ``get_levels``, which
    in turn calls ``create_lattice`` to build a networkx ``DiGraph``.
    """

    def __init__(self):
        pass

    @staticmethod
    def _classify_line(line):
        """Return the entity type of an indexed STEP line, or '' if unused."""
        if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
            return 'nauo'
        if 'PRODUCT_DEFINITION ' in line or 'PRODUCT_DEFINITION(' in line:
            return 'prod_def'
        if 'PRODUCT_DEFINITION_FORMATION' in line:
            return 'prod_def_form'
        if 'PRODUCT ' in line or 'PRODUCT(' in line:
            return 'prod'
        return ''

    def _store_held_line(self, line_hold, line_type):
        """Append a completed entity line to the list matching its type."""
        targets = {
            'nauo': self.nauo_lines,
            'prod_def': self.prod_def_lines,
            'prod_def_form': self.prod_def_form_lines,
            'prod': self.prod_lines,
        }
        # Lines of any other type (including '') are simply dropped
        if line_type in targets:
            targets[line_type].append(line_hold)

    @staticmethod
    def _hash_refs(line, split_brackets=False):
        """Return the whitespace-separated tokens of *line* starting with '#'.

        Commas and '=' are turned into spaces first so that references
        split cleanly even without blanks between them; with
        ``split_brackets`` the same is done for '('.
        """
        text = line.replace(",", " ").replace("=", " ")
        if split_brackets:
            text = text.replace("(", " ")
        return [token for token in text.split() if token.startswith('#')]

    def load_step(self, step_filename):
        """Read *step_filename* and build the product/assembly reference tables.

        Only NEXT_ASSEMBLY_USAGE_OCCURRENCE, PRODUCT_DEFINITION,
        PRODUCT_DEFINITION_FORMATION and PRODUCT entities are kept. An entity
        wrapped over several physical lines is joined back into one string.

        Sets ``filename`` (path without extension), the raw ``*_lines`` lists,
        the ``*_refs`` lists of '#' references, ``prod_all_refs``,
        ``parent_refs``/``child_refs``, the ``*_type_refs`` sets and
        ``part_dict`` (PRODUCT_DEFINITION reference -> product name).

        Args:
            step_filename: path of the STEP file to read.
        """
        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        # Entity currently being assembled and its type. It is only stored
        # once the next indexed line (or ENDSEC) shows that it is complete.
        line_hold = ''
        line_type = ''

        with open(step_filename) as f:
            for line in f:
                if re.search("#(.*)=", line):
                    # A new indexed entity starts here, so whatever was
                    # being held is complete
                    if line_hold:
                        self._store_held_line(line_hold, line_type)
                        line_hold = ''
                        line_type = ''

                    new_type = self._classify_line(line)
                    if new_type:
                        line_hold = line.rstrip()
                        line_type = new_type
                elif 'ENDSEC;' in line:
                    # End of a section: store the held entity, if any
                    if line_hold:
                        self._store_held_line(line_hold, line_type)
                        line_hold = ''
                        line_type = ''
                else:
                    # Continuation of a wrapped line
                    line_hold = line_hold + line.rstrip()

        # Collect the '#' references of every kept entity
        self.nauo_refs = [self._hash_refs(el_) for el_ in self.nauo_lines]
        self.prod_def_refs = [
            self._hash_refs(el_) for el_ in self.prod_def_lines
        ]
        self.prod_def_form_refs = [
            self._hash_refs(el_) for el_ in self.prod_def_form_lines
        ]
        self.prod_refs = []
        for el_ in self.prod_lines:
            refs = self._hash_refs(el_, split_brackets=True)
            # The product name is the first single-quoted string on the line
            refs.append(el_.split("'")[1])
            self.prod_refs.append(refs)

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for el_ in self.prod_all_refs:

            # Add the second reference of the matching
            # 'PRODUCT_DEFINITION_FORMATION' line (compared against the
            # 'PRODUCT' lines below)
            for el in self.prod_def_form_refs:
                if el[0] == el_[1]:
                    el_.append(el[1])
                    break

            # Add the name from the matching 'PRODUCT' line
            # NOTE(review): el[2] is the name only when the 'PRODUCT' line
            # yielded exactly two '#' tokens -- confirm for other exporters
            for el in self.prod_refs:
                if el[0] == el_[2]:
                    el_.append(el[2])
                    break

        # Parent and child of each relationship are the 2nd and 3rd
        # reference of the corresponding NAUO line
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        # Root candidates: parents that are never a child
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        # Create simple parts dictionary (ref + label)
        self.part_dict = {el[0]: el[3] for el in self.prod_all_refs}

    def show_values(self):
        """Print the raw lines and extracted references (basic debugging aid)."""
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

    # NOTE: a former "create_dict" method was replaced by the dict
    # comprehension that builds self.part_dict in load_step.

    def create_tree(self):
        """Build ``self.tree`` from the loaded references, then compute levels.

        Requires ``load_step`` to have run. Tree node identifiers are
        consecutive integers starting at 0 for the root; each node's tag is
        the product name and its ``data`` is ``{'ref': <STEP reference>}``.
        ``self.tree_dict`` maps node identifier to STEP reference. When
        ``part_dict`` is empty the tree is left empty and nothing else runs.
        """
        self.tree = Tree()
        # Nothing to build a tree from
        if self.part_dict == {}:
            return

        root_node_ref = list(self.root_type_refs)[0]
        # Part reference stored as data for later use
        self.tree.create_node(self.part_dict[root_node_ref],
                              0,
                              data={'ref': root_node_ref})

        # Each node needs a unique identifier: a running counter, kept in a
        # one-item list so the nested function below can update it
        i = [0]
        self.tree_dict = {}
        self.tree_dict[i[0]] = root_node_ref

        def tree_next_layer(self, parent):
            # On entry the counter still points at the node created last,
            # i.e. at ``parent``
            root_node = self.tree_dict[i[0]]
            for line in self.nauo_refs:
                if line[1] == root_node:
                    i[0] += 1
                    self.tree_dict[i[0]] = str(line[2])
                    # Part reference stored as data for later use
                    self.tree.create_node(self.part_dict[line[2]],
                                          i[0],
                                          parent=parent,
                                          data={'ref': str(line[2])})
                    # Depth-first: add the new node's children before
                    # moving on to its siblings
                    tree_next_layer(self, i[0])

        tree_next_layer(self, 0)
        self.appended = False

        self.get_levels()

    def get_levels(self):
        """Compute the lattice levels of every tree node.

        Fills ``self.levels[node_id]`` with ``'n_p'`` and ``'n_a'``. A leaf
        gets ``self.part_level`` (1) for both; any other node gets the sum
        of its children's ``n_p`` and the sum of its children's ``n_a``
        plus one. Also calls ``create_lattice`` and builds the sorted level
        lists and the inverse level -> node ids dictionaries.
        """
        # Initialise dict and get first level (leaves)
        self.levels = {}
        self.levels_set_p = set()
        self.levels_set_a = set()
        self.leaf_ids = [el.identifier for el in self.tree.leaves()]
        self.all_ids = list(self.tree.nodes)
        self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids)

        self.part_level = 1

        def do_level(self, tree_level):
            # Get all nodes within this level
            node_ids = [
                el for el in self.tree.nodes
                if self.tree.level(el) == tree_level
            ]
            for el in node_ids:
                # If leaf, then n_p = 1 and n_a = 1
                if el in self.leaf_ids:
                    self.levels[el] = {}
                    self.levels[el]['n_p'] = self.part_level
                    self.levels[el]['n_a'] = self.part_level
                # If assembly, then get all children and sum all parts + assemblies
                else:
                    # Children sit one level deeper and were done already
                    child_ids = self.tree.is_branch(el)
                    child_sum_p = 0
                    child_sum_a = 0
                    for el_ in child_ids:
                        child_sum_p += self.levels[el_]['n_p']
                        child_sum_a += self.levels[el_]['n_a']
                    self.levels[el] = {}
                    self.levels[el]['n_p'] = child_sum_p
                    self.levels[el]['n_a'] = child_sum_a + 1
                    self.levels_set_p.add(child_sum_p)
                    self.levels_set_a.add(child_sum_a + 1)

        # Go up through tree levels and populate lattice level dict
        for i in range(self.tree.depth(), -1, -1):
            do_level(self, i)

        self.create_lattice()

        self.levels_p_sorted = sorted(self.levels_set_p)
        self.levels_a_sorted = sorted(self.levels_set_a)

        # Function to return dictionary of item IDs for each lattice level
        def get_levels_inv(list_in, key):

            # Initialise; the part level is always present
            levels_inv = {}
            levels_inv[self.part_level] = []
            for el in list_in:
                levels_inv[el] = []
            for k, v in self.levels.items():
                levels_inv[v[key]].append(k)

            return levels_inv

        self.levels_p_inv = get_levels_inv(self.levels_p_sorted, 'n_p')
        self.levels_a_inv = get_levels_inv(self.levels_a_sorted, 'n_a')

    def get_all_children(self, id_):
        """Return the identifiers of all descendants of node *id_*.

        The result is in breadth-first order: direct children first, then
        their children, and so on. It is empty when *id_* has no children.
        """
        descendants = [el.identifier for el in self.tree.children(id_)]
        # Explicit index walk over the growing list. This gives the same
        # order as before without relying on extending a list while a
        # ``for`` loop iterates over it.
        position = 0
        while position < len(descendants):
            current = descendants[position]
            descendants.extend(
                el.identifier for el in self.tree.children(current))
            position += 1
        return descendants

    def create_lattice(self):
        """Build the networkx graph ``self.g`` mirroring ``self.tree``.

        Every tree node becomes a graph node with ``parent``, ``label`` and
        ``colour`` attributes; every non-root node gets an edge to its
        parent. Each node also gets a ``pos`` attribute. Relies on
        ``self.leaf_ids``, ``self.levels`` and ``self.levels_set_a`` as set
        up by ``get_levels``.
        """
        # Create lattice
        self.g = nx.DiGraph()
        self.default_colour = 'r'
        # Get root node and set parent to -1 to maintain data type of "parent"
        node_id = self.tree.root
        label_text = self.tree.get_node(node_id).tag
        self.g.add_node(node_id,
                        parent=-1,
                        label=label_text,
                        colour=self.default_colour)

        # Do nodes from treelib "nodes" dictionary
        for key in self.tree.nodes:
            # Exclude root
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                label_text = self.tree.get_node(key).tag
                # Node IDs same as for tree
                self.g.add_node(key,
                                parent=parent_id,
                                label=label_text,
                                colour=self.default_colour)

        # Do edges from nodes
        for key in self.tree.nodes:
            # Exclude root
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                self.g.add_edge(key, parent_id)

        # Escape if only one node
        # HR 6/3/20 QUICK BUG FIX: SINGLE-NODE TREE DOES NOT PLOT
        # IMPROVE LATER; SHOULD BE PART OF A GENERAL METHOD
        if self.tree.size() == 1:
            id_ = [el.identifier for el in self.tree.leaves()]
            self.g.nodes[id_[-1]]['pos'] = (0, 0)
            return

        # Get set of parents of leaf nodes
        leaf_parents = {
            self.tree.parent(el).identifier for el in self.leaf_ids
        }

        # For each leaf_parent, set position of leaf nodes sequentially
        i = 0
        leaves = self.tree.leaves()
        no_leaves = len(leaves)
        # Built once; the tree does not change while positions are assigned
        leaf_id_set = {el.identifier for el in leaves}
        for el in leaf_parents:
            for el_ in self.tree.is_branch(el):
                if el_ in leaf_id_set:
                    self.g.nodes[el_]['pos'] = ((i / (no_leaves)), 1)
                    i += 1

        # To set plot positions of nodes from lattice levels
        # ---
        # Traverse upwards from leaves
        for el in sorted(self.levels_set_a):
            # Get all nodes at that level
            node_ids = [k for k, v in self.levels.items() if v['n_a'] == el]
            # Get all positions of children of that node
            # and set position as mean value of them
            for el_ in node_ids:
                child_ids = self.tree.is_branch(el_)
                pos_sum = 0
                for el__ in child_ids:
                    pos_ = self.g.nodes[el__]['pos'][0]
                    pos_sum += pos_
                pos_sum = pos_sum / len(child_ids)
                self.g.nodes[el_]['pos'] = (pos_sum, el)

    def print_tree(self):
        """Print the tree, building it first if it does not exist yet."""
        try:
            self.tree.show()
        except AttributeError:
            # self.tree is only created by create_tree(); anything else
            # going wrong is no longer swallowed
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):
        """Return the tree as a JSON string, optionally saving it to a file.

        Args:
            save_to_file: when true, also write the JSON to disk.
            filename: file name without extension; '.json' is appended.
            path: optional directory joined in front of *filename*.

        Returns:
            The JSON string from ``self.tree.to_json()``, or None (after
            printing a message) when the tree has no nodes.
        """
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename

                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)

            return data
        else:
            print("no tree to print")
            return
Esempio n. 7
0
# Walk the whole tree in zigzag order and print the tag of every node.
# The heading used to announce DEPTH mode although the traversal below
# uses Tree.ZIGZAG.
print(sep + "All family members in ZIGZAG mode:")
for node in tree.expand_tree(mode=Tree.ZIGZAG):
    print(tree[node].tag)

# Show the tree with identifiers visible, filtering on the 'diane' node.
print(sep + "All family members without Diane sub-family:")
tree.show(idhidden=False, filter=lambda x: x.identifier != 'diane')
# Earlier loop-based variant:
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print tree[node].tag

# Show only the subtree that starts at 'diane'.
print(sep + "Let me introduce Diane family only:")
sub_t = tree.subtree('diane')
sub_t.show()

# Print the tags of the direct children of 'diane'
# (body re-indented with four spaces instead of a tab).
print(sep + "Children of Diane")
for child in tree.is_branch('diane'):
    print(tree[child].tag)

# Build a small three-node tree and paste it below 'jill'.
print(sep + "OOhh~ new members join Jill's family:")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste('jill', new_tree)
tree.show()

# Remove the pasted nodes again through their root (identifier 1).
print(sep + "They leave after a while:")
tree.remove_node(1)
tree.show()

print(sep + "Now Jill moves to live with Grand-x-father Harry:")
Esempio n. 8
0

if __name__ == '__main__':
    # Scratch notes from earlier experiments, kept for reference:
    # str1 = "www.sasac.gov.cn/n2588025/n2588119/index.html?t=1573435313677"
    # pa = resolution_line(str1)
    # print(pa)
    #####
    # tup1 = (u'tybb.mof.gov.cn',[[u'printasyncwork'], [u'dnaserver'],[],[u'dnaserver'],[u'printasyncwork'],[u'printasyncwork']])
    # tup2 = (u'www.nkj.moa.gov.cn', [[u'ggzt', u''], [u'dwhz', u''], [u'tongji']])
    # list_path = duplicate_removal(tup1)
    # print(list_path)
    # print(count_gov_url(tup2))
    ###
    # tree = build_tree()
    # tree.show()
    #
    # print(tree.contains("child4"))
    # print(tree.is_branch("child1"))
    # list = ["aa","bb","c",0]
    # print(list.__contains__("aa"))
    #########################################
    # Sample input for build_tree1; only used by the disabled lines below.
    tup3 = ("gov", [["1", "2"], ["2", "4"]])
    # tree1 = build_tree1(tup3)
    # tree1.show()

    # Build a three-node chain: 0 -> 1 -> 2 (the root has no parent).
    tree = Tree()
    for tag, node_id, parent_id in (("gov", 0, None),
                                    ("122", 1, 0),
                                    ("2222", 2, 1)):
        tree.create_node(tag, node_id, parent=parent_id)

    # Identifiers of the root's direct children, then the whole tree.
    print(tree.is_branch(0))
    # print(tree.)
    tree.show()
Esempio n. 9
0
# Show the tree with identifiers visible, filtering on the 'diane' node.
print("#"*4 + "All family members without Diane sub-family")
tree.show(idhidden=False, filter=lambda x: x.identifier != 'diane')
# Earlier loop-based variant:
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print tree[node].tag
print('\n')


# Show only the subtree that starts at 'diane'.
print("#"*4 + "Let me introduce Diane family only")
sub_t = tree.subtree('diane')
sub_t.show()
print('\n')


# Print the list of identifiers of the direct children of 'diane'.
# Written as a function call: the former print statement was a
# SyntaxError on Python 3, and every other print here is already a call.
print("#"*4 + "Children of Diane")
print(tree.is_branch('diane'))
print('\n')


# Build a small three-node tree and paste it below 'jill'.
print("#"*4 + "OOhh~ new members enter Jill's family")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste('jill', new_tree)
tree.show()
print('\n')


# Remove the pasted nodes again through their root (identifier 1).
print("#"*4 + "We are sorry they are gone accidently :(")
tree.remove_node(1)
Esempio n. 10
0
# Walk the whole tree depth-first and print the tag of every node.
print(sep + "All family members in DEPTH mode:")
for node in tree.expand_tree(mode=Tree.DEPTH):
    print(tree[node].tag)

# Show the tree with identifiers visible, filtering on the 'diane' node.
print(sep + "All family members without Diane sub-family:")
tree.show(idhidden=False, filter=lambda member: member.identifier != 'diane')
# Earlier loop-based variant:
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print tree[node].tag

# Show only the subtree that starts at 'diane'.
print(sep + "Let me introduce Diane family only:")
sub_t = tree.subtree('diane')
sub_t.show()

# Print the tags of the direct children of 'diane'.
print(sep + "Children of Diane")
for child in tree.is_branch('diane'):
    print(tree[child].tag)

# Build a small three-node tree and paste it below 'jill'.
print(sep + "OOhh~ new members join Jill's family:")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
for tag, identifier in (("n2", 2), ("n3", 3)):
    new_tree.create_node(tag, identifier, parent=1)
tree.paste('jill', new_tree)
tree.show()

# Remove the pasted nodes again through their root (identifier 1).
print(sep + "They leave after a while:")
tree.remove_node(1)
tree.show()

print(sep + "Now Jill moves to live with Grand-x-father Harry:")
Esempio n. 11
0
class LegalDocMLconverter(PDFConverter):

    CONTROL = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')

    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
                 imagewriter=None, stripcontrol=False):
        """Set up the converter, write the XML header and load the classifier.

        ``rsrcmgr``, ``outfp``, ``codec``, ``pageno`` and ``laparams`` are
        forwarded to ``PDFConverter.__init__``. ``imagewriter`` and
        ``stripcontrol`` are stored on the instance.

        Side effects: writes the document header to the output right away,
        and reads ``data/model.json``, ``data/model.h5`` and
        ``data/tokenizer.pickle`` relative to the current working directory.
        """
        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
                              laparams=laparams)
        self.imagewriter = imagewriter
        self.stripcontrol = stripcontrol
        self.textboxes = []
        self.page_width = []
        self.page_height = []
        # Classified boxes: all of them, plus one list per tag of interest
        self.classified = []
        self.classified_header = []
        self.classified_paragraph = []
        self.classified_section = []
        self.classified_subsection = []
        self.tree = Tree()
        self.tree.create_node("Documents", 'documents')
        self.num_tabs = 0
        self.write_header()

        self.headerExist = False
        self.in_li = False

        # Context manager so the file is closed even if reading fails
        with open('data/model.json', 'r') as json_file:
            loaded_model_json = json_file.read()
        self.model = model_from_json(loaded_model_json)
        self.model.load_weights("data/model.h5")

        self.tokenizer = []

        # NOTE(security): unpickling can execute arbitrary code; this file
        # must come from a trusted source.
        with open('data/tokenizer.pickle', 'rb') as handle:
            self.tokenizer = pickle.load(handle)

    def decode_tags(self, pred):
        """Return the tag name for the highest-scoring position in *pred*.

        Args:
            pred: array-like of scores. ``np.argmax`` is applied to it as
                a whole, so the index refers to the flattened input.

        Returns:
            The tag name whose index equals that position.

        Raises:
            KeyError: if the position is not one of the 14 known indices.
        """
        # The position in this tuple is the numeric class index
        names = (
            'header',
            'document',
            'paragraph',
            'topic',
            'section',
            'subsection',
            'li',
            'footer',
            'page_number',
            'figure',
            'table',
            'table_li',
            'commentary',
            '?',
        )
        decode = dict(enumerate(names))

        return decode[np.argmax(pred)]

    def write(self, text):
        """Write *text* to ``self.outfp``, encoded with ``self.codec`` if set."""
        payload = text.encode(self.codec) if self.codec else text
        self.outfp.write(payload)

    def write_header(self):
        """Write the XML declaration and the opening ``<documents>`` tag.

        The declaration names the encoding only when ``self.codec`` is set.
        Afterwards ``self.num_tabs`` is 1.
        """
        declaration = '<?xml version="1.0" ?>\n'
        if self.codec:
            declaration = '<?xml version="1.0" encoding="%s" ?>\n' % self.codec
        self.write(declaration)
        self.write('<documents>\n')
        self.num_tabs = 1

    def write_footer(self):
        """Write the closing ``</documents>`` tag and set ``num_tabs`` to 0."""
        closing_tag = '</documents>\n'
        self.write(closing_tag)
        self.num_tabs = 0

    def write_text(self, text):
        """Write *text* after passing it through ``enc``.

        When ``self.stripcontrol`` is set, everything matching the class
        pattern ``CONTROL`` is removed from the text first.
        """
        cleaned = self.CONTROL.sub('', text) if self.stripcontrol else text
        self.write(enc(cleaned))

    def write_tab(self):
        """Write ``self.num_tabs`` tab characters, one ``write`` call each."""
        for _ in range(self.num_tabs):
            self.write("\t")

    def receive_layout(self, ltpage):
        self.items = []

        def extract_text(item):
            if isinstance(item, LTPage):
                #print(bbox2str(item.bbox))
                self.page_width = item.x1
                self.page_height = item.y1
                for child in item:
                    extract_text(child)
            elif isinstance(item, LTFigure):
                for child in item:
                    extract_text(child)
            elif isinstance(item, LTTextBox):
                self.items.append(item)
            elif isinstance(item, LTChar):
                self.items.append(item)

        extract_text(ltpage)

        def get_y0(item):
            return item.y0

        def get_id(item):
            return item.index

        def get_size(item):
            if isinstance(item, LTChar):
                return item.size
            elif isinstance(item, LTAnno):
                return 0
            else:
                for child in item:
                    return get_size(child)

        self.items.sort(key=get_y0, reverse=True)

        def group_textboxes(items):
            new_items = []
            prev = items[0]
            for item in items[1:]:
                if isinstance(prev, LTChar):
                    box = LTTextBox()
                    box.add(prev)
                    box.set_bbox((prev.x0, prev.y0, prev.x1, prev.y1))
                    prev = box
                y_diff = (prev.y0 - item.y1)
                x_diff = (item.x0 - prev.x1)
                if y_diff < get_size(prev)/2 and x_diff < get_size(prev) and x_diff >= -get_size(prev)/2:
                    xs = [item.x0, item.x1, prev.x0, prev.x1]
                    ys = [item.y0, item.y1, prev.y0, prev.y1]
                    prev.add(item)
                    prev.set_bbox((min(xs), min(ys), max(xs), max(ys)))
                elif y_diff < get_size(prev)/2 and (item.x0 - prev.x0) < get_size(prev)/2 and (item.x1 - prev.x1) > -get_size(prev)/2:
                    vert = LTTextBoxVertical()
                    xs = [item.x0, item.x1, prev.x0, prev.x1]
                    ys = [item.y0, item.y1, prev.y0, prev.y1]
                    for child in prev:
                        vert.add(child)
                    vert.add(item)
                    vert.set_bbox((min(xs), min(ys), max(xs), max(ys)))
                    prev = vert
                else:
                    new_items.append(prev)
                    prev = item
                #new_items.append(prev)
                #prev = item
            new_items.append(prev)
            return new_items

        def classify(item):
            if isinstance(item, LTTextBox):
                wmode = ''
                if isinstance(item, LTTextBoxVertical):
                    wmode = ' wmode="vertical"'

                box = NLPTextBox(item)
                
                s = ('%s %d %d %d ' % (bbox2str(box.bbox), box.b, box.i, box.size) + item.get_text().replace('\n', ' '))
                
                s_list = []
                s_list.append(s)
                X = self.tokenizer.texts_to_sequences(s_list)
                maxlen = 100
                X = pad_sequences(X, padding='post', maxlen=maxlen)
                preds = self.model.predict(X)
                tag = self.decode_tags(preds)

                box.set_tag(tag)

                if (tag == "header"):
                    self.classified_header.append(box)
                elif (tag == "paragraph"):
                    self.classified_paragraph.append(box)
                elif (tag == "section"):
                    self.classified_section.append(box)
                elif (tag == "subsection"):
                    self.classified_subsection.append(box)

                self.classified.append(box)
                    
            else:
                assert False, str(('Unhandled', item))

        def into_tree():
            """Arrange the classified boxes into ``self.tree``.

            The first classified header is attached under the "documents"
            node. Sections hang off the header. A subsection hangs off the
            section whose key is the subsection key with its last component
            replaced by "0"; a paragraph hangs off the subsection whose key is
            the paragraph key minus its last component. Missing section or
            subsection parents are created as ``NLPSimpleBox`` placeholders
            (and appended to ``self.classified``).

            Afterwards runs of consecutive commentary boxes in
            ``self.classified`` are merged into the first box of the run, and
            every box that is not structural, a footer, a page number or "?"
            is attached to the subsection (topics) or paragraph (everything
            else) returned by ``find_prev_with_tag``.

            Raises:
                IndexError: if no header was classified.
            """

            def parent_key(key):
                # Drop the last dot-separated component of a key.
                return ".".join(key.split('.')[:-1])

            def add_node(box, identifier, parent, fallback_suffix):
                # Insert the box; if create_node fails (e.g. the identifier is
                # already taken) retry once with a suffixed identifier.
                # ``Exception`` instead of a bare except so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                try:
                    self.tree.create_node(box.get_text(), identifier, parent=parent, data=box)
                except Exception:
                    self.tree.create_node(box.get_text(), identifier + fallback_suffix, parent=parent, data=box)

            header = self.classified_header[0]
            self.tree.create_node(header.get_text(), header.key, parent="documents", data=header)

            for section in self.classified_section:
                self.tree.create_node(section.get_text(), section.key, parent=header.key, data=section)

            for subsection in self.classified_subsection:
                section_key = parent_key(subsection.key) + ".0"
                if not self.tree.contains(section_key):
                    placeholder = NLPSimpleBox("section", section_key)
                    self.tree.create_node(section_key, section_key, parent=header.key, data=placeholder)
                    self.classified.append(placeholder)
                self.tree.create_node(subsection.get_text(), subsection.key, parent=section_key, data=subsection)

            for paragraph in self.classified_paragraph:
                subsection_key = parent_key(paragraph.key)
                if not self.tree.contains(subsection_key):
                    section_key = parent_key(subsection_key) + ".0"

                    if not self.tree.contains(section_key):
                        # The placeholder carries the section's own key, like
                        # the one created in the subsection loop above (it
                        # used to be built with the subsection key).
                        placeholder = NLPSimpleBox("section", section_key)
                        self.tree.create_node(section_key, section_key, parent=header.key, data=placeholder)
                        self.classified.append(placeholder)

                    placeholder = NLPSimpleBox("subsection", subsection_key)
                    self.tree.create_node(subsection_key, subsection_key, parent=section_key, data=placeholder)
                    self.classified.append(placeholder)
                add_node(paragraph, paragraph.key, subsection_key, ".0")

            # Merge each run of consecutive commentary boxes into its first
            # box. ``pending`` is the run being accumulated; it is flushed when
            # a non-commentary box follows and once more after the loop, so a
            # leading run is not merged with itself and a trailing run is kept.
            merged = []
            pending = None
            for box in self.classified:
                if box.tag == "commentary":
                    if pending is None:
                        pending = box
                    else:
                        pending.text += box.text
                        pending.set_tag("commentary")
                else:
                    if pending is not None:
                        merged.append(pending)
                        pending = None
                    merged.append(box)
            if pending is not None:
                merged.append(pending)

            self.classified = merged

            # Structural boxes are already in the tree; footers, page numbers
            # and unknown ("?") boxes are left out of it.
            skipped = ("footer", "page_number", "?",
                       "header", "paragraph", "section", "subsection")
            for box in self.classified:
                tag = box.tag
                if tag in skipped:
                    continue
                if tag == "topic":
                    anchor = find_prev_with_tag(box, "subsection")
                    add_node(box, box.key, anchor.key, '1')
                else:
                    anchor = find_prev_with_tag(box, "paragraph")
                    add_node(box, anchor.key + "." + box.key, anchor.key, '1')
                    
        def find_prev_with_tag(item, tag):
            """Find the box tagged ``tag`` that ``item`` belongs to.

            Scans ``self.classified`` in order and returns the last box with
            the given tag seen up to and including ``item``. When no such box
            precedes ``item``, the first one after it is returned instead. If
            ``item`` is never encountered the last tagged box is returned.
            The empty string is returned when nothing matches.
            """
            match = ''
            take_next = False
            for candidate in self.classified:
                if candidate.tag == tag:
                    match = candidate
                    if take_next:
                        # First tagged box after an item that had none before it.
                        return match
                if candidate == item:
                    if match == '':
                        take_next = True
                    else:
                        return match
            return match

        def get_node_id(node):
            """Sort key for tree nodes: the node's ``identifier``."""
            return node.identifier

        def render(node):
            """Write ``node`` and its whole subtree through ``self.write``.

            Only nodes whose data is an ``LTTextBox`` produce markup of their
            own; which element is written depends on the box's tag. The
            children are then rendered recursively (sorted by identifier when
            the node is a section or subsection). Finally a pending ``<ol>``
            is closed, followed by the node's own element.
            """
            item = node.data
            tag = item.tag if isinstance(item, LTTextBox) else ''

            if tag == "header":
                # The <document> element is opened once, by the first header.
                if not self.headerExist:
                    self.write_tab()
                    self.write('<document title="%s">\n' % item.get_text())
                    self.num_tabs += 1
                    self.headerExist = True

            elif tag == "paragraph":
                self.write_tab()
                self.write('<paragraph key="%s">\n' % item.get_key())
                self.num_tabs += 1
                self.write_tab()
                flat_text = item.get_text().replace('\n', ' ').strip()
                self.write("<p>" + flat_text + "</p>\n")

            elif tag == "commentary":
                self.write_tab()
                self.write('<commentary title="COMMENT:">')
                self.write(item.get_text().replace('COMMENT:', '').lstrip())
                self.write('</commentary>\n')

            elif tag == "topic":
                self.write_tab()
                self.write('<topic>')
                self.write(item.get_text())
                self.write('</topic>\n')

            elif tag in ("section", "subsection"):
                # Both elements share one layout; the tag is the element name.
                self.write_tab()
                self.write('<%s key="%s" title="%s">\n' % (tag, item.get_key(), item.get_text()))
                self.num_tabs += 1

            elif tag == "li":
                # The first item of a run opens the list; it is closed below
                # once a node with a non-list tag finishes rendering.
                if not self.in_li:
                    self.write_tab()
                    self.write('<ol>\n')
                    self.num_tabs += 1
                    self.in_li = True
                self.write_tab()
                opening = ('<li key="%s">' % node.identifier) if item.list_tag else '<li>'
                self.write(opening)
                self.write(item.get_text())
                self.write('</li>\n')

            # Footers, page numbers, "?" and any other tag write nothing.

            children = [self.tree.get_node(child_id)
                        for child_id in self.tree.is_branch(node.identifier)]
            if tag in ("section", "subsection"):
                children.sort(key=get_node_id)
            for child in children:
                render(child)

            if tag not in ("li", "footer", "page_number", "?") and self.in_li:
                self.num_tabs -= 1
                self.write_tab()
                self.write('</ol>\n')
                self.in_li = False

            closing = {
                "paragraph": '</paragraph>\n',
                "header": '</document>\n',
                "section": '</section>\n',
                "subsection": '</subsection>\n',
            }.get(tag)
            if closing is not None:
                self.num_tabs -= 1
                self.write_tab()
                self.write(closing)

        def highlights(item):
            """Return the text under ``item`` with bold/italic runs marked up.

            Characters whose font name contains "Bold" are wrapped in ``<b>``;
            characters whose font name contains "Italic" (but not "Bold") are
            wrapped in ``<i>``. Text lines and text boxes are processed
            recursively, each with its own run state. A newline coming from
            any other kind of child closes the open run. A run that is still
            open when the children are exhausted is closed as well, so the
            returned markup is always balanced.

            Args:
                item: iterable of layout children.

            Returns:
                The concatenated, marked-up text.
            """
            pieces = []
            bold = False
            italic = False
            for child in item:
                if isinstance(child, LTChar):
                    if 'Bold' in child.fontname:
                        if italic:
                            pieces.append('</i>')
                        if not bold:
                            pieces.append('<b>')
                        bold, italic = True, False
                    elif 'Italic' in child.fontname:
                        if bold:
                            pieces.append('</b>')
                        if not italic:
                            pieces.append('<i>')
                        bold, italic = False, True
                    else:
                        if bold:
                            pieces.append('</b>')
                        elif italic:
                            pieces.append('</i>')
                        bold = italic = False

                    pieces.append(child.get_text())

                elif isinstance(child, (LTTextLine, LTTextBox, NLPTextBox)):
                    pieces.append(highlights(child))
                else:
                    text = child.get_text()
                    if text == '\n':
                        # A line break ends the current run.
                        if bold:
                            pieces.append('</b>')
                        elif italic:
                            pieces.append('</i>')
                        bold = italic = False
                    pieces.append(text)

            # Close a run left open by input that does not end with a newline.
            if bold:
                pieces.append('</b>')
            elif italic:
                pieces.append('</i>')
            return ''.join(pieces)

        self.textboxes = group_textboxes(self.items)
        
        self.textboxes.sort(key=get_id, reverse=False)
        for item in self.textboxes:
            classify(item)
        
        into_tree()

        self.tree.show()

        render(self.tree.get_node("documents"))

        return

    def draw_layout(self, input_path, output_path):
        """Show the first PDF page next to a copy with the text boxes outlined.

        The first page returned by ``convert_from_path(input_path, 500)`` is
        saved as a JPEG to ``output_path`` and read back with OpenCV. Every
        item of ``self.textboxes`` is outlined on the full-size image with
        colour (0, 0, 255) (red in OpenCV's BGR order); the outlined image
        itself is only displayed, not saved. Both images are reduced three
        times with ``cv2.pyrDown`` and shown in the windows 'page' and
        'boxed' until ESC is pressed.

        Args:
            input_path: path of the PDF to rasterise.
            output_path: path the JPEG of the first page is written to.

        Raises:
            AssertionError: if ``self.textboxes`` holds an item that is
                neither an ``LTTextBox`` nor an ``LTChar``.
        """
        pages = convert_from_path(input_path, 500)

        pages[0].save(output_path, 'JPEG')
        page = cv2.imread(output_path)

        # Reduced copy of the untouched page for display.
        page_disp = page
        for _ in range(3):
            page_disp = cv2.pyrDown(page_disp)

        height = page.shape[0]
        # Ratio between image rows and the layout's page height.
        scale = height / int(self.page_height)
        color = (0, 0, 255)
        thickness = 5
        for item in self.textboxes:
            if not isinstance(item, (LTTextBox, LTChar)):
                # Explicit raise so the check survives ``python -O``.
                raise AssertionError(str(('Unhandled', item)))

            # Layout y grows upwards while image rows grow downwards, hence
            # the flip against the image height.
            start = (int(item.x0 * scale), (height - int(item.y0 * scale)))
            end = (int(item.x1 * scale), (height - int(item.y1 * scale)))
            page = cv2.rectangle(page, start, end, color, thickness)

        # Small fixed marker near the top-left corner of the image.
        page = cv2.rectangle(page, (40, 40), (50, 50), (0, 0, 255), 2)
        boxed_disp = page
        for _ in range(3):
            boxed_disp = cv2.pyrDown(boxed_disp)

        try:
            while True:
                cv2.imshow('page', page_disp)
                cv2.imshow('boxed', boxed_disp)

                # exit on ESC
                k = cv2.waitKey(30) & 0xFF
                if k == 27:
                    break
        finally:
            # Close the windows even if the loop is interrupted.
            cv2.destroyAllWindows()

    def close(self):
        """Finish the output by writing the footer."""
        self.write_footer()
Esempio n. 12
0
class DependencyReader:
    """Build a Maven dependency tree for a POM and hand it to ``dosocs2``.

    Typical use, as far as this class shows: ``getPom`` copies the POM into a
    fresh temporary directory and changes into it, ``getDependencies`` parses
    the TGF output of the Maven dependency plugin, ``relateDependencies``
    turns the collected edges into ``self.tree``, and the remaining methods
    run ``dosocs2`` commands over that tree.

    NOTE(review): every external command is built by string concatenation and
    run with ``shell=True``; jar names are therefore interpreted by the shell.
    """

    def __init__(self):
        self.tempDirectoryPath = mkdtemp(dir=".")
        self.tree = Tree()
        # reference id (string of digits) -> DependencyNode
        self.dependencies = {}
        # pending (parent id, child id) edges not yet present in self.tree
        self.graphRelationships = []

    def getPom(self, pomPath):
        """Copy ``pomPath`` into the temporary directory and chdir into it."""
        shutil.copy(pomPath, self.tempDirectoryPath)
        os.chdir(self.tempDirectoryPath)

    def getDependencies(self):
        """Run the Maven dependency plugin and collect nodes and edges.

        Reading stops at the first empty line or at "BUILD SUCCESS". Node
        lines register a ``DependencyNode`` (the first one also becomes the
        tree root) and call its ``get('jar', ...)``; edge lines are appended
        to ``self.graphRelationships``.
        """
        # universal_newlines=True makes the pipe yield text, so the str
        # patterns below also work on Python 3.
        mavenTreeOutput = subprocess.Popen(
            'mvn org.apache.maven.plugins:maven-dependency-plugin:RELEASE:tree -DoutputType=tgf',
            stdout=subprocess.PIPE, shell=True, universal_newlines=True)

        while True:
            line = mavenTreeOutput.stdout.readline().rstrip()

            if not line or re.search(r"BUILD SUCCESS", line):
                break

            match = re.match(r"\[INFO\]\s(\d*)\s*(.*):(.*):(\w+):([0-9\.]*)", line)

            if match:
                referenceId = match.group(1)
                if referenceId not in self.dependencies:
                    self.dependencies[referenceId] = DependencyNode(
                        match.group(2), match.group(3), match.group(5), referenceId)

                # An empty tree has no leaves: the first node becomes the root.
                if not self.tree.leaves():
                    self.tree.create_node(referenceId, referenceId, data=self.dependencies[referenceId])

                self.dependencies[referenceId].get('jar', self.tempDirectoryPath)

            match = re.match(r"\[INFO\]\s(\d*)\s(\d*)", line)

            if match and match.group(2):
                self.graphRelationships.append((match.group(1), match.group(2)))

    def relateDependencies(self):
        """Attach every collected edge to ``self.tree``.

        An edge can only be attached once its parent is in the tree, so the
        pending list is swept repeatedly until it is empty.

        Raises:
            ValueError: if a sweep attaches nothing, i.e. the remaining edges
                have parents that never appear in the tree (this used to
                loop forever).
        """
        while self.graphRelationships:
            progressed = False
            # Iterate over a snapshot because attached edges are removed
            # from the live list.
            for item in list(self.graphRelationships):
                if self.tree.get_node(item[0]) is None:
                    continue
                parent = self.dependencies[item[0]]
                child = self.dependencies[item[1]]
                self.tree.create_node(child.referenceId, child.referenceId,
                                      parent=parent.referenceId, data=child)
                self.graphRelationships.remove(item)
                progressed = True

            if not progressed:
                raise ValueError("Cannot attach dependencies whose parent is not in the tree: %r"
                                 % (self.graphRelationships,))

    def scanDependencies(self):
        """Run ``dosocs2 oneshot`` on the jar of every node, depth first."""
        # Need to run on each package with oneshot to get identifiers
        # unless update dosocsv2 to create identifiers on scan
        # or fix up dosocsv2 to create identifiers on scan instead
        for node in self.tree.expand_tree(mode=Tree.DEPTH):
            treeNode = self.tree.get_node(node)
            subprocess.call('dosocs2 oneshot ' + treeNode.data.jarName, shell=True)

    def createRelationships(self):
        """Run ``dosocs2 packagerelate`` for every parent/child pair of the tree."""
        # Pass packages as relationships to new dosocsv2 command created
        self.recursiveRelationship(self.tree.root)

    def recursiveRelationship(self, parent):
        """Relate ``parent`` to each of its children, then recurse into them.

        Args:
            parent: identifier of the node whose children are processed.
        """
        parentNode = self.tree.get_node(parent)
        for node in self.tree.is_branch(parent):
            childNode = self.tree.get_node(node)
            subprocess.call('dosocs2 packagerelate ' + parentNode.data.jarName + ' ' + childNode.data.jarName, shell=True)
            self.recursiveRelationship(node)

    def retrieve_dependencies(self, jarName):
        """Print the dependency tree ``dosocs2`` reports for a jar.

        Args:
            jarName: jar to query; ``None`` means the jar of the root of
                ``self.tree``.

        Returns:
            None. The tree is printed with ``tree.show()``; when there are no
            relationships a message is printed instead.

        Raises:
            ValueError: if some relationship can never be attached because
                its parent is not in the tree or its child was not listed
                (or was already attached elsewhere).
        """
        if jarName is None:
            root = self.tree.get_node(self.tree.root).data.jarName
        else:
            root = jarName

        # Text mode so that the str patterns below match on Python 3 too.
        tgfOutput = subprocess.Popen('dosocs2 dependencies ' + root, stdout=subprocess.PIPE,
                                     shell=True, universal_newlines=True)
        have_root = False
        tree = Tree()
        dependencies = []  # (name, id) of every node except the first
        relationships = []  # (parent id, child id)
        for line in tgfOutput.stdout:
            match = re.match(r"(\d+) - (.*)", line)
            if match:
                if not have_root:
                    # The first node listed is the root of the printed tree.
                    have_root = True
                    tree.create_node(match.group(2), match.group(1))
                else:
                    dependencies.append((match.group(2), match.group(1)))

            match = re.match(r"(\d+) (\d+)", line)

            if match:
                relationships.append((match.group(1), match.group(2)))

        # The pipe was read to EOF; release it and reap the child process.
        tgfOutput.stdout.close()
        tgfOutput.wait()

        if not relationships:
            # ``root`` rather than ``jarName``, which may be None here.
            print("No child relationships for " + root)
            return None

        while relationships:
            progressed = False
            for rel in list(relationships):
                node = tree.get_node(rel[0])
                if node is None:
                    continue
                dep = next((d for d in dependencies if int(d[1]) == int(rel[1])), None)
                if dep is None:
                    continue
                tree.create_node(dep[0], dep[1], parent=node.identifier)
                relationships.remove(rel)
                dependencies.remove(dep)
                progressed = True

            if not progressed:
                raise ValueError("Cannot attach relationships: %r" % (relationships,))

        tree.show()
        if jarName is None:
            os.chdir(os.pardir)
Esempio n. 13
0
# Show the whole tree except the subtree rooted at "diane".
print("#" * 4 + "All family members without Diane sub-family")
tree.show(idhidden=False, filter=lambda x: x.identifier != "diane")
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print(tree[node].tag)
print("\n")


# Extract the subtree rooted at "diane" as a tree of its own.
print("#" * 4 + "Let me introduce Diane family only")
sub_t = tree.subtree("diane")
sub_t.show()
print("\n")


# is_branch() returns the identifiers of a node's children.
print("#" * 4 + "Children of Diane")
# Called as a function so the line is valid on Python 2 and Python 3.
print(tree.is_branch("diane"))
print("\n")


# Build a separate three-node tree and paste it under "jill".
print("#" * 4 + "OOhh~ new members enter Jill's family")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste("jill", new_tree)
tree.show()
print("\n")


# Removing node 1 removes the pasted subtree again.
print("#" * 4 + "We are sorry they are gone accidently :(")
tree.remove_node(1)
class RST_DT:
    def load(self, path2file):
        """Parse an indented tree file into ``self.tree`` and ``self.treeNS``.

        Every line becomes one node whose identifier is its 0-based line
        number. The depth of a line is half its number of leading spaces, and
        its parent is the most recent line one level up. A line whose payload
        is ``_!...!_`` is an EDU leaf: its line number is appended to
        ``self.id_EDUs`` and the text between the markers is stored in
        ``self.EDU``. Any other line is an internal node whose ``[N]``/``[S]``
        markers give, in order, the tags of its children in ``self.treeNS``.

        Args:
            path2file: path of the file to read.

        Raises:
            IndexError: if a line matches neither node pattern.
        """
        self.id_EDUs = []
        self.EDU = {}
        self.treeNS = Tree()
        self.tree = Tree()

        with open(path2file, "r") as f:
            lines = f.readlines()

        # Leading-space count of every line; the maximum sizes the per-depth
        # bookkeeping lists.
        indents = [len(line) - len(line.lstrip(" ")) for line in lines]
        max_space = max(indents) if indents else 0
        # At least one slot, so a file without any indented line still loads.
        slots = max(max_space, 1)
        id_parents = [0] * slots  # depth -> id of the latest node at that depth
        NS_parents = [0] * slots  # depth -> N/S markers of the latest internal node

        node_pattern = re.compile(r"\(([\w\-\[\]]+)|(_!.+!_)")  # node content
        edu_pattern = re.compile(r"_!(.*)!_")
        ns_pattern = re.compile(r"\[(N|S)\]")

        for i, (line, nb_space) in enumerate(zip(lines, indents)):
            # The number of spaces determines the parent. Floor division
            # keeps the depth an int on Python 3 as well.
            space = nb_space // 2
            id_parents[space] = i
            parent = id_parents[space - 1]
            match = node_pattern.findall(line)[0]
            if match[0] == "":
                content = match[1]  # EDU leaf
                self.id_EDUs.append(i)
                self.EDU[i] = edu_pattern.findall(content)
            else:
                content = match[0]
                NS_parents[space] = ns_pattern.findall(content)  # e.g. ['N', 'S']

            # Create the node in both trees.
            if i == 0:
                self.tree.create_node(content, 0)
                self.treeNS.create_node("Root", 0)
            else:
                # Number of children already attached: 0 or 1 for a binary tree.
                id_NS = len(self.tree.is_branch(parent))
                self.tree.create_node(content, i, parent=parent)
                self.treeNS.create_node(NS_parents[space - 1][id_NS], i, parent=parent)

    def toDEP(self):
        """Convert the loaded tree into a dependency tree over its leaves.

        Step 1 computes ``head_tree``: for every internal node, the head is
        the head of its left child when that child is tagged "N" in
        ``self.treeNS``, otherwise the head of its right child; a leaf counts
        as its own head and keeps the value -1 in ``head_tree``.

        Step 2 builds the result: the head of node 0 becomes the root, every
        other leaf is first attached to that root and then moved under the
        head selected for it.

        Returns:
            A new ``Tree`` whose nodes are the leaves (EDUs) of ``self.tree``.
        """
        ###############################
        # Step 1: build head_tree

        # Breadth-first walk of the tree to find the depth (root = 0) of
        # every internal node; leaves (EDUs) keep -1.
        size = self.tree.size()
        nodes_depth = [-1] * size
        for i in range(size):
            id_nodes = [0]
            depth = [999] * size
            while id_nodes:  # False if empty
                id_node = id_nodes.pop(0)
                node = self.tree.get_node(id_node)
                if node.bpointer is not None:
                    node_parent = self.tree.get_node(node.bpointer)
                    depth[node.identifier] = depth[node_parent.identifier] + 1
                else:
                    depth[node.identifier] = 0
                if id_node == i:
                    if node.fpointer:
                        nodes_depth[i] = depth[i]
                    break
                if node.fpointer:
                    # Binary tree: exactly two children are expected.
                    id_nodes.append(node.fpointer[0])
                    id_nodes.append(node.fpointer[1])

        # Group the internal node ids by depth.
        id_nodes_depth = []
        for d in range(self.tree.depth()):
            id_nodes_depth.append([])
            for i in range(self.tree.size()):
                if nodes_depth[i] == d:
                    id_nodes_depth[d].append(i)

        head_tree = [-1] * self.treeNS.size()
        # For every internal node, starting from the deepest level so that
        # the heads of the children are already known.
        for d in range(len(id_nodes_depth) - 1, -1, -1):
            for id_node in id_nodes_depth[d]:
                node = self.treeNS.get_node(id_node)
                node_left = self.treeNS.get_node(node.fpointer[0])
                node_right = self.treeNS.get_node(node.fpointer[1])
                if node_left.tag == "N":
                    if head_tree[node_left.identifier] == -1:
                        identifier = node_left.identifier
                    else:
                        identifier = head_tree[node_left.identifier]
                else:
                    if head_tree[node_right.identifier] == -1:
                        identifier = node_right.identifier
                    else:
                        identifier = head_tree[node_right.identifier]
                head_tree[id_node] = identifier

        ###############################
        # Step 2: build the dependency tree

        # The root is the head of node 0; every other EDU starts out as a
        # child of that root.
        dep_tree = Tree()
        id_root = head_tree[0]
        root = self.tree.get_node(id_root)
        dep_tree.create_node(root.tag, root.identifier)
        for id_EDU in range(len(head_tree)):
            if head_tree[id_EDU] == -1 and id_EDU != id_root:
                node = self.tree.get_node(id_EDU)
                dep_tree.create_node(node.tag, node.identifier, parent=id_root)

        # For every EDU, find the head it depends on.
        for id_EDU in range(len(head_tree)):
            if head_tree[id_EDU] == -1 and id_EDU != id_root:

                EDU_NS = self.treeNS.get_node(id_EDU)

                if EDU_NS.tag == "N":
                    # Breadth-first search, starting at the EDU, for a node
                    # tagged S.
                    id_nodes = [EDU_NS.identifier]
                    visited = [False] * self.treeNS.size()
                    while id_nodes:
                        id_node = id_nodes.pop(0)
                        EDU = self.tree.get_node(id_node)
                        visited[EDU.identifier] = True
                        # Stop condition.
                        # NOTE(review): the "is an EDU" test is computed and
                        # then forced to False, so only the S tag decides.
                        head_EDU = head_tree[EDU.identifier] == -1
                        head_EDU = False
                        node_tag = self.treeNS.get_node(EDU.identifier).tag
                        if not head_EDU and node_tag == "S":
                            break
                        # NOTE(review): a parent with identifier 0 (the root)
                        # is falsy and therefore never enqueued here.
                        if EDU.bpointer:
                            if not visited[EDU.bpointer]:
                                id_nodes.append(EDU.bpointer)
                        if EDU.fpointer:  # safety check
                            if not visited[EDU.fpointer[0]]:
                                id_nodes.append(EDU.fpointer[0])
                            if not visited[EDU.fpointer[1]]:
                                id_nodes.append(EDU.fpointer[1])

                    # Depend on the head of the parent of the node reached.
                    id_head = head_tree[EDU.bpointer]

                # Tag is S.
                else:
                    # Depend on the head of the direct parent.
                    parent = self.treeNS.get_node(EDU_NS.bpointer)
                    id_head = head_tree[parent.identifier]

                # Move the EDU under that head.
                if id_EDU != id_head:
                    dep_tree.move_node(id_EDU, id_head)

        return dep_tree

    def toString(self):
        """Display the tree like Hilda's output (delegates to ``showDepth``)."""
        showDepth(self.tree, 0)