Beispiel #1
0
def extract_convex_subtrees(treeG, merged_nodes, del_nodes, ncbiG_tid2v, ncbiG_v2tid):
    '''Extract the convex, significant subtrees of treeG, testing each taxon id once.

    For every leaf vertex, the taxon ids in its 'sub classification' entry are
    walked in order. Each id that has not been tested yet is coloured with
    color_for_tid() and tested with check_convex() and check_significance();
    candidates passing both are pulled out with extract_single_convex_subtree().

    Side effects on treeG: the vertex properties 'v colored' and 'v extracted'
    are (re)registered, and 'v extracted' is installed as the vertex filter.

    Parameters:
        treeG -- tree graph carrying the 'sub classification' and 'is leaf'
            vertex properties.
        merged_nodes, del_nodes -- forwarded to tree_to_graph.tree_to_tids().
        ncbiG_tid2v, ncbiG_v2tid -- forwarded to
            tree_to_graph.tids_to_fullclasses().

    Returns:
        A tuple (extracted_trees, tids_checked). extracted_trees maps the value
        computed by tree_to_graph.fullclasses_to_mrcr() for an extracted
        subtree to the object returned by extract_single_convex_subtree();
        tids_checked lists every taxon id that was tested, in test order.
    '''
    tids_checked = []
    # Mirrors tids_checked so the "already tested?" lookup is O(1); the list
    # itself is still what gets returned to the caller.
    seen_tids = set()
    extracted_trees = {}
    subclass = treeG.vertex_properties['sub classification']

    # Registered here for the colouring helpers.
    # NOTE(review): presumably written by color_for_tid() -- confirm.
    treeG.vertex_properties['v colored'] = treeG.new_vertex_property('bool')

    v_extracted = treeG.new_vertex_property('bool')
    treeG.vertex_properties['v extracted'] = v_extracted
    for v in treeG.vertices():
        v_extracted[v] = True  # a vertex is masked only when its value is 0
    treeG.set_vertex_filter(v_extracted)

    is_leaf = treeG.vertex_properties['is leaf']
    for v in treeG.vertices():
        if not is_leaf[v]:
            continue
        for tid in subclass[v]:
            if tid in seen_tids:
                # Stop at the first id that was already tested; the remaining
                # ids of this leaf are skipped as well.
                # NOTE(review): presumably they were tested together with it
                # -- confirm the ordering of 'sub classification'.
                break
            seen_tids.add(tid)
            tids_checked.append(tid)
            # Single-argument print(...) gives the same output on Python 2 and 3.
            print('checkin tid ' + str(tid) + ' for convexity')
            color_for_tid(tid, treeG)
            is_convex, root, parent = check_convex(treeG)
            if not is_convex:
                print("not convex")
                continue
            if not check_significance(treeG, root, parent):
                # Reported only for rejected candidates; it used to be printed
                # after successful extractions too.
                print("not significant")
                continue
            print('convex')
            t = extract_single_convex_subtree(treeG, root, parent)
            subtree_tids = tree_to_graph.tree_to_tids(
                t[0], merged_nodes.keys(), merged_nodes, del_nodes)
            full_classes = tree_to_graph.tids_to_fullclasses(
                subtree_tids, ncbiG_tid2v, ncbiG_v2tid)
            t_mrcr = tree_to_graph.fullclasses_to_mrcr(full_classes)
            extracted_trees[t_mrcr] = t
            # Keep the root of the extracted subtree visible in the filtered graph.
            v_extracted[root] = True
    return extracted_trees, tids_checked
Beispiel #2
0
 # Per-tree processing step: parse one tab-separated record, convert the tree
 # to a graph, write summary statistics to `data`, and search for convex
 # subtrees. This fragment contains `continue`, so it is the body of a loop
 # whose header is outside this view; `i`, `line`, `data`, `merged_nodes`,
 # `del_nodes`, `ncbiG`, `ncbiG_tid2v`, `ncbiG_v2tid` and `Eukaryota_tid` are
 # all defined outside it.
 print i
 print "new tree"
 # Record layout: <tree name> TAB <tree string>.
 s = line.split('\t')
 tree_name = s[0]
 data.write(tree_name + '\n')
 print "loading tree"
 # [:-1] drops the last character of the tree field
 # (presumably the trailing newline -- TODO confirm).
 t = ivy.tree.read(s[1][:-1])
 num_leaves = len(list(t.leaves()))
 data.write("n leaves: " + str(num_leaves) + '\n')
 print "parent tree # leaves = " + str(num_leaves)
 print "converting tree to graph"
 treeG = tree_to_graph.tree_2_G(t)
 tree_to_graph.ammend_treeG_taxid_map(treeG, merged_nodes, del_nodes)
 tree_to_graph.treeG_add_istaxonclade_map(treeG, ncbiG, ncbiG_tid2v)
 tids = tree_to_graph.treeG_to_tids(treeG)
 full_classes = tree_to_graph.tids_to_fullclasses(tids, ncbiG_tid2v, ncbiG_v2tid)
 mrcr = tree_to_graph.fullclasses_to_mrcr(full_classes) #change to ancestor, note that is a taxonomic 'ancestor'
 data.write("mrcr taxid: " + str(mrcr) + '\n')
 # Skip this tree when the is_descendant() check against Eukaryota_tid fails.
 if not NCBIgraph.is_descendant(Eukaryota_tid, mrcr, ncbiG_tid2v, ncbiG_v2tid):
     print "not within Eukaryota"
     data.write("not Eukaryota\n")
     continue
 # NOTE(review): treeG_v2tid is not used in the visible part of this loop body.
 treeG_v2tid = treeG.vertex_properties['taxid']
 tree_to_graph.treeG_add_subclassification_map(treeG, mrcr, ncbiG_tid2v, ncbiG_v2tid)
 # NOTE(review): second call with the same arguments as the one made right
 # after ammend_treeG_taxid_map above -- confirm whether both are needed.
 tree_to_graph.treeG_add_istaxonclade_map(treeG, ncbiG, ncbiG_tid2v)
 # NOTE(review): n_leaves is not used in the visible code; the "n leaves"
 # line written earlier comes from num_leaves instead.
 n_leaves, n_unique_tids, n_unclassified_leaves, n_unique_unclassified_tids = tree_to_graph.leaf_stats(treeG, ncbiG_v2tid)
 data.write("n unique taxons: " + str(n_unique_tids) + '\n')
 data.write("n unclassified leaves: " + str(n_unclassified_leaves) + '\n')
 data.write("n unique unclassified taxons: " + str(n_unique_unclassified_tids) + '\n')
 print "finding convex subtrees"
 # Returns the extracted subtrees together with the list of taxon ids tested.
 convex_subtrees, tids_checked = convex_colored_subtrees.extract_convex_subtrees(treeG, merged_nodes, del_nodes, ncbiG_tid2v, ncbiG_v2tid)