def extract_convex_subtrees(treeG, merged_nodes, del_nodes, ncbiG_tid2v, ncbiG_v2tid):
    '''Extract the significant convex subtrees found in the given tree graph.

    Every leaf of treeG is visited.  For each taxon id listed in the leaf's
    'sub classification' entry that has not been examined yet, the tree is
    colored for that id (color_for_tid), the coloring is tested with
    check_convex and check_significance, and when both pass the subtree is
    pulled out with extract_single_convex_subtree.

    Side effects on treeG: the vertex properties 'v colored' and
    'v extracted' are created/replaced, every vertex is flagged 1 in
    'v extracted', and that property is installed as the vertex filter.

    Arguments:
    treeG        -- tree graph exposing the 'sub classification' and
                    'is leaf' vertex properties
    merged_nodes -- mapping; its keys and the mapping itself are forwarded
                    to tree_to_graph.tree_to_tids
    del_nodes    -- forwarded to tree_to_graph.tree_to_tids
    ncbiG_tid2v, ncbiG_v2tid
                 -- forwarded to tree_to_graph.tids_to_fullclasses

    Returns a tuple (extracted_trees, tids_checked):
    extracted_trees -- dict keyed by the value tree_to_graph.fullclasses_to_mrcr
                       yields for each extracted subtree; values are whatever
                       extract_single_convex_subtree returned
    tids_checked    -- list of every taxon id examined, in examination order
    '''
    tids_checked = []   # returned to the caller; keeps examination order
    seen_tids = set()   # same ids as tids_checked, for constant-time lookups
    extracted_trees = {}

    subclass = treeG.vertex_properties['sub classification']

    # Registered under 'v colored' but never read or written directly in this
    # function (presumably used by color_for_tid / check_convex -- TODO confirm).
    v_in_color = treeG.new_vertex_property('bool')
    treeG.vertex_properties['v colored'] = v_in_color

    # A vertex is masked by the filter only when its flag is 0, so start with
    # every vertex visible.
    v_extracted = treeG.new_vertex_property('bool')
    treeG.vertex_properties['v extracted'] = v_extracted
    for v in treeG.vertices():
        v_extracted[v] = 1
    treeG.set_vertex_filter(v_extracted)

    is_leaf = treeG.vertex_properties['is leaf']

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    for v in treeG.vertices():
        if not is_leaf[v]:
            continue
        for tid in subclass[v]:
            if tid in seen_tids:
                # Stop at the first id that was already examined; the rest of
                # this leaf's list is skipped (presumably because the list is
                # ordered so that later ids were examined too -- TODO confirm).
                break
            seen_tids.add(tid)
            tids_checked.append(tid)
            print('checkin tid ' + str(tid) + ' for convexity')

            color_for_tid(tid, treeG)
            is_convex, root, parent = check_convex(treeG)
            if not is_convex:
                print("not convex")
                continue
            if not check_significance(treeG, root, parent):
                print("not significant")
                continue

            print('convex')
            t = extract_single_convex_subtree(treeG, root, parent)
            # Key the extracted subtree by the id derived from its own taxa.
            subtree_tids = tree_to_graph.tree_to_tids(
                t[0], merged_nodes.keys(), merged_nodes, del_nodes)
            subtree_classes = tree_to_graph.tids_to_fullclasses(
                subtree_tids, ncbiG_tid2v, ncbiG_v2tid)
            t_mrcr = tree_to_graph.fullclasses_to_mrcr(subtree_classes)
            extracted_trees[t_mrcr] = t
            # NOTE(review): 1 means "not masked"; if the intent is to hide the
            # extracted subtree's root this should probably be 0 -- confirm
            # against extract_single_convex_subtree before changing.
            v_extracted[root] = 1

    return extracted_trees, tids_checked
# Body of a per-tree loop; the loop header is outside this excerpt
# (presumably it supplies the counter `i` and the text `line` -- TODO confirm).
# For each tree: log its name and leaf statistics to `data`, convert it to a
# graph, skip it unless it lies within Eukaryota, then extract convex subtrees.
print i
print "new tree"
# A line is split on tabs: field 0 is the tree name, field 1 the tree text.
s = line.split('\t')
tree_name = s[0]
data.write(tree_name + '\n')
print "loading tree"
# [:-1] drops the last character of field 1 (presumably the trailing newline;
# NOTE(review): a final line without a newline would lose a real character).
t = ivy.tree.read(s[1][:-1])
num_leaves = len(list(t.leaves()))
data.write("n leaves: " + str(num_leaves) + '\n')
print "parent tree # leaves = " + str(num_leaves)
print "converting tree to graph"
treeG = tree_to_graph.tree_2_G(t)
tree_to_graph.ammend_treeG_taxid_map(treeG, merged_nodes, del_nodes)
tree_to_graph.treeG_add_istaxonclade_map(treeG, ncbiG, ncbiG_tid2v)
# Derive a single taxon id (mrcr) for the whole tree from its taxon ids.
tids = tree_to_graph.treeG_to_tids(treeG)
full_classes = tree_to_graph.tids_to_fullclasses(tids, ncbiG_tid2v, ncbiG_v2tid)
mrcr = tree_to_graph.fullclasses_to_mrcr(full_classes) # TODO: change "mrcr" to "ancestor"; note that it is a taxonomic 'ancestor'
data.write("mrcr taxid: " + str(mrcr) + '\n')
# Trees whose mrcr is not a descendant of Eukaryota are recorded and skipped.
if not NCBIgraph.is_descendant(Eukaryota_tid, mrcr, ncbiG_tid2v, ncbiG_v2tid):
    print "not within Eukaryota"
    data.write("not Eukaryota\n")
    continue
# NOTE(review): treeG_v2tid is not used again within this excerpt.
treeG_v2tid = treeG.vertex_properties['taxid']
tree_to_graph.treeG_add_subclassification_map(treeG, mrcr, ncbiG_tid2v, ncbiG_v2tid)
# NOTE(review): second call to treeG_add_istaxonclade_map with the same
# arguments as above -- confirm whether recomputing it here is required.
tree_to_graph.treeG_add_istaxonclade_map(treeG, ncbiG, ncbiG_tid2v)
n_leaves, n_unique_tids, n_unclassified_leaves, n_unique_unclassified_tids = tree_to_graph.leaf_stats(treeG, ncbiG_v2tid)
data.write("n unique taxons: " + str(n_unique_tids) + '\n')
data.write("n unclassified leaves: " + str(n_unclassified_leaves) + '\n')
data.write("n unique unclassified taxons: " + str(n_unique_unclassified_tids) + '\n')
print "finding convex subtrees"
# Returns the extracted subtrees together with the list of taxon ids examined.
convex_subtrees, tids_checked = convex_colored_subtrees.extract_convex_subtrees(treeG, merged_nodes, del_nodes, ncbiG_tid2v, ncbiG_v2tid)