def get_sum_of_branches(treepath):
    """Return the length of the Newick tree stored in the file at treepath.

    Only the first line of the file is read; it is stripped of surrounding
    whitespace and parsed as a Newick string.

    Input: treepath, the path to a text file whose first line is a Newick tree.
    Output: the value of Tree.length() for the parsed tree (going by this
    function's name, the sum of its branch lengths).
    """
    # The context manager guarantees the handle is released even when
    # reading fails, which the explicit open()/close() pair did not.
    with open(treepath, "r") as fin:
        newick = fin.readline().strip()

    t = Tree()
    t.read_from_string(newick, "newick")
    return t.length()
def reroot_tree(tstr):
    """Re-root a Newick tree on the configured outgroup.

    Input: tstr, a Newick tree string (or any object whose str() is one).
    Note that this is the tree text itself, not a path to a file.

    Output: the Newick string of the tree after re-rooting it at the edge
    above the most recent common ancestor of the outgroup taxa.  The
    outgroup is read from ap.params["outgroup"], a comma-separated list of
    taxon labels that may be wrapped in square brackets and double quotes.
    The returned string has any "[&R] " rooting annotation removed and is
    stripped of surrounding whitespace.
    """
    t = Tree()
    t.read_from_string(str(tstr), "newick")

    # Remove the list punctuation ([, ] and ") from the configured value,
    # leaving a bare comma-separated list of labels.
    og = re.sub(r'[\[\]"]', "", ap.params["outgroup"])
    ogs = og.split(",")

    mrca = t.mrca(taxon_labels=ogs)
    t.reroot_at_edge(mrca.edge, update_splits=False)

    ret = t.as_string("newick")
    # Drop the "[&R] " annotation from the serialized tree.  A plain
    # substring replacement is used because the previous pattern contained
    # the escape "\R", which newer versions of the re module reject.
    ret = ret.replace("[&R] ", "")
    return ret.strip()
def index_mutations(con):
    """Builds an index of all mutations.

    For every combination of alignment method and phylogenetic model, the
    ancestral cladogram is loaded and each edge that has labelled nodes at
    both ends is passed to index_mutations_helper.

    Input: con, the database connection handed through to the helper
    functions.
    Output: None.  One progress line is printed per indexed edge.
    """
    for msaid in get_alignment_method_ids(con):
        for modelid in get_phylo_modelids(con):
            newick = get_anc_cladogram(con, msaid, modelid)
            t = Tree()
            t.read_from_string(newick, "newick")

            for edge in t.preorder_edge_iter():
                head = edge.head_node
                tail = edge.tail_node
                # Edges missing a node at either end cannot name a pair of
                # ancestors.
                if head is None or tail is None:
                    continue
                # Unlabelled nodes have no "Node<label>" ancestor name.
                if head.label is None or tail.label is None:
                    continue

                # Same space-separated output as the former print statement,
                # written so that it also parses under Python 3.
                print("%s %s %s %s" % (msaid, modelid, head.label, tail.label))

                anc1name = "Node" + str(head.label)
                anc2name = "Node" + str(tail.label)
                index_mutations_helper(con, msaid, modelid, anc1name, anc2name)
def return_trees_from_trace(path):
    """Parse a tree-search trace file into trees and their log-likelihoods.

    Each non-blank line is expected to hold a log-likelihood value
    (optionally wrapped in square brackets) followed by a Newick tree that
    starts at the first "(" on the line.

    A tree is appended to the result when it is the first one read or when
    its log-likelihood is greater than that of the previous line; otherwise
    it overwrites the most recent entry.  After the last line, the final
    tree and its log-likelihood are appended once more.

    Input: path, the path to the trace file.
    Output: a two-item list [trees, lnls], where trees is a list of Tree
    objects and lnls is the parallel list of log-likelihoods formatted as
    strings with two decimal places.  Both lists are empty when the trace
    holds no non-blank lines.
    Raises: ValueError if the text before the tree on a line is not a number.
    """
    print("Parsing trace: %s" % path)

    trees = []
    lnls = []
    last_tree = None
    last_lnl = 0.0
    count_unique_trees = 0

    with open(path, "r") as fin:
        for line in fin:
            # Blank lines (for example a trailing newline at the end of the
            # file) carry no tree and would make float() fail.
            if not line.strip():
                continue

            # Everything before the first "(" is the log-likelihood, with
            # any square brackets around it ignored.
            prefix = line.partition("(")[0]
            lnl = float(prefix.replace("[", "").replace("]", ""))
            lnl_label = "%.2f" % lnl

            # The whole line is handed to the parser, as before.
            # NOTE(review): this presumably relies on the parser treating the
            # bracketed likelihood as a Newick comment -- confirm.
            t = Tree()
            t.read_from_string(line, "newick")

            if last_tree is None or last_lnl < lnl:
                # First tree, or an improvement over the previous line.
                trees.append(t)
                lnls.append(lnl_label)
                count_unique_trees += 1
            else:
                # No improvement: replace the most recent entry.
                trees[-1] = t
                lnls[-1] = lnl_label

            last_tree = t
            last_lnl = lnl
            print("%s %s" % (count_unique_trees, lnl))

    if last_tree is None:
        # Empty trace: nothing to report (this used to raise NameError).
        return [trees, lnls]

    # NOTE(review): trees[-1] is already last_tree at this point, so this
    # stores the final tree a second time.  Kept because callers may depend
    # on it -- confirm whether the duplicate is intended.
    trees.append(last_tree)
    lnls.append("%.2f" % last_lnl)
    return [trees, lnls]
def reroot_newick(con, newick):
    """Re-root a Newick tree on the taxa stored in the 'outgroup' group.

    Input: con, a database connection; newick, the tree as a Newick string.
    Output: the tree as a Newick string.  It is re-rooted at the edge above
    the most recent common ancestor of the outgroup taxa when that edge has
    a node at both ends; otherwise the input string is returned as given.
    """
    cursor = con.cursor()

    tree = Tree()
    tree.read_from_string(newick, "newick")

    # Short names of every taxon that belongs to the group named 'outgroup'.
    query = "select shortname from Taxa where id in (select taxonid from GroupsTaxa where groupid in (select id from TaxaGroups where name='outgroup'))"
    cursor.execute(query)

    # The tree is looked up with spaces where the stored short names have
    # underscores.
    labels = [str(re.sub("_", " ", row[0])) for row in cursor.fetchall()]

    outgroup_edge = tree.mrca(taxon_labels=labels).edge
    if outgroup_edge.tail_node is None or outgroup_edge.head_node is None:
        # No complete edge to re-root on: hand the input back untouched.
        return newick

    tree.reroot_at_edge(outgroup_edge, update_splits=True)
    return tree.as_string("newick")
def compute_tree_distances(con):
    """Compute pairwise distances between all unsupported ML phylogenies.

    Registers the two metrics (1 = 'symmetric', 2 = 'euclidean') in the
    TreeDistanceMetrics table, loads every tree from
    UnsupportedMlPhylogenies, and writes one TreeDistances row per metric
    for every ordered pair of trees, including each tree paired with
    itself.

    Input: con, a database connection.
    Output: None.  Results are written to the database and committed.
    """
    cur = con.cursor()
    cur.execute("insert or replace into TreeDistanceMetrics(metricid, name) values(1, 'symmetric')")
    cur.execute("insert or replace into TreeDistanceMetrics(metricid, name) values(2, 'euclidean')")
    con.commit()

    # Run the query on the cursor that is fetched from.  It was previously
    # issued through con.execute(), so cur.fetchall() never saw its rows.
    cur.execute("select id, almethod, phylomodelid, newick from UnsupportedMlPhylogenies")
    treeid_dendropytree = {}
    for treeid, _almethod, _phylomodelid, newick in cur.fetchall():
        t = Tree()
        t.read_from_string(str(newick), "newick")
        treeid_dendropytree[treeid] = t

    insert_sql = "insert into TreeDistances(metricid, treeida, treeidb, distance) values(?, ?, ?, ?)"
    for ii, treeii in treeid_dendropytree.items():
        for jj, treejj in treeid_dendropytree.items():
            # Symmetric distance.
            distance = treeii.symmetric_difference(treejj)
            cur.execute(insert_sql, (1, ii, jj, distance))

            # Euclidean distance.
            distance = treeii.euclidean_distance(treejj)
            cur.execute(insert_sql, (2, ii, jj, distance))
    con.commit()
def match_ancestors_across_models(con):
    """This method fills data in the table AncestorsAcrossModels.

    For every combination of phylogenetic model and alignment method, the
    ancestral cladogram is loaded and each internal, non-root node is mapped
    to the IDs of the taxa found at the leaves below it.  Ancestors that
    have exactly the same set of descendant taxa are then recorded as
    matches of one another, in both directions.

    Input: con, a database connection.
    Output: None.  The AncestorsAcrossModels table is emptied and refilled,
    and the changes are committed.
    """
    cur = con.cursor()
    modelids = get_phylo_modelids(con)
    msaids = get_alignment_method_ids(con)

    # Pull the map of taxon names to IDs from the database.  It is used for
    # every leaf of every tree, so it is cached here rather than queried
    # repeatedly.
    taxonname_id = {}
    cur.execute("select id, shortname from Taxa")
    for taxonid, name in cur.fetchall():
        taxonname_id[name] = taxonid

    # key = Ancestor ID, value = list of Taxa IDs
    ancid_childrenids = {}
    for modelid in modelids:
        for msaid in msaids:
            cur.execute(
                "select newick from AncestralCladogram where unsupportedmltreeid in"
                " (select id from UnsupportedMlPhylogenies where almethod=? and phylomodelid=?)",
                (msaid, modelid))
            xx = cur.fetchone()
            if xx is None:
                write_error(con, "I cannot find the ancestral Newick cladogram for almethod=" +
                            str(msaid) + " and phylomodelid=" + str(modelid))
                # There is no tree to process for this combination; without
                # this, the lookup below would fail on None whenever
                # write_error returns.
                continue

            cladonewick = str(xx[0])
            t = Tree()
            t.read_from_string(cladonewick, "newick")

            for node in t.nodes():
                # Only internal nodes below the root are ancestors.
                if node.is_leaf() or node.level() <= 0:
                    continue
                cur.execute(
                    "select id from Ancestors where name=? and almethod=? and phylomodel=?",
                    ("Node" + node.label, msaid, modelid))
                ancid = cur.fetchone()[0]

                children = []
                for leaf in node.leaf_iter():
                    # Leaf names may be quoted in the Newick output.
                    taxonname = leaf.as_newick_string().replace("'", "")
                    children.append(taxonname_id[taxonname])
                ancid_childrenids[ancid] = children

    # Group ancestors by their sorted descendant taxa, so ancestors with the
    # same children land in the same group without comparing every pair.
    ancids_by_children = {}
    for ancid, children in ancid_childrenids.items():
        ancids_by_children.setdefault(tuple(sorted(children)), []).append(ancid)

    cur.execute("delete from AncestorsAcrossModels")
    con.commit()

    insert_sql = "insert into AncestorsAcrossModels (ancid, same_ancid) values(?, ?)"
    for members in ancids_by_children.values():
        for anc1 in members:
            for anc2 in members:
                if anc1 == anc2:
                    # Skip the self comparison.
                    continue
                cur.execute(insert_sql, (anc1, anc2))
    con.commit()