Exemple #1
0
def get_sum_of_branches(treepath):
    """Return ``Tree.length()`` for the Newick tree stored at *treepath*.

    Only the first line of the file is read; it is stripped of surrounding
    whitespace and parsed as a Newick string.

    Args:
        treepath: Path to a text file whose first line is a Newick tree.

    Returns:
        The value reported by ``length()`` on the parsed tree (presumably
        the sum of all branch lengths, as the function name suggests).
    """
    # The context manager guarantees the handle is released even if the
    # read fails.
    with open(treepath, "r") as fin:
        newick = fin.readline().strip()
    t = Tree()
    t.read_from_string(newick, "newick")
    return t.length()
Exemple #2
0
def reroot_tree(tstr):
    """Re-root a Newick tree on the outgroup defined in the configuration.

    Args:
        tstr: The tree itself as a Newick string (it is parsed directly,
            not opened as a file path).

    Returns:
        The re-rooted tree as a Newick string, with any "[&R] " rooting
        annotation removed and surrounding whitespace stripped.
    """
    t = Tree()
    t.read_from_string(str(tstr), "newick")

    # The configured outgroup is textual; brackets and double quotes are
    # dropped so that only the comma-separated labels remain.
    # NOTE(review): presumably the setting looks like ["A","B"] -- confirm.
    og = ap.params["outgroup"]
    for unwanted in ("[", "]", "\""):
        og = og.replace(unwanted, "")
    ogs = og.split(",")

    mrca = t.mrca(taxon_labels=ogs)
    t.reroot_at_edge(mrca.edge, update_splits=False)

    # Plain string replacement: the previous regex relied on unknown escape
    # sequences, which newer versions of the re module reject.
    ret = t.as_string("newick").replace("[&R] ", "")
    return ret.strip()
def index_mutations(con):
    """Builds an index of all mutations.

    For every combination of alignment method and phylogenetic model, the
    ancestral cladogram is parsed and index_mutations_helper() is called
    once per edge whose two end nodes both exist and both carry a label.

    Args:
        con: Database connection, handed through to the helper functions.
    """
    for msaid in get_alignment_method_ids(con):
        for modelid in get_phylo_modelids(con):
            newick = get_anc_cladogram(con, msaid, modelid)
            t = Tree()
            t.read_from_string(newick, "newick")
            for edge in t.preorder_edge_iter():
                head = edge.head_node
                tail = edge.tail_node
                # Edges missing an end node (e.g. above the root) or with
                # unlabeled nodes cannot be mapped to named ancestors.
                if head is None or tail is None:
                    continue
                if head.label is None or tail.label is None:
                    continue
                print("%s %s %s %s" % (msaid, modelid, head.label, tail.label))
                anc1name = "Node" + str(head.label)
                anc2name = "Node" + str(tail.label)
                index_mutations_helper(con, msaid, modelid, anc1name, anc2name)
Exemple #4
0
def return_trees_from_trace(path):
    """Parse a trace file into parallel lists of trees and log-likelihoods.

    Each non-blank line is expected to hold a log-likelihood value followed
    by a Newick tree.  Everything before the first "(" -- ignoring "[" and
    "]" characters -- is converted to a float; the complete line is handed
    to the Newick parser.

    A tree opens a new entry only when its log-likelihood is strictly
    greater than that of the previous line; otherwise it overwrites the most
    recent entry.

    Args:
        path: Path to the trace file.

    Returns:
        A two-item list ``[trees, lnls]`` where ``trees`` holds the parsed
        Tree objects and ``lnls`` the matching log-likelihoods formatted
        with two decimals.  Both lists are empty if the file has no
        non-blank lines.

    Raises:
        ValueError: If the text before the tree is not a valid float.
    """
    print("Parsing trace: %s" % path)
    trees = []
    lnls = []
    last_tree = None
    last_lnl = 0.0
    count_unique_trees = 0

    # The context manager closes the file even when a line fails to parse.
    with open(path, "r") as fin:
        for line in fin:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no tree.
                continue

            lnlstring = line.partition("(")[0]
            lnlstring = lnlstring.replace("[", "").replace("]", "")
            lnl = float(lnlstring)

            t = Tree()
            t.read_from_string(line, "newick")

            if last_tree is None or last_lnl < lnl:
                # First tree, or an improvement over the previous line.
                trees.append(t)
                lnls.append("%.2f" % lnl)
                count_unique_trees += 1
            else:
                # No improvement: overwrite the most recent entry.
                trees[-1] = t
                lnls[-1] = "%.2f" % lnl

            last_tree = t
            last_lnl = lnl
            print("%s %s" % (count_unique_trees, lnl))

    # NOTE(review): the final tree is already the last entry of `trees`, so
    # this repeats it.  Kept because callers may rely on the existing list
    # shape -- confirm whether the duplicate is intended.
    if last_tree is not None:
        trees.append(last_tree)
        lnls.append("%.2f" % last_lnl)
    return [trees, lnls]
Exemple #5
0
def reroot_newick(con, newick):
    """Provide a newick string, this method will re-root the tree
        based on the 'outgroup' setting.

    The outgroup is the set of taxa whose database group is named
    'outgroup'.  The tree is re-rooted at the edge leading to their most
    recent common ancestor, unless that edge lacks a node at either end, in
    which case the tree is returned unchanged.

    Args:
        con: Database connection used to look up the outgroup taxa.
        newick: The tree as a Newick string.

    Returns:
        The (possibly re-rooted) tree as a Newick string.
    """
    cur = con.cursor()
    dendrotree = Tree()
    dendrotree.read_from_string(newick, "newick")

    sql = "select shortname from Taxa where id in (select taxonid from GroupsTaxa where groupid in (select id from TaxaGroups where name='outgroup'))"
    cur.execute(sql)
    # Underscores in the stored short names are turned into spaces before
    # the names are matched against the tree's taxon labels.
    outgroup_labels = [str(row[0].replace("_", " ")) for row in cur.fetchall()]

    mrca = dendrotree.mrca(taxon_labels=outgroup_labels)
    edge = mrca.edge
    # Only re-root when the MRCA's edge has a node on both ends.
    if edge.tail_node is not None and edge.head_node is not None:
        dendrotree.reroot_at_edge(edge, update_splits=True)
    return dendrotree.as_string("newick")
Exemple #6
0
def compute_tree_distances(con):
    """Compute all pairwise tree distances and store them in the database.

    Registers the two metrics (1 = 'symmetric', 2 = 'euclidean') in
    TreeDistanceMetrics, parses every Newick tree found in
    UnsupportedMlPhylogenies, and inserts one TreeDistances row per metric
    for every ordered pair of trees (each tree paired with itself included).

    Args:
        con: Database connection; changes are committed before returning.
    """
    cur = con.cursor()
    cur.execute("insert or replace into TreeDistanceMetrics(metricid, name) values(1, 'symmetric')")
    cur.execute("insert or replace into TreeDistanceMetrics(metricid, name) values(2, 'euclidean')")
    con.commit()

    # Parse every stored phylogeny once, keyed by its database id.
    treeid_dendropytree = {}
    sql = "select id, almethod, phylomodelid, newick from UnsupportedMlPhylogenies"
    # Run the query on the same cursor the rows are fetched from.
    cur.execute(sql)
    for row in cur.fetchall():
        treeid = row[0]
        newick = row[3]
        t = Tree()
        t.read_from_string(newick, "newick")
        treeid_dendropytree[treeid] = t

    insert_sql = "insert into TreeDistances(metricid, treeida, treeidb, distance) values(?, ?, ?, ?)"
    for ii, treeii in treeid_dendropytree.items():
        for jj, treejj in treeid_dendropytree.items():
            # Metric 1: symmetric distance.
            distance = treeii.symmetric_difference(treejj)
            cur.execute(insert_sql, (1, ii, jj, distance))

            # Metric 2: euclidean distance.
            distance = treeii.euclidean_distance(treejj)
            cur.execute(insert_sql, (2, ii, jj, distance))

    con.commit()
                        
                        
            
    
Exemple #7
0
def match_ancestors_across_models(con):
    """This method fills data in the table AncestorsAcrossModels.

    For every combination of phylogenetic model and alignment method, the
    ancestral cladogram is parsed and each internal, non-root node is mapped
    to the taxon ids of the leaves beneath it.  Ancestors that share exactly
    the same set of descendant taxa are then recorded as matches of each
    other, in both directions.  Existing rows in AncestorsAcrossModels are
    deleted first.

    Combinations without a stored cladogram are reported through
    write_error() and skipped.

    Args:
        con: Database connection; changes are committed before returning.
    """
    cur = con.cursor()

    modelids = get_phylo_modelids(con)
    msaids = get_alignment_method_ids(con)

    # Taxon names are looked up many times below, so cache the name -> id
    # map once instead of repeatedly querying the database.
    taxonname_id = {}
    cur.execute("select id, shortname from Taxa")
    for taxonid, shortname in cur.fetchall():
        taxonname_id[shortname] = taxonid

    ancid_childrenids = {}  # key = Ancestor ID, value = list of Taxa IDs

    for modelid in modelids:
        for msaid in msaids:
            sql = ("select newick from AncestralCladogram where unsupportedmltreeid in "
                   "(select id from UnsupportedMlPhylogenies where almethod=" + str(msaid)
                   + " and phylomodelid=" + str(modelid) + ")")
            cur.execute(sql)
            xx = cur.fetchone()
            if xx is None:
                write_error(con, "I cannot find the ancestral Newick cladogram for almethod=" +
                            str(msaid) + " and phylomodelid=" + str(modelid))
                # There is no tree to parse for this combination.
                continue
            cladonewick = str(xx[0])

            t = Tree()
            t.read_from_string(cladonewick, "newick")

            for node in t.nodes():
                # Only internal nodes below the root are considered.
                if node.is_leaf() or node.level() <= 0:
                    continue
                sql = "select id from Ancestors where name='Node" + node.label + \
                    "' and almethod=" + str(msaid) + " and phylomodel=" + str(modelid)
                cur.execute(sql)
                ancid = cur.fetchone()[0]

                children = []
                for leaf in node.leaf_iter():
                    # Quotes around the leaf's Newick name are dropped
                    # before the taxon lookup.
                    taxonname = leaf.as_newick_string().replace("'", "")
                    children.append(taxonname_id[taxonname])
                ancid_childrenids[ancid] = children

    # Group ancestors by their sorted descendant taxa; ancestors in the same
    # group have identical children and therefore match each other.
    children_ancids = {}  # key = sorted tuple of Taxa IDs, value = Ancestor IDs
    for ancid, children in ancid_childrenids.items():
        children_ancids.setdefault(tuple(sorted(children)), []).append(ancid)

    cur.execute("delete from AncestorsAcrossModels")
    con.commit()

    for members in children_ancids.values():
        for anc1 in members:
            for anc2 in members:
                if anc1 == anc2:
                    # Skip the self comparison.
                    continue
                sql = "insert into AncestorsAcrossModels (ancid, same_ancid) values(" + \
                    str(anc1) + "," + str(anc2) + ")"
                cur.execute(sql)
    con.commit()