Exemple #1
0
    '28': 'HR2',
    '30': 'G11012',
    '24': 'Pm1'
}

# for i in range(len(nos)):
#     dt[nos[i]] = dataorder[i]


def get_species_name(node_name_string):
    """Return the species name mapped to a node name.

    The complete node name is used as the lookup key into the module-level
    ``dt`` mapping; nothing is split off the name first. A name that is
    not a key of ``dt`` raises ``KeyError``.
    """
    return dt[node_name_string]


# Use get_species_name as the tree's species naming function.
t.set_species_naming_function(get_species_name)

# Give every node named "43" a fixed branch length of 5e-05.
for node in t.iter_search_nodes(name="43"):
    node.dist = 5e-05

# Interactive preview, disabled:
#t.show(tree_style=ts)
# Earlier render target, kept for reference:
# t.render("/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_gain_loss_tree.pdf",tree_style=ts,w=3200,h=4800,dpi=200)

# Write the styled tree to PDF.
t.render(
    "/Volumes/MP_HD/CI_GENOME_SEQ/CI_orthomcl_data/gain_loss_tree_frm_orthogroups/CI_denovo_gene_gain_loss_tree.pdf",
    tree_style=ts, w=3200, h=4800, dpi=200,
)
Exemple #2
0
#!/usr/bin/python
from __future__ import absolute_import

import sys
from ete2 import PhyloTree

if __name__ == "__main__":
    # Expected invocation: <script> <tree> <alignment> <output>
    # All three positional arguments were already required (each one is
    # indexed below), so failing early with a usage line replaces what
    # used to be a bare IndexError.
    if len(sys.argv) < 4:
        sys.exit("usage: %s <tree> <alignment> <output>" % sys.argv[0])

    t = sys.argv[1]    # tree input handed to PhyloTree
    s = sys.argv[2]    # alignment; only used by the disabled call below
    out = sys.argv[3]  # destination passed to render()

    pt = PhyloTree(t)
#    pt.link_to_alignment(alignment=s)
    pt.render(out)
Exemple #3
0
# Tree style shared by every tree rendered below.
ts = TreeStyle()
# ts.mode = "c"

# Read all IDs up front so the file handle is closed before the
# long-running external tools are started.
with open(in_id, "r") as id_file:
    id_lines = id_file.readlines()

# Directory for the per-ID BLAST hit FASTA files; it sits next to the ID
# list file. Loop-invariant, so it is computed once.
hits_dir = "/".join(in_id.split('/')[:-1]) + "/top_hits_pm1_madss/"

for i in id_lines:
    i = i.strip('\n')
    if not i:
        # An empty ID is a substring of every path, so the "i in j"
        # filters below would match every alignment and tree file.
        continue
    print(i)
    outf = hits_dir + i + "_blastp_hits_" + in_eval + ".fasta"
    no_hits = blast_gene(i, in_eval, indb, outf)
    print(no_hits)

    # Run megacc on the hits file. The command is passed as an argument
    # list (no shell), so paths containing spaces or shell metacharacters
    # reach megacc intact. As before, the exit status is not checked.
    subprocess.call(
        ["/usr/local/bin/megacc", "-a", align_mao, "-o", align_dir, "-s", "-d", outf])
    # NOTE(review): sl is defined outside this snippet -- presumably a
    # sleep that lets megacc's output settle; confirm.
    sl(2)

    # Run megacc with tree_mao on every .meg file whose path contains
    # this ID.
    # NOTE(review): substring matching means ID "g1" also matches files
    # of "g10" -- confirm IDs cannot be prefixes of one another.
    align_lis = glob.glob(align_dir + "/*.meg")
    for j in align_lis:
        if i in j:
            subprocess.call(
                ["/usr/local/bin/megacc", "-a", tree_mao, "-o", tree_dir, "-d", j])

    # Render every matching non-consensus Newick tree to PDF. All matches
    # for one ID share the same output name, so a later match overwrites
    # an earlier one.
    tree_ls = glob.glob(tree_dir + "/*.nwk")
    for j in tree_ls:
        if i in j and "consensus" not in j:
            t = PhyloTree(j, format=1)
            #t.show()
            # t = Phylo.read(j,"newick")
            # #t.ladderize()
            # #Phylo.draw(t)
            # Phylo.write(t,j.replace(".nwk",".xml"),"phyloxml")
            # Phylo.draw_graphviz(t,prog="neato")
            t.render(tree_dir + "/" + i + "_blastp_hits_" + in_eval + ".pdf", tree_style=ts, dpi=200)
### Align
# The alignment path is derived before the try block so the cleanup in
# the finally clause can always refer to it.
aln_file_name = os.path.splitext(temp_file_name)[0] + ".afa"

try:
    if verbose:
        print("aligning...")
    align_muscle(temp_file_name, aln_file_name, gapopen=-1000.0)

    ### Build tree
    if verbose:
        print("building tree...")
    tree, aln = build_tree_FT(aln_file_name)

    ### Show in pretty format
    pretty_tree = PhyloTree(str(tree), alignment=aln_file_name, alg_format="fasta")

    pretty_tree.ladderize()

    ts = TreeStyle()

    pretty_tree.render(outfile, tree_style=ts)
finally:
    ### Clean up your mess
    # Runs on failure as well, so a crash in alignment, tree building or
    # rendering no longer leaves the temporary files behind. The existence
    # check keeps a missing file from masking the original exception.
    for _tmp_path in (temp_file_name, aln_file_name):
        if os.path.exists(_tmp_path):
            os.remove(_tmp_path)

### TODO
# highlight adapter rows
# root on adapter?

# tweak alignment parameters so it's better
# understand muscle alignment score. how can we rationally change parameters to improve?

# consider NJ. pairwise distances. how would you overlay alignment?
# would need pairwise alignment and distance for N^2 ~ 10,000 sequences
# what we have is simple and achieves the desired outcome