Exemple #1
0
    def build(self, root='midpoint', raxml=True, raxml_time_limit=0.5):
        """Infer a tree for ``self.aln`` with fasttree and, optionally, RAxML.

        The alignment is written into ``self.run_dir``, an initial tree is
        built with ``fasttree`` and, when ``raxml`` is true, RAxML refines the
        topology (bounded by ``raxml_time_limit``) and then the branch lengths.
        The resulting newick file is handed to ``self.tt_from_file`` and the
        run directory is removed afterwards.

        Parameters
        ----------
        root : passed unchanged to ``self.tt_from_file``.
        raxml : bool
            If false, the fasttree result is used as is.
        raxml_time_limit : float
            Wall-clock limit in hours for the RAxML topology search; a value
            <= 0 skips the topology search but still optimizes branch lengths.
        """
        from Bio import Phylo, AlignIO
        import subprocess, glob, shutil
        # Remember where we came from so the working directory is restored
        # even if run_dir is nested/absolute or a step below raises.
        start_dir = os.getcwd()
        make_dir(self.run_dir)
        os.chdir(self.run_dir)
        try:
            for seq in self.aln: seq.name=seq.id
            AlignIO.write(self.aln, 'temp.fasta', 'fasta')

            tree_cmd = ["fasttree"]
            if self.nuc: tree_cmd.append("-nt")
            tree_cmd.append("temp.fasta")
            # Redirect stdout in-process instead of going through a shell.
            with open("initial_tree.newick", "w") as tree_handle:
                subprocess.call(tree_cmd, stdout=tree_handle)

            out_fname = "tree_infer.newick"
            if raxml:
                # Both RAxML steps read temp.phyx, so write it regardless of
                # whether the topology search is enabled.
                AlignIO.write(self.aln,"temp.phyx", "phylip-relaxed")
                if raxml_time_limit>0:
                    tmp_tree = Phylo.read('initial_tree.newick','newick')
                    resolve_iter = 0
                    resolve_polytomies(tmp_tree)
                    while (not tmp_tree.is_bifurcating()) and (resolve_iter<10):
                        resolve_iter+=1
                        resolve_polytomies(tmp_tree)
                    Phylo.write(tmp_tree,'initial_tree.newick', 'newick')
                    print( "RAxML tree optimization with time limit", raxml_time_limit,  "hours")
                    end_time = time.time() + int(raxml_time_limit*3600)
                    # Started without a shell so terminate() reaches raxml itself.
                    process = subprocess.Popen(
                        ["raxml", "-f", "d", "-T", str(self.nthreads), "-j",
                         "-s", "temp.phyx", "-n", "topology", "-c", "25",
                         "-m", "GTRCAT", "-p", "344312987",
                         "-t", "initial_tree.newick"])
                    while (time.time() < end_time):
                        # Stop waiting once the result exists or raxml has exited
                        # (e.g. crashed), instead of sleeping out the full limit.
                        if os.path.isfile('RAxML_result.topology') or process.poll() is not None:
                            break
                        time.sleep(10)
                    if process.poll() is None:
                        process.terminate()
                    process.wait()

                    checkpoint_files = glob.glob("RAxML_checkpoint*")
                    if os.path.isfile('RAxML_result.topology'):
                        last_tree_file = 'RAxML_result.topology'
                    elif checkpoint_files:
                        # glob order is arbitrary; pick the most recently written one.
                        last_tree_file = max(checkpoint_files, key=os.path.getmtime)
                    else:
                        last_tree_file = "initial_tree.newick"
                    shutil.copy(last_tree_file, 'raxml_tree.newick')
                else:
                    shutil.copy("initial_tree.newick", 'raxml_tree.newick')

                print("RAxML branch length optimization")
                subprocess.call(
                    ["raxml", "-f", "e", "-T", str(self.nthreads),
                     "-s", "temp.phyx", "-n", "branches", "-c", "25",
                     "-m", "GTRGAMMA", "-p", "344312987",
                     "-t", "raxml_tree.newick"])
                try:
                    shutil.copy('RAxML_result.branches', out_fname)
                except (IOError, OSError):
                    # No result file means the optimization did not complete.
                    print("RAxML branch length optimization failed")
                    shutil.copy('raxml_tree.newick', out_fname)
            else:
                shutil.copy('initial_tree.newick', out_fname)
            self.tt_from_file(out_fname, root)
        finally:
            os.chdir(start_dir)
        remove_dir(self.run_dir)
        self.is_timetree=False
Exemple #2
0
def convert_boottrees(fname_trees):
    """Split a multi-tree newick file into one newick file per tree.

    Tree number ``i`` (0-based) is written to ``<fname_trees>.codeml-<i>``.
    Returns the list of written file names in tree order.
    """
    written = []
    all_trees = Phylo.parse(fname_trees, "newick")
    for index, boot_tree in enumerate(all_trees):
        target = "%s.codeml-%d" % (fname_trees, index)
        Phylo.write(boot_tree, target, "newick")
        written += [target]
    return written
Exemple #3
0
 def test_root_with_outgroup(self):
     """Tree.root_with_outgroup: rerooting keeps tip count and tree length."""
     apaf = Phylo.read(EX_APAF, 'phyloxml')
     expected_tips = len(apaf.get_terminals())
     expected_length = apaf.total_branch_length()

     # Large realistic tree. In order: a deep internal node, an external
     # node, then both again with an explicit outgroup branch length, which
     # must not change the total tree size either.
     reroot_calls = (
         (('19_NEMVE', '20_NEMVE'), {}),
         (('1_BRAFL',), {}),
         (('2_BRAFL',), {'outgroup_branch_length': 0.5}),
         (('36_BRAFL', '37_BRAFL'), {'outgroup_branch_length': 0.5}),
     )
     for targets, options in reroot_calls:
         apaf.root_with_outgroup(*targets, **options)
         self.assertEqual(expected_tips, len(apaf.get_terminals()))
         self.assertAlmostEqual(expected_length, apaf.total_branch_length())

     # Small contrived trees: reroot at every clade to hit the edge cases.
     contrived = (
         '(A,B,(C,D));',
         '((E,F),((G,H)),(I,J));',
         '((Q,R),(S,T),(U,V));',
         '(X,Y);',
     )
     for newick_text in contrived:
         small = Phylo.read(StringIO(newick_text), 'newick')
         length_before = small.total_branch_length()
         # Snapshot the clades first: rerooting restructures the tree.
         for clade in list(small.find_clades()):
             small.root_with_outgroup(clade)
             self.assertAlmostEqual(length_before,
                                    small.total_branch_length())
Exemple #4
0
 def initUI(self):
     """Build the widget: a read-only text view showing ``self.tree`` as ASCII art.

     The tree is rendered with ``Phylo.draw_ascii`` into ``/tmp/ascii.txt``
     and the file contents are then loaded into the text edit.
     """
     # field for drawing Ascii tree
     self.textEdit = QtGui.QTextEdit()
     self.textEdit.setReadOnly(True)
     self.textEdit.setFontFamily('Courier')
     self.textEdit.setWordWrapMode(True)

     # layout
     self.layout = QtGui.QVBoxLayout(self)
     self.layout.addWidget(self.textEdit)
     self.setLayout(self.layout)

     # Render the tree to a scratch file. The handle must be closed (and so
     # flushed) before the file is read back, otherwise the text may be
     # missing or truncated.
     self.tmpf = open('/tmp/ascii.txt', 'w')
     with self.tmpf:
         Phylo.draw_ascii(self.tree, self.tmpf)

     self.tmpf = open('/tmp/ascii.txt', 'r')
     with self.tmpf:
         self.data = self.tmpf.read()
     self.textEdit.setText(self.data)

     self.setGeometry(200, 200, 700, 400)
     self.setWindowTitle('Tekstowe wyswietlanie')
     self.show()
Exemple #5
0
    def test_newick_read_scinot(self):
        """Parse Newick branch lengths in scientific notation.

        Also checks that negative branch lengths, unnamed tips and whitespace
        or newlines inside the Newick string are parsed correctly.
        """
        tree = Phylo.read(StringIO("(foo:1e-1,bar:0.1)"), 'newick')
        clade_a = tree.clade[0]
        self.assertEqual(clade_a.name, 'foo')
        self.assertAlmostEqual(clade_a.branch_length, 0.1)

        # Additional tests to check correct parsing
        tree = Phylo.read(StringIO("(A:1, B:-2, (C:3, D:4):-2)"),'newick')
        self.assertEqual(tree.distance('A'),1)
        self.assertEqual(tree.distance('B'),-2)
        self.assertEqual(tree.distance('C'),1)
        self.assertEqual(tree.distance('D'),2)

        tree = Phylo.read(StringIO("((A:1, B:-2):-5, (C:3, D:4):-2)"),'newick')
        self.assertEqual(tree.distance('A'),-4)
        self.assertEqual(tree.distance('B'),-7)
        self.assertEqual(tree.distance('C'),1)
        self.assertEqual(tree.distance('D'),2)

        # One tip is unnamed, so compare the multiset of root-to-tip
        # distances instead of looking tips up by name. The branch lengths
        # are the same as in the previous tree.
        expected = [-7, -4, 1, 2]
        tree = Phylo.read(StringIO("((:1, B:-2):-5, (C:3, D:4):-2)"),'newick')
        self.assertEqual(
            sorted(tree.distance(x) for x in tree.get_terminals()), expected)

        # Same tree with newlines and spaces embedded in the string.
        tree = Phylo.read(StringIO("((:\n1\n,\n B:-2):-5, (C:3, D:4):-2);"),'newick')
        self.assertEqual(
            sorted(tree.distance(x) for x in tree.get_terminals()), expected)
 def test_draw_ascii(self):
     """Draw a tree as ASCII art, with default and explicit column width."""
     buf = StringIO()
     apaf = Phylo.read(EX_APAF, 'phyloxml')
     # First call uses the default width, second one an explicit width.
     for options in ({}, {'column_width': 120}):
         Phylo.draw_ascii(apaf, file=buf, **options)
     buf.close()
Exemple #7
0
 def test_convert_phyloxml_filename(self):
     """Write phyloxml to a given filename."""
     trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
     # mkstemp creates the file atomically; mktemp only returns a name and
     # is open to a race with other processes.
     fd, tmp_filename = tempfile.mkstemp()
     os.close(fd)
     try:
         count = Phylo.write(trees, tmp_filename, "phyloxml")
     finally:
         # Clean up even when writing fails.
         os.remove(tmp_filename)
     self.assertEqual(13, count)
Exemple #8
0
    def reroot_tree_with_outgroup(tree_name, outgroups):
        """Reroot the newick tree file ``tree_name`` on ``outgroups``, in place.

        The outgroup taxa are first reduced to a monophyletic set via
        ``GubbinsCommon.get_monophyletic_outgroup``. The tree is rerooted with
        Bio.Phylo, written back, then re-read with dendropy, derooted and
        written once more with single quotes stripped from the output.
        """
        monophyletic = GubbinsCommon.get_monophyletic_outgroup(tree_name, outgroups)
        targets = []
        for taxon_name in monophyletic:
            targets.append({"name": taxon_name})

        # Step 1: reroot with Bio.Phylo and overwrite the file.
        phylo_tree = Phylo.read(tree_name, "newick")
        phylo_tree.root_with_outgroup(*targets)
        Phylo.write(phylo_tree, tree_name, "newick")

        # Step 2: reload with dendropy, deroot and serialize.
        dendro_tree = dendropy.Tree.get_from_path(
            tree_name, "newick", preserve_underscores=True
        )
        dendro_tree.deroot()
        dendro_tree.update_bipartitions()
        newick_options = dict(
            schema="newick",
            suppress_leaf_taxon_labels=False,
            suppress_leaf_node_labels=True,
            suppress_internal_taxon_labels=False,
            suppress_internal_node_labels=False,
            suppress_rooting=True,
            suppress_edge_lengths=False,
            unquoted_underscores=True,
            preserve_spaces=False,
            store_tree_weights=False,
            suppress_annotations=True,
            annotations_as_nhx=False,
            suppress_item_comments=True,
            node_label_element_separator=" ",
        )
        unquoted = dendro_tree.as_string(**newick_options).replace("'", "")
        with open(tree_name, "w+") as output_file:
            output_file.write(unquoted)
Exemple #9
0
 def phylo2newick(self, t):
     """Return the Newick text that ``Phylo.write`` produces for ``t``."""
     buf = StringIO()
     Phylo.write(t, buf, "newick")
     newick_text = buf.getvalue()
     return newick_text
def make_tree_figure(wanted_seqs, trop_dict, tree_file):
    """Draw the newick tree in ``tree_file`` as a graph with colored nodes.

    Unnamed clades are labelled ``Clade-<n>`` and drawn white. Every other
    node is looked up in ``trop_dict`` and drawn green when its value is
    truthy, red otherwise. A named node missing from ``trop_dict`` raises
    ``KeyError``.
    """
    # NOTE(review): the result is not used in this function. The call is kept
    # in case get_pairwise_distances has side effects; confirm and drop if not.
    get_pairwise_distances(wanted_seqs, tree_file = tree_file)
    # Passing the path lets Phylo.read open and close the file itself.
    tree = Phylo.read(tree_file, 'newick')
    net = Phylo.to_networkx(tree)

    node_mapping = {}
    clade = 1
    for node in net.nodes():
        if node.name is None:
            node_mapping[node] = 'Clade-%i' % clade
            clade += 1
        else:
            node_mapping[node] = node.name
    new_net = networkx.relabel_nodes(net, node_mapping)

    colors = []
    for node in new_net.nodes():
        if node.startswith('Clade'):
            colors.append('w')
        elif trop_dict[node]:
            colors.append('g')
        else:
            colors.append('r')
    pos = networkx.graphviz_layout(new_net, 'twopi')

    networkx.draw_networkx(new_net, pos, with_labels = False, node_color = colors)
def tree(from_cluster,to_cluster, grupa):
    """Build majority-consensus trees for a range of cluster alignments.

    For each ``i`` in ``range(from_cluster, to_cluster)`` the alignment
    ``msa\\msa_rodzina_<i>_s.fasta`` is read, 50 bootstrap NJ trees are built,
    and every bootstrap tree that has an internal node with support below 50
    is discarded. The majority consensus of the remaining trees (if any) is
    collected. All consensus trees are written in newick format to
    ``drzewa_wynikowe_<grupa>``.
    """
    consensus_trees = []

    for i in range(from_cluster, to_cluster):

        # Escaped backslash: same runtime path, without the invalid '\m' escape.
        msa = AlignIO.read('msa\\msa_rodzina_' + str(i)+ '_s.fasta', 'fasta')
        print(i)
        calculator = DistanceCalculator('identity')

        try:
            # Result unused; presumably kept as an early check that distances
            # can be computed for this alignment -- TODO confirm.
            calculator.get_distance(msa)
            constructor = DistanceTreeConstructor(calculator, 'nj')
            trees_list = list(bootstrap_trees(msa, 50, constructor))

            well_supported = []
            for target_tree in trees_list:
                # The support check must run for every tree, not only for
                # the last one of the loop.
                support_tree = get_support(target_tree, trees_list)
                if any(node.confidence < 50
                       for node in support_tree.get_nonterminals()):
                    continue
                well_supported.append(target_tree)

            if len(well_supported) > 0:
                consensus_trees.append(majority_consensus(well_supported))

        except ValueError:
            # Skip clusters for which tree construction is not possible.
            pass

    Phylo.write(consensus_trees,"drzewa_wynikowe_" + str(grupa),"newick")
Exemple #12
0
def genTaxTree(resolver, namesdict, logger, taxonomy=None, draw=False):
    """Return a Bio.Phylo tree built from the data held by ``resolver``.

    Query names (whitespace replaced by '_') that are keys of ``namesdict``
    are passed to ``TaxDict`` together with their ranks and lineages; the
    others are reported through ``logger.debug``. The tree string returned by
    ``taxTree`` is wrapped with an extra ``outgroup`` tip whose branch length
    is ``len(taxonomy) + 1`` (22 when no taxonomy is given). If ``draw`` is
    true the tree is also printed with ``Phylo.draw_ascii``.
    """
    all_ranks = resolver.retrieve('classification_path_ranks')
    query_names = resolver.retrieve('query_name')
    all_lineages = resolver.retrieve('classification_path')
    # replace whitespace with '_' for taxon tree
    query_names = [re.sub(r"\s", "_", name) for name in query_names]

    # split the names into resolved (with their ranks/lineages) and unresolved
    idents, ranks, lineages, unresolved_names = [], [], [], []
    for position, name in enumerate(query_names):
        if name in namesdict.keys():
            ranks.append(all_ranks[position])
            lineages.append(all_lineages[position])
            idents.append(name)
        else:
            unresolved_names.append(name)
    logger.debug("Unresolved names: " +
                 "".join(" " + each for each in unresolved_names))

    taxdict = TaxDict(idents=idents, ranks=ranks, lineages=lineages,
                      taxonomy=taxonomy)
    treestring = taxTree(taxdict)
    # 22 is default_taxonomy + 1 in tnr
    depth = len(taxonomy) + 1 if taxonomy else 22
    # add outgroup
    with_outgroup = '({0},outgroup:{1});'.format(treestring[:-1], float(depth))
    tree = Phylo.read(StringIO(with_outgroup), "newick")
    if draw:
        Phylo.draw_ascii(tree)
    return tree
def draw_tree(tree, node_label = node_label_func, branch_label = lambda x:None,
                  cmap = cm.jet, axes = None, cb = True):
    '''
    Plot a tree on an empty canvas including a scalebar of length 0.005.

    tree         -- tree passed to Phylo.draw
    node_label   -- function producing the label of a node
    branch_label -- function producing the label of a branch
    cmap         -- colormap used for the "worst"/"best" colorbar
    axes         -- matplotlib axes to draw on; a new 8x6 figure is created
                    when None
    cb           -- if true, add the colorbar

    Returns the axes that were drawn on.
    '''
    import matplotlib.pyplot as plt
    from Bio import Phylo
    if axes is None:
        plt.figure(figsize = (8,6))
        axes = plt.subplot(111)
    Phylo.draw(tree, label_func = node_label,
               show_confidence = False,branch_labels = branch_label, axes=axes)
    axes.axis('off')
    xlimits = axes.get_xlim()
    ylimits = axes.get_ylim()
    # place the scalebar 5% in from the lower x/y limits
    x0 = xlimits[0]+(xlimits[1]-xlimits[0])*0.05
    x1 = x0+0.005
    y0 = ylimits[0]+(ylimits[1]-ylimits[0])*0.05
    # Draw on the requested axes, which need not be pyplot's current axes
    # when the caller supplied them.
    axes.plot([x0,x1], [y0,y0], lw=2, c='k')
    axes.text(x0+0.0025, y0+(ylimits[1]-y0)*0.01, '0.005', ha='center')
    # fake a colorbar
    if cb:
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=0, vmax=100))
        sm.set_array([])
        # The mappable is not attached to any axes, so say where to put it.
        cbar = plt.colorbar(sm, ax=axes, ticks=[0,100], shrink=0.5, aspect=10,pad=-0.05)
        cbar.set_ticklabels(['worst','best'])
    plt.draw()
    return axes
Exemple #14
0
def get_tree(tree_file, name_tree):
    """Return the edges of a newick tree with their branch lengths.

    Internal nodes of the tree in ``tree_file`` are named ``N0``, ``N1``, ...
    in ``get_nonterminals()`` order; terminal nodes take their names from the
    terminals of the tree in ``name_tree``, matched by position. Every
    clade's branch length is replaced by its ``confidence`` value.

    Returns ``(edge_to_blen, edge_list)``: a dict mapping
    ``(parent_name, child_name)`` to the edge weight, and the list of those
    edges sorted by the number following the first character of the parent
    name.
    """
    # Given a path, Phylo.read opens and closes the file itself.
    tree = Phylo.read(tree_file, "newick")
    tree_name = Phylo.read(name_tree, "newick")
    #set node number for nonterminal nodes and specify root node
    for numInternalNode, clade in enumerate(tree.get_nonterminals()):
        clade.name = 'N' + str(numInternalNode)
        clade.branch_length = clade.confidence

    # Fetch both terminal lists once instead of on every iteration.
    terminals = tree.get_terminals()
    name_terminals = tree_name.get_terminals()
    for clade_iter, clade in enumerate(terminals):
        clade.branch_length = clade.confidence
        clade.name = name_terminals[clade_iter].name
    tree_phy = tree.as_phyloxml(rooted = 'True')
    tree_nx = Phylo.to_networkx(tree_phy)

    # data = True to have the blen as 'weight'
    edge_to_blen = {}
    for u, v, d in tree_nx.edges(data = True):
        edge_to_blen[(u.name, v.name)] = d['weight']

    # sorted() returns a list on both Python 2 and 3 (dict.keys() has no
    # .sort() on Python 3).
    edge_list = sorted(edge_to_blen, key = lambda edge: int(edge[0][1:]))

    return edge_to_blen, edge_list
def measure_D_net(G,qmod,qcon):
    """Build a distance matrix over the nodes of ``G`` and write a UPGMA tree.

    The distance between nodes ``u`` and ``v`` is
    ``1.0 - G.dmc_likelihood(u, v, qmod, qcon)``. Rows are built in sorted
    node order as a lower triangle including the diagonal, and names are
    ``'Taxon' + str(node)``. The UPGMA tree is written in newick format to
    ``ph_dmc.nre``.

    Returns the ``_DistanceMatrix``.
    """
    # One ordering for both rows and names, so the labels match the rows.
    nodes = sorted(G)

    D_net = []
    for u in nodes:
        # lower triangle: keep only the columns that are not after u
        tmp_row = [1.0 - G.dmc_likelihood(u,v,qmod,qcon)
                   for v in nodes if not u < v]
        print(" ".join(str(d) for d in tmp_row) + "\n")
        D_net.append(tmp_row)

    names = ['Taxon'+str(u) for u in nodes]
    print(names)
    print(D_net)
    D_net_final = _DistanceMatrix(names,D_net)

    constructor = DistanceTreeConstructor()
    tree_dmc = constructor.upgma(D_net_final)
    Phylo.write(tree_dmc,'ph_dmc.nre','newick')

    return D_net_final
Exemple #16
0
    def distance_matrix(cls, cluster_list):
        """Return a newick string of the NJ tree for the given cluster names.

        Pairwise distances are looked up in ``Distance`` rows whose two
        accession numbers are both in ``cluster_list``, in either order. A
        lower-triangular matrix (zero diagonal) is built from them, a
        neighbor-joining tree is constructed and ladderized, and its newick
        serialization is returned.

        Raises ``ValueError`` if a pair has no stored distance.
        """
        print(cluster_list)
        dists = Distance.objects.filter(rep_accnum1__in=cluster_list, rep_accnum2__in=cluster_list)

        distance_pairs = {g.rep_accnum1 + '_' + g.rep_accnum2: g.distance for g in dists.all()}

        matrix = []
        for i in range(0,len(cluster_list)):
            matrix_iteration = []
            for j in range(0,i+1):
                forward_key = cluster_list[i] + '_' + cluster_list[j]
                reverse_key = cluster_list[j] + '_' + cluster_list[i]
                if i == j:
                    matrix_iteration.append(0)
                elif forward_key in distance_pairs:
                    matrix_iteration.append(distance_pairs[forward_key])
                elif reverse_key in distance_pairs:
                    matrix_iteration.append(distance_pairs[reverse_key])
                else:
                    # Raising a bare string is itself a TypeError; raise a
                    # real exception carrying the message instead.
                    raise ValueError("Error, can't find pair!")
            matrix.append(matrix_iteration)

        cluster_list = [s.encode('ascii', 'ignore') for s in cluster_list]
        matrix_obj = _DistanceMatrix(names=cluster_list, matrix=matrix)
        constructor = DistanceTreeConstructor()
        tree = constructor.nj(matrix_obj)
        tree.ladderize()
        output = StringIO.StringIO()
        Phylo.write(tree, output, 'newick')
        tree_str = output.getvalue()

        return tree_str
def rootTree(f, root,output):
	"""Reroot the newick tree read from ``f`` and write it to ``output``.

	``root`` is an outgroup name, or a comma-separated list of names, in
	which case the common ancestor of those taxa is used as the outgroup.
	"""
	phylo = Phylo.read(f,'newick')
	outgroup = phylo.common_ancestor(root.split(',')) if ',' in root else root
	phylo.root_with_outgroup(outgroup)
	Phylo.write(phylo,output,'newick')
Exemple #18
0
    def make_trees(self, force=False):
        """Build a phyloXML tree for every sub-directory of ``self.seed_directory``.

        For each sub-directory (named after a histone type) the ``.fasta``
        seed files are merged, with gaps removed, into one fasta file in
        ``self.trees_path``; the file is aligned with ``muscle``, ``clustalw2``
        is run with ``-tree`` on the alignment, and a newick tree file is
        converted to ``<hist_type>_no_features.xml``.

        force -- when false, histone types whose final tree file already
                 exists are skipped.
        """
        for i, (root, _, files) in enumerate(os.walk(self.seed_directory)):
            if i==0: #skip base path
                continue
            hist_type = os.path.basename(root)
            print "Creating tree for", hist_type

            final_tree_name = os.path.join(self.trees_path, "{}_no_features.xml".format(hist_type))
            if not force and os.path.isfile(final_tree_name):
                continue

            if not os.path.exists(self.trees_path):
                os.makedirs(self.trees_path)

            #Combine all variants for a core histone type into one unaligned fasta file
            combined_seed_file = os.path.join(self.trees_path, "{}.fasta".format(hist_type))
            combined_seed_aligned = os.path.join(self.trees_path, "{}_aligned.fasta".format(hist_type))
            with open(combined_seed_file, "w") as combined_seed:
                for seed in files: 
                    if not seed.endswith(".fasta"): continue
                    for s in SeqIO.parse(os.path.join(self.seed_directory, hist_type, seed), "fasta"):
                        s.seq = s.seq.ungap("-")
                        SeqIO.write(s, combined_seed, "fasta")

            #Create trees and convert them to phyloxml
            # NOTE(review): Phylo.convert below reads `tree` (<hist_type>_aligned.ph),
            # while clustalw2 is given final_tree_name as -outfile. Presumably
            # clustalw2 writes the .ph file next to the alignment -- confirm.
            tree = os.path.join(self.trees_path, "{}_aligned.ph".format(hist_type))
            subprocess.call(["muscle", "-in", combined_seed_file, '-out', combined_seed_aligned])
            # echo the clustalw2 command line before running it
            print " ".join(["clustalw2", "-infile={}".format(combined_seed_aligned), "-outfile={}".format(final_tree_name), '-tree'])
            subprocess.call(["clustalw2", "-infile={}".format(combined_seed_aligned), "-outfile={}".format(final_tree_name), '-tree'])
            Phylo.convert(tree, 'newick', final_tree_name, 'phyloxml')
def get_tree(newicktree):
    """Return the leaf names and the outgroup branch of a newick tree.

    Internal nodes are renamed ``N0``, ``N1``, ... in ``get_nonterminals()``
    order. Leaves are the nodes of total degree 1 in the directed edge graph.
    The outgroup branch is the first edge from ``N0`` to a node whose name
    (after its first character) is not all digits.

    Returns ``(list_of_leaf_names, out_group_branch)``.
    Raises ``IndexError`` if ``N0`` has no such edge and ``AssertionError``
    if the number of internal branches is inconsistent with the edge count.
    """
    tree = Phylo.read( newicktree, "newick")
    #set node number for nonterminal nodes and specify root node
    for numInternalNode, clade in enumerate(tree.get_nonterminals()):
        clade.name = 'N' + str(numInternalNode)
    tree_phy = tree.as_phyloxml(rooted = 'True')
    tree_nx = Phylo.to_networkx(tree_phy)

    # data = True to have the blen as 'weight'
    T = nx.DiGraph()
    edge_to_blen = {}
    for u, v, d in tree_nx.edges(data = True):
        edge = (u.name, v.name)
        T.add_edge(*edge)
        edge_to_blen[edge] = d['weight']

    # dict(...) accepts both the plain dict returned by networkx 1.x and the
    # degree view returned by networkx 2.x.
    leaves = set(v for v, degree in dict(T.degree()).items() if degree == 1)

    nEdge = len(edge_to_blen)  # number of edges
    l = nEdge // 2 + 1         # number of leaves (integer division on py2 and py3)
    k = l - 1                  # number of internal nodes

    leaf_branch = [edge for edge in edge_to_blen.keys() if edge[0][0] == 'N' and str.isdigit(edge[0][1:]) and not str.isdigit(edge[1][1:])]
    out_group_branch = [edge for edge in leaf_branch if edge[0] == 'N0' and not str.isdigit(edge[1][1:])] [0]
    internal_branch = [x for x in edge_to_blen.keys() if not x in leaf_branch]
    assert(len(internal_branch) == k-1)  # check if number of internal branch is one less than number of internal nodes

    return list(leaves), out_group_branch
Exemple #20
0
def write_clusters(seqfname, tree, clusters, unclustered):
    """Write output files: clusters & unique as FASTA, tree as phyloXML.

    seqfname    -- input sequence file; parsed as clustal when it ends in
                   '.aln' (sequences are then ungapped on output), else fasta
    tree        -- tree written to ``<basename>.xml`` as phyloXML
    clusters    -- mapping of clade -> collection of sequence ids; each
                   cluster is written to ``<basename>.<i>``, largest first,
                   and its clade gets a color and a width of 2
    unclustered -- sequence ids written to ``<basename>.Unique`` (if any)

    All files are written to the current directory.
    """
    is_aln = seqfname.endswith('.aln')
    seq_idx = SeqIO.to_dict(SeqIO.parse(seqfname,
                                        'clustal' if is_aln else 'fasta'))
    basename = os.path.basename(seqfname)

    def write_cluster(cluster, fname):
        """Write the sequences of cluster tips to a FASTA file."""
        records = [seq_idx[seqid] for seqid in sorted(cluster)]
        with open(fname, 'w+') as handle:
            for rec in records:
                write_fasta(rec, handle, do_ungap=is_aln)
        logging.info("Wrote %s (%d sequences)", fname, len(records))

    colors = [BranchColor(*[int(x*255) for x in rgb])
            for rgb in ColorSpiral().get_colors(len(clusters))]
    # items() exists on both Python 2 and 3 dicts (iteritems() is py2-only).
    by_size = sorted(clusters.items(), reverse=True,
                     key=lambda kv: len(kv[1]))
    for i, (clade, cluster) in enumerate(by_size):
        write_cluster(cluster, basename + '.' + str(i))
        clade.color = colors[i]
        clade.width = 2
    if unclustered:
        write_cluster(unclustered, basename + '.Unique')

    treefname = basename + '.xml'
    Phylo.write(tree, treefname, 'phyloxml')
    logging.info("Wrote %s", treefname)
Exemple #21
0
def plot_tree(tree):
	"""Convert ``tree`` to a Biopython tree, color it by "cHI" and draw it."""
	from Bio import Phylo
	from tree_util import to_Biopython, color_BioTree_by_attribute

	def serum_marker(node):
		# 'X' marks nodes with a truthy ``serum``; all others get an empty
		# label (the HI_info branch yields '' either way).
		if node.serum:
			return 'X'
		if node.HI_info:
			return ''
		return ''

	bio_tree = to_Biopython(tree)
	color_BioTree_by_attribute(bio_tree,"cHI", transform = lambda x:x)
	Phylo.draw(bio_tree, label_func = serum_marker, show_confidence= False)
Exemple #22
0
    def serialize_trees(self, tree_uri='', format='newick', trees=None, handle=None):
        '''Retrieve trees serialized to any format supported by Biopython.
        
        Current options include 'newick', 'nexus', 'phyloxml', 'nexml', and 'cdao'

        When `trees` is None, the first tree returned by
        `self.get_trees(tree_uri)` is serialized. Output goes to `handle` if
        one is given (and None is returned); otherwise the serialized text is
        returned as a string.

        Raises Exception if there is no tree to serialize.

        Example:
        >>> treestore.serialize_trees('http://www.example.org/test/')
        '''
        
        if handle: s = handle
        else: s = StringIO()
        
        if tree_uri: tree_uri = self.uri_from_id(tree_uri)
        
        if trees is None:
            # next() with a default: an empty result must reach the
            # "not found" error below instead of leaking StopIteration.
            first_tree = next(iter(self.get_trees(tree_uri)), None)
            trees = [] if first_tree is None else [first_tree]
        if not trees:
            raise Exception('Tree to be serialized not found.')

        if format == 'cdao':
            bp.write(trees, s, format, tree_uri=tree_uri)
        elif format == 'ascii':
            # only the first tree is drawn
            bp._utils.draw_ascii(next(iter(trees)), file=s)
        else:
            bp.write(trees, s, format)

        if handle: return
        return s.getvalue()
Exemple #23
0
def generate_new_files(fname) :
    """Write a copy of the fasta file ``fname`` with short sequence ids.

    Records are renamed ``flyg1``, ``flyg2``, ... in file order (name and
    description are blanked) and written to ``fname`` with every "." replaced
    by "_.". Unless ``RUN_RAXML`` is set, the tree file (``fname`` with
    "fasta" replaced by "tree") is rewritten with the same new ids to the
    correspondingly renamed tree file.

    Returns the name of the new fasta file.
    """
    # to get gene names that slr can handle (short enough)
    newfname = fname.replace(".", "_.")

    # generate a fasta file with new ids
    short_ids = {}
    renamed = []
    for number, record in enumerate(SeqIO.parse(fname, 'fasta'), 1):
        new_id = "flyg%s" % number
        short_ids[record.id] = new_id
        record.id = new_id
        record.name = ""
        record.description = ""
        renamed.append(record)
    SeqIO.write(renamed, newfname, "fasta")

    if RUN_RAXML:
        return newfname

    # generate a treefile with new ids
    old_tree_path = fname.replace("fasta", "tree")
    new_tree_path = newfname.replace("fasta", "tree")
    phylo = Phylo.read(old_tree_path, 'newick')
    for tip in phylo.get_terminals():
        tip.name = short_ids[tip.name]
    Phylo.write(phylo, new_tree_path, 'newick')

    return newfname
Exemple #24
0
def GetExec():
    """Load every tree file found in the current working directory.

    Files whose extension starts with ``.dnd`` are read as newick, those
    whose extension starts with ``.xml`` as phyloxml; each loaded tree is
    marked as rooted.

    Returns ``[trees, listdata]`` where ``trees`` is a list of
    ``[tree, 'ok']`` pairs and ``listdata`` maps each position in that list
    to ``(position, file_name)``.
    """
    formats_by_prefix = (('.dnd', "newick"), ('.xml', "phyloxml"))

    loaded = []
    listdata = dict()
    for entry in os.listdir(os.getcwd()):
        extension = os.path.splitext(entry)[1]
        for prefix, tree_format in formats_by_prefix:
            if not extension.startswith(prefix):
                continue
            parsed = Phylo.read(entry, tree_format)
            parsed.rooted = True
            position = len(loaded)
            loaded.append([parsed, 'ok'])
            listdata[position] = position, str(entry)
            break
    return [loaded, listdata]
Exemple #25
0
  def reroot_tree_with_outgroup(tree_name, outgroups):
    """Reroot the newick tree file ``tree_name`` on ``outgroups``, in place.

    The outgroup taxa are first reduced to a monophyletic set via
    ``GubbinsCommon.get_monophyletic_outgroup``. The tree is rerooted with
    Bio.Phylo, written back, then re-read with dendropy, derooted and written
    once more with single quotes stripped from the output.
    """
    clade_outgroups = GubbinsCommon.get_monophyletic_outgroup(tree_name, outgroups)
    outgroups = [{'name': taxon_name} for taxon_name in clade_outgroups]

    tree = Phylo.read(tree_name, 'newick')
    tree.root_with_outgroup(*outgroups)
    Phylo.write(tree, tree_name, 'newick')

    tree  = dendropy.Tree.get_from_path(tree_name, 'newick',
              preserve_underscores=True)
    tree.deroot()
    tree.update_splits()
    output_tree_string = tree.as_string(
      'newick',
      taxon_set=None,
      suppress_leaf_taxon_labels=False,
      suppress_leaf_node_labels=True,
      suppress_internal_taxon_labels=False,
      suppress_internal_node_labels=False,
      suppress_rooting=True,
      suppress_edge_lengths=False,
      unquoted_underscores=True,
      preserve_spaces=False,
      store_tree_weights=False,
      suppress_annotations=True,
      annotations_as_nhx=False,
      suppress_item_comments=True,
      node_label_element_separator=' ',
      node_label_compose_func=None
      )
    # The context manager closes (and flushes) the file; reading the
    # `.closed` attribute never did.
    with open(tree_name, 'w+') as output_file:
      output_file.write(output_tree_string.replace('\'', ''))
Exemple #26
0
 def showMatplotlibTreeWindow(self):
     """Draw ``self.tree`` with matplotlib, labelling branches with their lengths.

     If no file has been chosen yet, the open-file dialog is shown first.
     Nothing is drawn while ``self.tree`` is still 0.
     """
     no_file_chosen = self.chosenFileName == ''
     if no_file_chosen:
         self.showOpenFileDialog()

     if self.tree != 0:
         def length_label(clade):
             return clade.branch_length

         self.tree.root.color = '#808080'
         Phylo.draw(self.tree, branch_labels = length_label)
Exemple #27
0
    def drawConsensusTreeBioNexus(self):
        """Draw the consensus of the two trees in ``self.path1`` / ``self.path2``.

        Does nothing unless both paths are non-empty. Both files are parsed
        with ``Trees.Tree``, their consensus is computed with
        ``Trees.consensus``, converted to a Bio.Phylo tree through its plain
        newick string and drawn with ``Phylo.draw``.
        """
        if self.path1 == '' or self.path2 == '':
            return

        # file extensions without the leading dot
        self.fileExtension1 = os.path.splitext(self.path1)[1][1:]
        self.fileExtension2 = os.path.splitext(self.path2)[1][1:]

        self.trees = []

        # first tree
        self.f = open(self.path1, 'r')
        first_text = self.f.read()
        self.tree1 = Trees.Tree(first_text)
        self.trees.append(self.tree1)
        self.f.close()

        # second tree
        self.f = open(self.path2, 'r')
        second_text = self.f.read()
        self.tree2 = Trees.Tree(second_text)
        self.trees.append(self.tree2)
        self.f.close()

        # consensus of both trees
        self.consensus_tree = Trees.consensus(self.trees)

        # convert via newick text and draw
        consensus_newick = self.consensus_tree.to_string(plain_newick=True)
        self.handle = StringIO(consensus_newick)
        self.tree = Phylo.read(self.handle, 'newick')
        self.tree.root.color = '#808080'
        Phylo.draw(self.tree)
Exemple #28
0
def main():
    """Colour the leaves of a tree by group and write it out as phyloXML.

    The input tree is read with the path and format given on the command
    line, converted to phyloXML, and every terminal whose (shortened) name
    appears in one of the groups returned by the selected colouring scheme
    gets that group's colour.

    Raises:
        ValueError: if ``args.zchemat_kolorowania`` is not one of
            ``'eba'``, ``'fungi'`` or ``'opisto'``.
    """
    args = parse_args()
    tree = Phylo.read(args.input_file, args.input_type)
    tree = tree.as_phyloxml()
    if args.zchemat_kolorowania == 'eba':
        get_colors_and_groups = get_eukariota_group
    elif args.zchemat_kolorowania == 'fungi':
        get_colors_and_groups = get_fungus_groups
    elif args.zchemat_kolorowania == 'opisto':
        get_colors_and_groups = get_opisto_groups
    else:
        # Previously this fell through and failed later with an unbound
        # local name; fail here with a message that names the bad value.
        raise ValueError(
            "Unknown colouring scheme %r (expected 'eba', 'fungi' or 'opisto')"
            % (args.zchemat_kolorowania,))

    # Expose the confidence value as the internal node's name; nodes
    # without a confidence object are left untouched.
    for branch in tree.get_nonterminals():
        try:
            branch.confidence.type = "bootstrap"
            branch.name = branch.confidence.value
        except AttributeError:
            pass

    colors, list_of_groups = get_colors_and_groups()

    for leaf in tree.get_terminals():
        name = leaf.name.strip()
        dot = name.find(".")
        if dot != -1:
            # Keep what follows the first dot, skipping two more characters.
            name = name[dot + 3:]
        else:
            # No dot: fall back to the last two underscore-separated fields.
            name = "_".join(name.split("_")[-2:])
        # No early exit: if several groups contain the name, the last wins.
        for color, members in zip(colors, list_of_groups):
            if name in members:
                leaf.color = color
    Phylo.write(tree, args.output_file, "phyloxml")
Exemple #29
0
    def drawConsensusTreeDendropy(self):
        """Compute a DendroPy consensus of the two selected trees and draw it.

        Does nothing unless both ``self.path1`` and ``self.path2`` are set.
        Each file's extension (without the dot) is passed to DendroPy as the
        schema name. The consensus is serialised to a Newick string and
        re-read with ``Phylo`` for drawing.
        """
        if not (self.path1 != '' and self.path2 != ''):
            return

        # Extensions without the leading dot.
        _, first_ext = os.path.splitext(self.path1)
        self.fileExtension1 = first_ext[1:]
        _, second_ext = os.path.splitext(self.path2)
        self.fileExtension2 = second_ext[1:]

        # Load both trees.
        self.tree1 = dendropy.Tree.get_from_path(self.path1, self.fileExtension1)
        self.tree2 = dendropy.Tree.get_from_path(self.path2, self.fileExtension2)

        # Collect them in a TreeList and build the consensus.
        self.trees = dendropy.TreeList()
        for loaded_tree in (self.tree1, self.tree2):
            self.trees.append(loaded_tree)
        self.consensus_tree = self.trees.consensus(min_freq=0.2)

        # Hand the consensus to Phylo through an in-memory buffer
        # (this is the corrected buffer conversion).
        newick_text = self.consensus_tree._as_newick_string()
        self.handle = StringIO(newick_text)
        self.tree = Phylo.read(self.handle, 'newick')
        self.tree.root.color = '#808080'
        Phylo.draw(self.tree)
Exemple #30
0
    def write(self, phytrees_file):
        """
        Store every tree held by this PhyTrees object in 'phytrees_file'
        using newick format. Alongside it, a detailed report is written to
        a file named like 'phytrees_file' but with the extension ".rep".
        Relative paths are resolved through 'get_abspath'. Existing files
        are overwritten without warning.

        Arguments :
            phytrees_file  ( string )
                New PhyTrees tree file.

        Raises :
            IOError
                If the path provided doesn't exist.
        """
        data_filepath = get_abspath(phytrees_file)
        base_path, _ = os.path.splitext(data_filepath)
        report_filepath = base_path + '.rep'
        # Flatten the report: fields joined by four spaces, one entry per line
        report_lines = ['    '.join(entry) for entry in self._report]
        str_report = '\n'.join(report_lines)
        report_text = 'Num. trees: {:d}\nHistory:\n{:s}'.format(len(self),
                                                                str_report)
        try:
            Phylo.write(self.data, data_filepath, 'newick')
            with open(report_filepath, 'w') as report_file:
                report_file.write(report_text)
        except IOError:
            # I/O problems propagate unchanged, without any cleanup
            raise
        except:
            # Any other failure: remove whatever was written, then re-raise
            for written_path in (data_filepath, report_filepath):
                if os.path.isfile(written_path):
                    os.remove(written_path)
            raise
Exemple #31
0
    confidences = []
    for c in clade.clades:
        #if not c.confidence is None:
        confidences.append(c.confidence)
        confidences += parse(c)
    return confidences


texDir = '/Users/ahenschel/Github/FreqRT/ManuscriptBootstrapping/Tables'
# Only command-line arguments that look like Newick files are processed.
nwkFiles = [n for n in sys.argv[1:] if n.endswith('.nwk')]
if not nwkFiles:
    print("Provide newick files through command line, e.g.: Data/*.nwk")
else:
    data = []
    for nwkFile in nwkFiles:
        tree = Phylo.read(nwkFile, format='newick')
        rec = infoFromName(nwkFile)
        # The `np.float` alias was removed from NumPy; the builtin `float`
        # selects the same dtype. Missing confidences (None) become NaN.
        confidences = np.array(parse(tree.root), dtype=float)
        nrNans = np.sum(np.isnan(confidences))
        nrNansPct = nrNans / len(confidences)
        rec += (np.nanmean(confidences), np.nanmedian(confidences), nrNans,
                nrNansPct)
        data.append(rec)

        # Render the tree to an SVG next to the input file. The figure size
        # belongs to the figure (savefig has no `figsize` argument), and
        # do_show=False keeps Phylo.draw from showing the figure before it
        # has been saved.
        fig = plt.figure(figsize=(18, 12))
        Phylo.draw(tree, axes=fig.add_subplot(1, 1, 1), do_show=False)
        fig.savefig(f'{nwkFile[:-4]}.svg', dpi=300)
        plt.close(fig)
    columns = 'Loci NrLoci Thresh Build Pops AvgConf MedConf Miss MissPct'.split(
Exemple #32
0
 def __init__(self, newick_tree):
     """Read ``newick_tree`` with ``Phylo.read`` and keep it as ``self.tree``.

     The tree is read as Newick with ``values_are_confidence=True`` and
     ``rooted=True``.
     """
     read_options = {"values_are_confidence": True, "rooted": True}
     self.tree = Phylo.read(newick_tree, "newick", **read_options)
Exemple #33
0
    def test_get_score(self):
        """Check ParsimonyScorer.get_score with default and custom matrices.

        The same alignment and UPGMA tree are scored three times: with the
        scorer's default settings, with a 4-symbol step matrix, and with a
        23-symbol step matrix.
        """
        aln = AlignIO.read("TreeConstruction/msa.phy", "phylip")
        tree = Phylo.read("./TreeConstruction/upgma.tre", "newick")
        # Case 1: scorer constructed without a matrix.
        scorer = ParsimonyScorer()
        score = scorer.get_score(tree, aln)
        # Expected total written as a sum of terms (presumably one term per
        # alignment column -- TODO confirm against the fixture).
        self.assertEqual(score, 2 + 1 + 2 + 2 + 1 + 1 + 1 + 3)

        # Case 2: custom step matrix over a 4-letter alphabet, given as a
        # lower triangle (row i holds i + 1 entries, ending on the diagonal).
        alphabet = ["A", "T", "C", "G"]
        step_matrix = [[0], [2.5, 0], [2.5, 1, 0], [1, 2.5, 2.5, 0]]
        matrix = _Matrix(alphabet, step_matrix)
        scorer = ParsimonyScorer(matrix)
        score = scorer.get_score(tree, aln)
        self.assertEqual(score, 3.5 + 2.5 + 3.5 + 3.5 + 2.5 + 1 + 2.5 + 4.5)

        # Case 3: 23-symbol alphabet, again with a lower-triangular matrix.
        # NOTE(review): "1" and "2" sit where "S" would be expected in an
        # amino-acid alphabet -- presumably intentional stand-ins; confirm.
        alphabet = [
            "A",
            "C",
            "D",
            "E",
            "F",
            "G",
            "H",
            "I",
            "K",
            "L",
            "M",
            "N",
            "P",
            "Q",
            "R",
            "1",
            "2",
            "T",
            "V",
            "W",
            "Y",
            "*",
            "-",
        ]
        # One row per alphabet symbol, in the same order as `alphabet`.
        step_matrix = [
            [0],
            [2, 0],
            [1, 2, 0],
            [1, 2, 1, 0],
            [2, 1, 2, 2, 0],
            [1, 1, 1, 1, 2, 0],
            [2, 2, 1, 2, 2, 2, 0],
            [2, 2, 2, 2, 1, 2, 2, 0],
            [2, 2, 2, 1, 2, 2, 2, 1, 0],
            [2, 2, 2, 2, 1, 2, 1, 1, 2, 0],
            [2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0],
            [2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 0],
            [1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 0],
            [2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 1, 0],
            [2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0],
            [1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 0],
            [2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 0],
            [1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 0],
            [1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0],
            [2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 3, 2, 2, 1, 1, 2, 2, 2, 0],
            [2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 3, 1, 2, 2, 2, 1, 2, 2, 2, 2, 0],
            [2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 0],
            [
                3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
                3, 0
            ],
        ]

        matrix = _Matrix(alphabet, step_matrix)
        scorer = ParsimonyScorer(matrix)
        score = scorer.get_score(tree, aln)
        self.assertEqual(score, 3 + 1 + 3 + 3 + 2 + 1 + 2 + 5)
 def readtrees(self):
     """Parse ``self.infile`` as Newick and return the ``Phylo.parse`` result."""
     parsed_trees = Phylo.parse(self.infile, 'newick')
     return parsed_trees
Exemple #35
0
 def test_newick_read_scinot(self):
     """Branch lengths written in scientific notation are parsed as numbers."""
     newick_handle = StringIO("(foo:1e-1,bar:0.1)")
     tree = Phylo.read(newick_handle, 'newick')
     # The first child is the leaf whose length is written as 1e-1.
     first_child = tree.clade[0]
     self.assertEqual(first_child.name, 'foo')
     self.assertAlmostEqual(first_child.branch_length, 0.1)
Exemple #36
0
 def test_newick_read_multiple(self):
     """A Nexus file holding several trees yields all of them."""
     parsed = list(Phylo.parse(EX_NEXUS, 'nexus'))
     self.assertEqual(len(parsed), 3)
     # Every tree in the example file has the same number of leaves.
     for nexus_tree in parsed:
         leaf_count = len(nexus_tree.get_terminals())
         self.assertEqual(leaf_count, 9)
Exemple #37
0
 def setUp(self):
     """Parse every phylogeny in ``EX_PHYLO`` and store them as a list."""
     parsed = Phylo.parse(EX_PHYLO, 'phyloxml')
     self.phylogenies = list(parsed)
 def test_newick(self):
     """A Newick file with a single tree is read with all its leaves."""
     newick_tree = Phylo.read(EX_NEWICK, 'newick')
     leaves = newick_tree.get_terminals()
     self.assertEqual(len(leaves), 28)
    print(err.decode("utf-8"))
    print(p2_status)

    f = open(sequence_ids_outfile, 'r')
    sequence_ids_output_file = csv.reader(f)
    sequence_header_list = {}
    for entry in sequence_ids_output_file:
        sequence_id = entry[0]
        description = entry[1]
        seq_type = entry[2]
        sequence_header_list[sequence_id] = [description, seq_type]
    f.close()

    if (p2_status == 0):

        best_tree = Phylo.read(mega_best_tree_infile, 'newick')
        print(best_tree)

        best_tree.root_with_outgroup(
            {'name': 'b30065_LXYB01000004_Acholeplasma_laidlawii_PG8R10'})
        best_tree.rooted = True

        #Phylo.draw(best_tree)
        #pylab.savefig(mega_tree_filename + '.png')

        Phylo.write(best_tree, mega_tree_filename + '.nwk', 'newick')
        Phylo.write(best_tree, mega_tree_filename + '.xml', 'phyloxml')

        f_out = open(subgroups_tree_outfile, 'w')
        for fasta_record in SeqIO.parse(subgroups_tree_file, "fasta"):
            seq_id = fasta_record.id
Exemple #40
0
if __name__ == "__main__":
    # Script entry point: read a tree and a roary spreadsheet, then pick
    # out the genes whose "Inference" text contains the requested tag.
    # Progress messages use the single-argument print(...) form, which
    # behaves the same on Python 2 and Python 3.
    options = get_options()
    # The Agg backend must be selected before pyplot is imported.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import seaborn as sns

    sns.set_style('white')

    import os
    import pandas as pd
    import numpy as np
    from Bio import Phylo

    t = Phylo.read(options.tree, 'newick')

    print(' Max distance to create better plots')
    # Largest root-to-leaf distance in the tree.
    mdist = max(t.distance(t.root, x) for x in t.get_terminals())

    print(' Load roary')
    roary = pd.read_table(options.spreadsheet, sep=',', low_memory=False)
    print(' Set index (group name)')
    roary.set_index('Gene', inplace=True)
    print('   Find tagged genes')
    if options.tag is not None:
        if "Inference" in roary.columns:
            # Move the "Inference" column into its own frame, then keep the
            # index of rows whose text contains the tag (case-insensitive).
            roary_info = roary[["Inference"]].copy()
            roary.drop(["Inference"], axis=1, inplace=True)
            VF = roary_info[roary_info.applymap(
                lambda x: options.tag.upper() in x.upper()).values].index
Exemple #41
0
from Bio import Phylo

# Genes to plot; one subplot column is created per entry below.
selgenes = ["rrs", "atpB"]
# Two numbers per gene. NOTE(review): their meaning is not visible in this
# part of the script -- confirm where sgstat is consumed.
sgstat = {
    "rrs": [1499, 18],
    "psaB": [1790, 21],
    "tufA": [1225, 13],
    "atpB": [3510, 66],
    "rpoB": [3765, 56]
}
#rnconv = { "p9" : "tufA", "p34" : "rpl19", "p4" : "psaB", "p6" : "atpB", "p2" : "rpoB" }
fig, ax = plt.subplots(1, len(selgenes), figsize=(40, 20), dpi=80)

for k in range(len(selgenes)):
    gene = selgenes[k]
    # Each gene's tree is read from "fragments_<gene>.nwk" in the working dir.
    tree = Phylo.read("fragments_" + gene + ".nwk", 'newick')
    # Global matplotlib settings; re-applied on every iteration.
    matplotlib.rcParams["font.size"] = 28
    matplotlib.rcParams["lines.linewidth"] = 3
    leafs = tree.get_terminals(order="postorder")

    bspnum = 0
    spid = "bsponge"
    labelconv = {
        "p_salinaru": "Picocystis salinarum",
        "myrmecia": "Myrmecia israeliensis",
        "botyococcu": "Botryococcus braunii",
        "coccomyxa": "Coccomyxa subellipsoidea",
        "c_parasiti": "Choricystis parasitica",
        "bsponge": "Sponge symbiont",
        "h_reticala": "Hydrodictyon reticulatum",
        "m_jurisii": "Mychonastes jurisii",
Exemple #42
0
        return colors['darkblue']
    elif c.split(" ")[0] == "GR":
        return colors['blue']
    elif c.split(" ")[0] == "G":
        return colors['seagreen']
    elif c.split(" ")[0] == "GH":
        return colors['green']
    elif c.split(" ")[0] == "S":
        return colors['lime']
    elif c.split(" ")[0] == "O":
        return colors['gold']
    elif c.split(" ")[0] == "GV":
        return colors['darkorange']
    elif c.split(" ")[0] == "V":
        return colors['tomato']
    elif c.split(" ")[0] == "L":
        return colors['red']


# Global font size for everything drawn below.
matplotlib.rc('font', size=30)
fig = plt.figure(figsize=(35, 25), dpi=100)
axes = fig.add_subplot(1, 1, 1)
# Draw onto the prepared axes with custom leaf labels and label colours;
# do_show=False so the figure is saved below instead of being shown here.
Phylo.draw(tree,
           axes=axes,
           do_show=False,
           label_func=labels,
           label_colors=labels_col)
plt.axis('off')
plt.savefig("plots/tree3.svg", format="svg", transparent=True)
# Also print a plain-text rendering of the same tree.
Phylo.draw_ascii(tree)
Exemple #43
0
# Multiple alignment and phylogenetic tree
help(ClustalwCommandline)
# The command-line objects below are only built and printed in the lines
# shown here; nothing in this span executes them.
# NOTE(review): this path ends in "Probitina_MA.fasta" while the one passed
# to Muscle below is "Proibitina_MA.fasta" -- confirm which spelling exists.
cline = ClustalwCommandline(
    "clustalw2",
    infile=
    "C:/Users/Zé Freitas/Desktop/Mestrado/Labs_Bioinf/Trabalho prático/scripts/Labs_Bioinf/Proibitina/Probitina_MA.fasta"
)
print(cline)

cline = MuscleCommandline(
    input=
    "C:/Users/Zé Freitas/Desktop/Mestrado/Labs_Bioinf/Trabalho prático/scripts/Labs_Bioinf/Proibitina/Proibitina_MA.fasta",
    out="Proibitina_MA.aln",
    clw=True)
print(cline)

# Read the multiple alignment file
alignment = AlignIO.read(
    "C:/Users/Zé Freitas/Desktop/Mestrado/Labs_Bioinf/Trabalho prático/scripts/Labs_Bioinf/Proibitina/Proibitinalinhados.fasta",
    "fasta")
print(alignment)

# Read the phylogenetic tree file
arvore = Phylo.read(
    "C:/Users/Zé Freitas/Desktop/Mestrado/Labs_Bioinf/Trabalho prático/scripts/Labs_Bioinf/Proibitina/Proibitinarvore.dnd",
    "newick")
print(arvore)

# Print a plain-text rendering of the tree
Phylo.draw_ascii(arvore)
Exemple #44
0
        ax.tick_params(labelsize=fs * 0.8)
        ax.set_ylim([0, 1.1 * np.max(yi)])
        plt.tight_layout()
        plt.legend(fontsize=fs * 0.8)


if __name__ == '__main__':
    # Script entry point: load a tree and per-leaf numeric dates, attach the
    # dates to the leaves and define the branch-variance model.
    import matplotlib.pyplot as plt
    import time
    plt.ion()
    # Alternative dataset:
    # tree_file = '../data/H3N2_NA_allyears_NA.20.nwk'
    # date_file = '../data/H3N2_NA_allyears_NA.20.metadata.csv'
    tree_file = '../data/ebola.nwk'
    date_file = '../data/ebola.metadata.csv'

    T = Phylo.read(tree_file, 'newick')
    #T.root_with_outgroup('A/Canterbury/58/2000|CY009150|09/05/2000|New_Zealand||H3N2/8-1416')

    # Map leaf name -> numeric date from the first two CSV columns.
    dates = {}
    with open(date_file) as ifile:
        ifile.readline()  # skip the header line
        for line in ifile:
            # Skip comment lines, and blank lines (which used to crash with
            # an IndexError on the missing second field).
            if not line.strip() or line[0] == '#':
                continue
            fields = line.strip().split(',')
            dates[fields[0]] = float(fields[1])

    # Every leaf must have an entry in the metadata file (KeyError otherwise).
    for l in T.get_terminals():
        l.numdate = dates[l.name]

    def branch_variance(x):
        """Return the variance for branch ``x``: its length, plus 0.0005
        when it is a terminal branch, divided by 19000."""
        return (x.branch_length +
                (0.0005 if x.is_terminal() else 0.0)) / 19000.0
Exemple #45
0
 def test_newick_read_single3(self):
     """A single-tree Nexus file is read with all of its leaves."""
     nexus_tree = Phylo.read(EX_NEXUS2, "nexus")
     leaves = nexus_tree.get_terminals()
     self.assertEqual(len(leaves), 658)
Exemple #46
0
def GRAViTyDendrogramAndHeatmapConstruction(
    ShelveDir,
    IncludeIncompleteGenomes=False,
    SimilarityMeasurementScheme="PG",
    p=1,
    Dendrogram=True,
    Dendrogram_LinkageMethod="average",  #"single", "complete", "average", "weighted"
    Bootstrap=True,
    N_Bootstrap=10,
    Bootstrap_method="booster",  #"booster", "sumtrees"
    Bootstrap_N_CPUs=20,
    Heatmap=False,
    Heatmap_VirusOrderScheme=None,  #"Filename", 
    Heatmap_WithDendrogram=True,
    Heatmap_DendrogramFile=None,
    Heatmap_DendrogramSupport_Cutoff=0.75,
    VirusGrouping=True,
):
    print "################################################################################"
    print "#Generate GRAViTy dendrogram and heat map                                      #"
    print "################################################################################"
    '''
	Generate GRAViTy dendrogram and heat map
	---------------------------------------------
	'''
    ################################################################################
    print "- Define dir/file paths"
    ################################################################################
    print "\tto program output shelve"
    #-------------------------------------------------------------------------------
    VariableShelveDir = ShelveDir + "/Shelves"

    if Dendrogram == True:
        print "\t\tto virus dendrogram file"
        #-------------------------------------------------------------------------------
        VirusDendrogramFile = VariableShelveDir + "/Dendrogram.IncompleteGenomes=%s.Scheme=%s.Method=%s.p=%s.nwk" % (
            str(int(IncludeIncompleteGenomes)), SimilarityMeasurementScheme,
            Dendrogram_LinkageMethod, p)

        if Bootstrap == True:
            print "\t\tto dendrogram distribution file"
            #-------------------------------------------------------------------------------
            VirusDendrogramDistFile = VariableShelveDir + "/DendrogramDist.IncompleteGenomes=%s.Scheme=%s.Method=%s.p=%s.nwk" % (
                str(int(IncludeIncompleteGenomes)),
                SimilarityMeasurementScheme, Dendrogram_LinkageMethod, p)
            if os.path.isfile(VirusDendrogramDistFile):
                os.remove(VirusDendrogramDistFile)

            print "\t\tto bootstrapped dendrogram file"
            #-------------------------------------------------------------------------------
            BootstrappedVirusDendrogramFile = VariableShelveDir + "/BootstrappedDendrogram.IncompleteGenomes=%s.Scheme=%s.Method=%s.p=%s.nwk" % (
                str(int(IncludeIncompleteGenomes)),
                SimilarityMeasurementScheme, Dendrogram_LinkageMethod, p)
            if os.path.isfile(BootstrappedVirusDendrogramFile):
                os.remove(BootstrappedVirusDendrogramFile)

    if Heatmap == True:
        print "\t\tto heat map file"
        #-------------------------------------------------------------------------------
        HeatmapFile = VariableShelveDir + "/Heatmap.IncompleteGenomes=%s.Scheme=%s.p=%s.pdf" % (
            str(int(IncludeIncompleteGenomes)), SimilarityMeasurementScheme, p)

    if Heatmap_WithDendrogram == True:
        print "\t\tto heat map with dendrogram file"
        #-------------------------------------------------------------------------------
        HeatmapWithDendrogramFile = VariableShelveDir + "/HeatmapWithDendrogram.IncompleteGenomes=%s.Scheme=%s.Method=%s.p=%s.support_cutoff=%s.pdf" % (
            str(int(IncludeIncompleteGenomes)), SimilarityMeasurementScheme,
            Dendrogram_LinkageMethod, p, Heatmap_DendrogramSupport_Cutoff)
        if Heatmap_DendrogramFile == None:
            if Dendrogram == True:
                if Bootstrap == True:
                    Heatmap_DendrogramFile = BootstrappedVirusDendrogramFile
                elif Bootstrap == False:
                    Heatmap_DendrogramFile = VirusDendrogramFile
            else:
                raise SystemExit("The dendrogram file is missing.")
        elif not os.path.isfile(Heatmap_DendrogramFile):
            raise SystemExit("Can't find %s." % Heatmap_DendrogramFile)

    if VirusGrouping == True:
        VirusGroupingFile = VariableShelveDir + "/VirusGrouping.IncompleteGenomes=%s.Scheme=%s.p=%s.txt" % (
            str(int(IncludeIncompleteGenomes)), SimilarityMeasurementScheme, p)

    ################################################################################
    print "- Retrieve variables"
    ################################################################################
    if IncludeIncompleteGenomes == True:
        print "\tfrom ReadGenomeDescTable.AllGenomes.shelve"
        #-------------------------------------------------------------------------------
        VariableShelveFile = VariableShelveDir + "/ReadGenomeDescTable.AllGenomes.shelve"
    elif IncludeIncompleteGenomes == False:
        print "\tfrom ReadGenomeDescTable.CompleteGenomes.shelve"
        #-------------------------------------------------------------------------------
        VariableShelveFile = VariableShelveDir + "/ReadGenomeDescTable.CompleteGenomes.shelve"

    #VariableShelveFile = VariableShelveDir+"/ReadGenomeDescTable.shelve"
    Parameters = shelve.open(VariableShelveFile)
    for key in [
            "SeqIDLists",
            "FamilyList",
            "GenusList",
            "VirusNameList",
            "TaxoGroupingList",
    ]:
        globals()[key] = Parameters[key]
        print "\t\t" + key

    Parameters.close()

    if IncludeIncompleteGenomes == True:
        print "\tfrom RefVirusAnnotator.AllGenomes.shelve"
        #-------------------------------------------------------------------------------
        VariableShelveFile = VariableShelveDir + "/RefVirusAnnotator.AllGenomes.shelve"
    elif IncludeIncompleteGenomes == False:
        print "\tfrom RefVirusAnnotator.CompleteGenomes.shelve"
        #-------------------------------------------------------------------------------
        VariableShelveFile = VariableShelveDir + "/RefVirusAnnotator.CompleteGenomes.shelve"

    #VariableShelveFile = VariableShelveDir+"/RefVirusAnnotator.shelve"
    Parameters = shelve.open(VariableShelveFile)
    for key in [
            "PPHMMSignatureTable",
            "PPHMMLocationTable",
            "PPHMMSignatureTable_coo",
            "PPHMMLocationTable_coo",
            "GOMIDList",
            "GOMSignatureTable",
    ]:
        try:
            globals()[key] = Parameters[key]
            print "\t\t" + key
        except KeyError:
            pass

    Parameters.close()

    if "PPHMMSignatureTable_coo" in globals().keys():
        globals()["PPHMMSignatureTable"] = PPHMMSignatureTable_coo.toarray()
    if "PPHMMLocationTable_coo" in globals().keys():
        globals()["PPHMMLocationTable"] = PPHMMLocationTable_coo.toarray()

    ################################################################################
    print "- Estimate virus pairwise distances"
    ################################################################################
    SimMat = SimilarityMat_Constructor(
        PPHMMSignatureTable=PPHMMSignatureTable,
        GOMSignatureTable=GOMSignatureTable,
        PPHMMLocationTable=PPHMMLocationTable,
        SimilarityMeasurementScheme=SimilarityMeasurementScheme,
        p=p,
    )
    DistMat = 1 - SimMat
    DistMat[DistMat < 0] = 0

    if Dendrogram == True:
        ################################################################################
        print "- Estimate the GRAViTy dendrogram"
        ################################################################################
        #Make TaxoLabelList
        #---------------------------------------------------------------------
        TaxoLabelList = TaxoLabel_Constructor(SeqIDLists=SeqIDLists,
                                              FamilyList=FamilyList,
                                              GenusList=GenusList,
                                              VirusNameList=VirusNameList)

        #Make dendrogram
        #---------------------------------------------------------------------
        VirusDendrogram = DistMat2Tree(
            DistMat=DistMat,
            LeafList=TaxoLabelList,
            Dendrogram_LinkageMethod=Dendrogram_LinkageMethod)
        with open(VirusDendrogramFile, "w") as VirusDendrogram_txt:
            VirusDendrogram_txt.write(VirusDendrogram)

        if Bootstrap == True:
            ################################################################################
            print "- Compute bootstrap support"
            ################################################################################
            N_PPHMMs = PPHMMSignatureTable.shape[1]
            for Bootstrap_i in range(0, N_Bootstrap):
                print "\tRound %d" % (Bootstrap_i + 1)
                #-------------------------------------------------------------------------------
                print "\t\tConstruct bootstrapped PPHMMSignatureTable and PPHMMLocationTable"
                #-------------------------------------------------------------------------------
                PPHMM_IndexList = np.random.choice(range(N_PPHMMs),
                                                   N_PPHMMs,
                                                   replace=True)
                BootstrappedPPHMMSignatureTable = PPHMMSignatureTable[:,
                                                                      PPHMM_IndexList]
                BootstrappedPPHMMLocationTable = PPHMMLocationTable[:,
                                                                    PPHMM_IndexList]
                BootstrappedGOMSignatureTable = None
                if "G" in SimilarityMeasurementScheme:
                    print "\t\tConstruct bootstrapped GOMSignatureTable"
                    #-------------------------------------------------------------------------------
                    BootstrappedGOMDB = GOMDB_Constructor(
                        TaxoGroupingList=TaxoGroupingList,
                        PPHMMLocationTable=BootstrappedPPHMMLocationTable,
                        GOMIDList=GOMIDList)
                    BootstrappedGOMSignatureTable = GOMSignatureTable_Constructor(
                        PPHMMLocationTable=BootstrappedPPHMMLocationTable,
                        GOMDB=BootstrappedGOMDB,
                        GOMIDList=GOMIDList)

                print "\t\tConstruct a dendrogram from the bootstrapped data"
                #-------------------------------------------------------------------------------
                BootstrappedSimMat = SimilarityMat_Constructor(
                    PPHMMSignatureTable=BootstrappedPPHMMSignatureTable,
                    GOMSignatureTable=BootstrappedGOMSignatureTable,
                    PPHMMLocationTable=BootstrappedPPHMMLocationTable,
                    SimilarityMeasurementScheme=SimilarityMeasurementScheme,
                    p=p,
                )
                BootstrappedDistMat = 1 - BootstrappedSimMat
                BootstrappedDistMat[BootstrappedDistMat < 0] = 0
                BootstrappedVirusDendrogram = DistMat2Tree(
                    DistMat=BootstrappedDistMat,
                    LeafList=TaxoLabelList,
                    Dendrogram_LinkageMethod=Dendrogram_LinkageMethod)
                with open(VirusDendrogramDistFile,
                          "a") as VirusDendrogramDist_txt:
                    VirusDendrogramDist_txt.write(BootstrappedVirusDendrogram +
                                                  "\n")

            print "\tCreat a bootstrapped dendrogram"
            #-------------------------------------------------------------------------------
            if Bootstrap_method == "booster":
                _ = subprocess.Popen(
                    "booster -i %s -b %s -o %s -@ %d " %
                    (VirusDendrogramFile, VirusDendrogramDistFile,
                     BootstrappedVirusDendrogramFile, Bootstrap_N_CPUs),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    shell=True)
                out, err = _.communicate()
            elif Bootstrap_method == "sumtrees":
                _ = subprocess.Popen(
                    "sumtrees.py --decimals=2 --no-annotations --preserve-underscores --force-rooted --output-tree-format=newick --output-tree-filepath=%s --target=%s %s"
                    % (BootstrappedVirusDendrogramFile, VirusDendrogramFile,
                       VirusDendrogramDistFile),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    shell=True)
                out, err = _.communicate()
            else:
                print "'Bootstrap_method' can either be 'booster' or 'sumtrees'."

    if Heatmap == True:
        ################################################################################
        print "- Construct GRAViTy heatmap"
        ################################################################################
        #Determine virus order
        #-------------------------------------------------------------------------------
        N_Viruses = len(DistMat)
        if Heatmap_VirusOrderScheme == None:
            VirusOrder = range(N_Viruses)
        elif os.path.isfile(Heatmap_VirusOrderScheme):
            with open(Heatmap_VirusOrderScheme,
                      "r") as Heatmap_VirusOrderScheme_txt:
                VirusOrder = [
                    int(Virus_i.split("\r\n")[0].split("\n")[0])
                    for Virus_i in Heatmap_VirusOrderScheme_txt
                ]
        else:
            VirusOrder = range(N_Viruses)

        #Re-order the distance matrix
        #-------------------------------------------------------------------------------
        OrderedDistMat = DistMat[VirusOrder][:, VirusOrder]

        #Labels, label positions, and ticks
        #-------------------------------------------------------------------------------
        #ClassLabelList	= np.array([TaxoGrouping.split("_")[1] if TaxoGrouping.startswith("_") else TaxoGrouping for TaxoGrouping in TaxoGroupingList[VirusOrder]])
        ClassLabelList = np.array([
            re.split("_|\*", TaxoGrouping)[1] if TaxoGrouping.startswith(
                ("_", "*")) else TaxoGrouping
            for TaxoGrouping in TaxoGroupingList[VirusOrder]
        ])
        LineList = np.where(ClassLabelList[0:-1] != ClassLabelList[1:])[0]
        ClassLabelList = np.hstack(
            (ClassLabelList[LineList], ClassLabelList[-1]))
        LineList = LineList + 0.5
        TickLocList = np.array(
            map(np.mean,
                (zip(np.hstack(([-0.5], LineList)),
                     np.hstack((LineList, [len(TaxoGroupingList) - 0.5]))))))

        #Plot configuration
        #-------------------------------------------------------------------------------
        Heatmap_width = float(12)
        Heatmap_height = Heatmap_width
        TaxoLable_space = 1.00

        CBar_Heatmap_gap = 0.05
        CBar_width = Heatmap_width
        CBar_height = 0.25
        CBarLable_space = 0.25

        Outer_margin = 0.5
        FontSize = 6

        Fig_width = Outer_margin + Heatmap_width + TaxoLable_space + Outer_margin
        Fig_height = Outer_margin + CBarLable_space + CBar_height + CBar_Heatmap_gap + Heatmap_height + TaxoLable_space + Outer_margin

        ax_Heatmap_L = (Outer_margin + TaxoLable_space) / Fig_width
        ax_Heatmap_B = (Outer_margin + CBarLable_space + CBar_height +
                        CBar_Heatmap_gap) / Fig_height
        ax_Heatmap_W = Heatmap_width / Fig_width
        ax_Heatmap_H = Heatmap_height / Fig_height

        ax_CBar_L = (Outer_margin + TaxoLable_space) / Fig_width
        ax_CBar_B = (Outer_margin + CBarLable_space) / Fig_height
        ax_CBar_W = CBar_width / Fig_width
        ax_CBar_H = CBar_height / Fig_height

        #Plot the heat map
        #-------------------------------------------------------------------------------
        fig = plt.figure(figsize=(Fig_width, Fig_height), dpi=300)

        ax_Heatmap = fig.add_axes(
            [ax_Heatmap_L, ax_Heatmap_B, ax_Heatmap_W, ax_Heatmap_H],
            frame_on=True,
            facecolor="white")
        Heatmap_Graphic = ax_Heatmap.imshow(OrderedDistMat,
                                            cmap='magma',
                                            aspect='auto',
                                            vmin=0,
                                            vmax=1,
                                            interpolation='none')
        for l in LineList:
            ax_Heatmap.axvline(l, color='k', lw=0.2)
            ax_Heatmap.axhline(l, color='k', lw=0.2)

        ax_Heatmap.set_xticks(TickLocList)
        ax_Heatmap.set_xticklabels(ClassLabelList, rotation=90, size=FontSize)
        ax_Heatmap.set_yticks(TickLocList)
        ax_Heatmap.set_yticklabels(ClassLabelList, rotation=0, size=FontSize)
        ax_Heatmap.tick_params(top='on',
                               bottom='off',
                               left='off',
                               right='on',
                               labeltop='on',
                               labelbottom='off',
                               labelleft='off',
                               labelright='on',
                               direction='out')

        ax_CBar = fig.add_axes([ax_CBar_L, ax_CBar_B, ax_CBar_W, ax_CBar_H],
                               frame_on=True,
                               facecolor="white")
        CBar_Graphic = fig.colorbar(Heatmap_Graphic,
                                    cax=ax_CBar,
                                    orientation="horizontal",
                                    ticks=[0, 0.25, 0.50, 0.75, 1])
        CBar_Graphic.ax.set_xticklabels(
            ['0.00', '0.25', '0.50', '0.75', '1.00'], size=FontSize)
        CBar_Graphic.ax.set_xlabel('Distance', rotation=0, size=FontSize + 2)
        CBar_Graphic.ax.tick_params(top='off',
                                    bottom='on',
                                    left='off',
                                    right='off',
                                    labeltop='off',
                                    labelbottom='on',
                                    labelleft='off',
                                    labelright='off',
                                    direction='out')

        #Save the plot to file
        #-------------------------------------------------------------------------------
        plt.savefig(HeatmapFile, format="pdf")

    if Heatmap_WithDendrogram == True:
        ################################################################################
        print "- Construct GRAViTy heat map with dendrogram"
        ################################################################################
        N_Viruses = len(DistMat)

        #Load the tree
        #-------------------------------------------------------------------------------
        VirusDendrogram = Phylo.read(Heatmap_DendrogramFile, "newick")

        #Determine virus order
        #-------------------------------------------------------------------------------
        TaxoLabelList = TaxoLabel_Constructor(SeqIDLists=SeqIDLists,
                                              FamilyList=FamilyList,
                                              GenusList=GenusList,
                                              VirusNameList=VirusNameList)

        _ = VirusDendrogram.ladderize(reverse=True)
        OrderedTaxoLabelList = [
            Clade.name for Clade in VirusDendrogram.get_terminals()
        ]
        VirusOrder = [
            TaxoLabelList.index(TaxoLabel)
            for TaxoLabel in OrderedTaxoLabelList
        ]

        #Re-order the distance matrix
        #-------------------------------------------------------------------------------
        OrderedDistMat = DistMat[VirusOrder][:, VirusOrder]

        #Remove clade support values that are < Heatmap_DendrogramSupport_Cutoff
        #-------------------------------------------------------------------------------
        N_InternalNodes = len(VirusDendrogram.get_nonterminals())
        for InternalNode_i in range(N_InternalNodes):
            if VirusDendrogram.get_nonterminals(
            )[InternalNode_i].confidence < Heatmap_DendrogramSupport_Cutoff or np.isnan(
                    VirusDendrogram.get_nonterminals()
                [InternalNode_i].confidence):
                VirusDendrogram.get_nonterminals(
                )[InternalNode_i].confidence = ""
            else:
                VirusDendrogram.get_nonterminals(
                )[InternalNode_i].confidence = round(
                    VirusDendrogram.get_nonterminals()
                    [InternalNode_i].confidence, 2)

        #Labels, label positions, and ticks
        #-------------------------------------------------------------------------------
        Taxo2ClassDict = {
            TaxoLabel: TaxoGrouping
            for TaxoLabel, TaxoGrouping in zip(TaxoLabelList, TaxoGroupingList)
        }
        ClassDendrogram = copy(VirusDendrogram)
        for Clade in ClassDendrogram.find_clades(terminal=True):
            Clade.name = Taxo2ClassDict[Clade.name]

        ClassLabelList = []
        LineList = [-1]
        TerminalNodeList = [
            TerminalNode for TerminalNode in ClassDendrogram.get_terminals()
        ]
        while len(TerminalNodeList) != 0:
            FarLeftNode = TerminalNodeList[0]
            for Clade in ([ClassDendrogram] +
                          ClassDendrogram.get_path(FarLeftNode)):
                DescendantNodeList = Clade.get_terminals()
                DescendantClassLabelList = list(
                    set(map(lambda c: c.name, DescendantNodeList)))
                if len(DescendantClassLabelList) == 1:
                    ClassLabelList.append(DescendantClassLabelList[0])
                    LineList.append(LineList[-1] + len(DescendantNodeList))
                    TerminalNodeList = TerminalNodeList[len(DescendantNodeList
                                                            ):]
                    break

        ClassLabelList = np.array(ClassLabelList)
        LineList = np.array(LineList) + 0.5
        TickLocList = np.array(map(np.mean, zip(LineList[0:-1], LineList[1:])))

        #Plot configuration
        #-------------------------------------------------------------------------------
        Heatmap_width = float(12)
        Heatmap_height = Heatmap_width
        TaxoLable_space = 1.00

        CBar_Heatmap_gap = 0.05
        CBar_width = Heatmap_width
        CBar_height = 0.25
        CBarLable_space = 0.25

        Dendrogram_width = Heatmap_width / 3
        Dendrogram_height = Heatmap_height
        Dendrogram_Heatmap_gap = 0.1

        ScaleBar_Dendrogram_gap = CBar_Heatmap_gap
        ScaleBar_width = Dendrogram_width
        ScaleBar_height = CBar_height
        ScaleBarLable_space = CBarLable_space

        Outer_margin = 0.5
        FontSize = 6

        Fig_width = Outer_margin + Dendrogram_width + Dendrogram_Heatmap_gap + Heatmap_width + TaxoLable_space + Outer_margin
        Fig_height = Outer_margin + CBarLable_space + CBar_height + CBar_Heatmap_gap + Heatmap_height + TaxoLable_space + Outer_margin

        ax_Dendrogram_L = Outer_margin / Fig_width
        ax_Dendrogram_B = (Outer_margin + ScaleBarLable_space + ScaleBar_height
                           + ScaleBar_Dendrogram_gap) / Fig_height
        ax_Dendrogram_W = Dendrogram_width / Fig_width
        ax_Dendrogram_H = Dendrogram_height / Fig_height

        ax_ScaleBar_L = Outer_margin / Fig_width
        ax_ScaleBar_B = (Outer_margin + ScaleBarLable_space) / Fig_height
        ax_ScaleBar_W = ScaleBar_width / Fig_width
        ax_ScaleBar_H = ScaleBar_height / Fig_height

        ax_Heatmap_L = (Outer_margin + Dendrogram_width +
                        Dendrogram_Heatmap_gap) / Fig_width
        ax_Heatmap_B = (Outer_margin + CBarLable_space + CBar_height +
                        CBar_Heatmap_gap) / Fig_height
        ax_Heatmap_W = Heatmap_width / Fig_width
        ax_Heatmap_H = Heatmap_height / Fig_height

        ax_CBar_L = (Outer_margin + Dendrogram_width +
                     Dendrogram_Heatmap_gap) / Fig_width
        ax_CBar_B = (Outer_margin + CBarLable_space) / Fig_height
        ax_CBar_W = CBar_width / Fig_width
        ax_CBar_H = CBar_height / Fig_height

        #Plot the heat map
        #-------------------------------------------------------------------------------
        fig = plt.figure(figsize=(Fig_width, Fig_height), dpi=300)

        ax_Dendrogram = fig.add_axes([
            ax_Dendrogram_L, ax_Dendrogram_B, ax_Dendrogram_W, ax_Dendrogram_H
        ],
                                     frame_on=False,
                                     facecolor="white")
        Phylo.draw(VirusDendrogram,
                   label_func=lambda x: "",
                   do_show=False,
                   axes=ax_Dendrogram)
        VirusDendrogramDepth = max(
            [v for k, v in VirusDendrogram.depths().iteritems()])
        ax_Dendrogram.set_xlim([(VirusDendrogramDepth - 1),
                                VirusDendrogramDepth])
        ax_Dendrogram.set_ylim([N_Viruses + 0.5, 0.5])
        ax_Dendrogram.set_axis_off()

        ax_ScaleBar = fig.add_axes(
            [ax_ScaleBar_L, ax_ScaleBar_B, ax_ScaleBar_W, ax_ScaleBar_H],
            frame_on=False,
            facecolor="white")
        ax_ScaleBar.plot([0, 1], [0, 0], 'k-')
        ScaleBarTicks = [0, 0.25, 0.5, 0.75, 1]
        for Tick in ScaleBarTicks:
            ax_ScaleBar.plot([Tick, Tick], [-0.05, 0.05], 'k-')

        ax_ScaleBar.set_xlim([1, 0])
        ax_ScaleBar.set_xticks(ScaleBarTicks)
        ax_ScaleBar.set_xticklabels(map(str, ScaleBarTicks),
                                    rotation=0,
                                    size=FontSize)
        ax_ScaleBar.set_xlabel('Distance', rotation=0, size=FontSize + 2)
        ax_ScaleBar.xaxis.set_label_position('bottom')
        ax_ScaleBar.tick_params(top='off',
                                bottom='off',
                                left='off',
                                right='off',
                                labeltop='off',
                                labelbottom='on',
                                labelleft='off',
                                labelright='off',
                                direction='out')

        ax_Heatmap = fig.add_axes(
            [ax_Heatmap_L, ax_Heatmap_B, ax_Heatmap_W, ax_Heatmap_H],
            frame_on=True,
            facecolor="white")
        Heatmap_Graphic = ax_Heatmap.imshow(OrderedDistMat,
                                            cmap='magma',
                                            aspect='auto',
                                            vmin=0,
                                            vmax=1,
                                            interpolation='none')
        for l in LineList:
            ax_Heatmap.axvline(l, color='k', lw=0.2)
            ax_Heatmap.axhline(l, color='k', lw=0.2)

        ax_Heatmap.set_xticks(TickLocList)
        ax_Heatmap.set_xticklabels(ClassLabelList, rotation=90, size=FontSize)
        ax_Heatmap.set_yticks(TickLocList)
        ax_Heatmap.set_yticklabels(ClassLabelList, rotation=0, size=FontSize)
        ax_Heatmap.tick_params(top='on',
                               bottom='off',
                               left='off',
                               right='on',
                               labeltop='on',
                               labelbottom='off',
                               labelleft='off',
                               labelright='on',
                               direction='out')

        ax_CBar = fig.add_axes([ax_CBar_L, ax_CBar_B, ax_CBar_W, ax_CBar_H],
                               frame_on=True,
                               facecolor="white")
        CBar_Graphic = fig.colorbar(Heatmap_Graphic,
                                    cax=ax_CBar,
                                    orientation="horizontal",
                                    ticks=[0, 0.25, 0.50, 0.75, 1])
        CBar_Graphic.ax.set_xticklabels(['0', '0.25', '0.50', '0.75', '1'],
                                        rotation=0,
                                        size=FontSize)
        CBar_Graphic.ax.set_xlabel('Distance', rotation=0, size=FontSize + 2)
        CBar_Graphic.ax.tick_params(top='off',
                                    bottom='on',
                                    left='off',
                                    right='off',
                                    labeltop='off',
                                    labelbottom='on',
                                    labelleft='off',
                                    labelright='off',
                                    direction='out')

        #Save the plot to file
        #-------------------------------------------------------------------------------
        plt.savefig(HeatmapWithDendrogramFile, format="pdf")

    if VirusGrouping == True:
        ################################################################################
        print "- Virus grouping"
        ################################################################################
        from GRAViTy.Utilities.OrderedSet import OrderedSet
        from GRAViTy.Utilities.VirusGrouping_Estimator import VirusGrouping_Estimator

        (VirusGroupingList, OptDistance_Cutoff, CorrelationScore,
         Theils_u_TaxoGroupingListGivenPred,
         Theils_u_PredGivenTaxoGroupingList) = VirusGrouping_Estimator(
             DistMat, Dendrogram_LinkageMethod, TaxoGroupingList)
        np.savetxt(
            fname=VirusGroupingFile,
            X=np.column_stack((
                map(", ".join, SeqIDLists),
                FamilyList,
                GenusList,
                VirusNameList,
                TaxoGroupingList,
                VirusGroupingList,
            )),
            fmt='%s',
            delimiter="\t",
            header=
            "Sequence identifier\tFamily\tGenus\tVirus name\tClass\tGrouping")

        with open(VirusGroupingFile, "a") as VirusGrouping_txt:
            VirusGrouping_txt.write(
                "\n" + "Distance cut off: %s\n" % OptDistance_Cutoff +
                "Theil's uncertainty correlation for the reference assignments given the predicted grouping U(Ref|Pred): %s\n"
                % Theils_u_TaxoGroupingListGivenPred +
                "Theil's uncertainty correlation for the predicted grouping given the reference assignments U(Pred|Ref): %s\n"
                % Theils_u_PredGivenTaxoGroupingList +
                "Symmetrical Theil's uncertainty correlation between the reference assignments and the predicted grouping U(Ref, Pred): %s\n"
                % CorrelationScore +
                "U(X|Y) == 1 means that knowing Y implies a perfect knowledge of X, but not vice-versa\n"
                +
                "U(X,Y) == 1 means that knowing Y implies a perfect knowledge of X and vice-versa\n"
            )
Exemple #47
0
 def test_int_labels(self):
     """Leaf labels written as digits in a Newick string are read back as the strings "0", "1", "2"."""
     newick_text = "(((0:0.1,1:0.1)0.99:0.1,2:0.1)0.98:0.0);"
     parsed = Phylo.read(StringIO(newick_text), "newick")
     # Collect the terminal names explicitly before comparing them as a set.
     leaf_names = set()
     for tip in parsed.get_terminals():
         leaf_names.add(tip.name)
     self.assertEqual(leaf_names, {"0", "1", "2"})
Exemple #48
0
 def test_unicode_exception(self):
     """Parse the BOM-prefixed Newick example file and check it yields three leaves."""
     with open(EX_NEWICK_BOM, encoding="utf-8") as bom_stream:
         parsed = Phylo.read(bom_stream, "newick")
     tips = parsed.get_terminals()
     self.assertEqual(len(tips), 3)
Exemple #49
0
 def test_convert_phyloxml_binary(self):
     """Writing phyloxml to a handle opened in binary mode must raise TypeError."""
     parsed_trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
     with tempfile.NamedTemporaryFile(mode="wb") as binary_out:
         # Context-manager form of assertRaises; the write itself is the call under test.
         with self.assertRaises(TypeError):
             Phylo.write(parsed_trees, binary_out, "phyloxml")
Exemple #50
0
 def test_newick_read_single1(self):
     """The first single-tree Newick example file is expected to contain 28 leaves."""
     tip_count = len(Phylo.read(EX_NEWICK, "newick").get_terminals())
     self.assertEqual(tip_count, 28)
Exemple #51
0
def get_newick_tree_str(filename):
    """Return the first tree of a Newick file, re-serialised as a Newick string.

    Parameters:
      filename: path (or handle) passed straight to ``Phylo.parse``.

    Returns:
      The text ``Phylo.write`` produces for the first parsed tree.

    Raises:
      StopIteration: if the parser yields no tree at all.
    """
    # Use the built-in next(): the iterator's .next() method only exists on
    # Python 2, while this function already relies on io.StringIO.
    tree = next(Phylo.parse(filename, 'newick'))
    output = io.StringIO()
    Phylo.write(tree, output, 'newick')
    return output.getvalue()
Exemple #52
0
 def test_convert_phyloxml_text(self):
     """Writing the phyloxml examples to a text-mode handle reports 13 trees written."""
     parsed_trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
     with tempfile.NamedTemporaryFile(mode="w") as text_out:
         n_written = Phylo.write(parsed_trees, text_out, "phyloxml")
     self.assertEqual(13, n_written)
Exemple #53
0
                        type=int,
                        default=100,
                        help='Resolution for numerical solution of ODE.')
    parser.add_argument('-delimiter',
                        default=None,
                        help='Field separator for node names in tree.')
    parser.add_argument('-position',
                        type=int,
                        default=-1,
                        help='Python index of field with tip date.')

    args = parser.parse_args()

    # open and parse tree
    try:
        tree = Phylo.read(args.tree, 'newick')
    except:
        print 'ERROR: Failed to parse tree from file', args.tree
        raise

    tree_height = max(tree.depths().values())
    tips = tree.get_terminals()
    ntips = len(tips)

    if args.delimiter is None:
        tip_heights = [0.] * ntips
    else:
        maxdate = 0
        tipdates = []
        for tip in tips:
            try:
Exemple #54
0
    # Write `ali_unique` to a fixed path under /tmp in PHYLIP format so that
    # the external tree builder below can read it.
    print('Save alignment in PHYLIP formats')
    fn_ali_trim = '/tmp/largest_clone_ali_trim.phy'
    AlignIO.write(ali_unique, fn_ali_trim, 'phylip')

    # Run `phyml` through the shell on that file; stdout is captured into
    # `res`, which is not used again within this fragment.
    # NOTE(review): `-d nt` presumably selects nucleotide data - confirm
    # against the PhyML manual.
    print('Infer tree')
    res = sp.run(
        'phyml -i {:} -d nt'.format(fn_ali_trim),
        shell=True,
        stdout=sp.PIPE,
    )

    print('Rename and bush up leaves')
    # NOTE(review): `fn_tree` is assigned here but the same path is spelled
    # out again literally in the Phylo.read call below.
    fn_tree = '/tmp/largest_clone_ali_trim.phy_phyml_tree.txt'
    from Bio import Phylo
    tree = Phylo.read('/tmp/largest_clone_ali_trim.phy_phyml_tree.txt',
                      format='newick')
    leaves = tree.get_terminals()
    for leaf in leaves:
        # The last '_'-separated field of the leaf name is used as a 1-based
        # index into `seqs_unique`; the entry found there keys `seqs_red`.
        lid = seqs_unique[int(leaf.name.split('_')[-1]) - 1]
        if len(seqs_red[lid]) == 1:
            # Exactly one entry: the leaf simply takes that entry as its name.
            leaf.name = seqs_red[lid][0]
        else:
            # Grow a hanging subtree
            # One child clade per entry, each on a tiny (1e-9) branch.
            # The loop rebinds `lid`; `seqs_red[lid]` is evaluated once before
            # iteration starts, so the iterated list is unaffected.
            for lid in seqs_red[lid]:
                leaf.clades.append(
                    leaf.__class__(
                        branch_length=1e-9,
                        name=lid,
                    ))
    tree.root.name = 'root'
Exemple #55
0
    geneID, geneFct = [], []
    outfile1 = open("single_origin.txt", "w")
    outfile2 = open("double_origin.txt", "w")

    for line in IDfile:
        line = line.rstrip()
        geneID.append(line.split('\t')[0])
        geneFct.append(line.split('\t')[1])

    for i in os.listdir(inDIR):
        if 'best' not in i: continue
        mynode, mynode1, mynode2 = False, False, False
        infile = open(inDIR + i, "r")
        line = infile.readline().rstrip()
        handle = StringIO(line)
        tree = Phylo.read(handle, "newick")

        for node in tree.get_nonterminals():
            subnodes = set([x.name for x in node.get_terminals()])
            if subnodes == campan1:
                mynode1 = True
            elif subnodes == campan2:
                mynode2 = True
            elif subnodes == campan:
                mynode = True

        if mynode == True:
            for x in xrange(len(geneID)):
                if geneID[x] in i:
                    outfile1.write("%s\t%s\t%s\n" %
                                   (geneID[x], geneFct[x], line))
Exemple #56
0
 def newick2phylo(self, nwk):
     """Wrap the Newick string `nwk` in a StringIO and return what Phylo.read parses from it."""
     return Phylo.read(StringIO(nwk), 'newick')
Exemple #57
0
#!/usr/bin/env python

import sys
from Bio import Phylo

# Convert the Newick file named by the first command-line argument to
# NEXUS, writing the result to standard output.
newick_path = sys.argv[1]
Phylo.convert(newick_path, 'newick', sys.stdout, 'nexus')
Exemple #58
0
def phylo_from_str(nwk):
    """Parse the Newick string `nwk` and return the tree read by Phylo.read."""
    buf = StringIO()
    buf.write(nwk)
    # Rewind so the parser starts at the beginning of what was just written.
    buf.seek(0)
    parsed_tree = Phylo.read(buf, "newick")
    return parsed_tree
Exemple #59
0
def build_tree_fasttree(filename_or_ali, rootname=None, VERBOSE=0):
    '''Build phylogenetic tree using FastTree
    
    Parameters:
      filename_or_ali: filename of a FASTA multiple sequence alignment, or a
                       Biopython alignment itself
      rootname (str): name of the leaf that should be the new root (outgroup)
      VERBOSE (int): verbosity level
    '''
    import os
    import subprocess as sp
    import StringIO
    from Bio import Phylo
    import numpy as np

    from ..filenames import fasttree_bin

    if isinstance(filename_or_ali, basestring):
        filename = filename_or_ali
    else:
        from Bio import AlignIO
        ali = filename_or_ali
        tmp_folder = os.getenv('HOME') + '/tmp/'
        filename = tmp_folder + 'tmp_fasttree_' + str(
            np.random.randint(1000000000)) + '.fasta'
        AlignIO.write(ali, filename, 'fasta')

    try:
        if VERBOSE >= 3:
            output = sp.check_output([fasttree_bin, '-nt', filename])
        else:
            output = sp.check_output([fasttree_bin, '-nt', filename],
                                     stderr=sp.STDOUT)
        tree_string = output.split('\n')[-2]

        tree = Phylo.read(StringIO.StringIO(tree_string), 'newick')
        tree.root.branch_length = 0.001

        if rootname is not None:
            if VERBOSE >= 2:
                print 'Reroot'
            for leaf in tree.get_terminals():
                if leaf.name == rootname:
                    root = leaf
                    break
            else:
                raise ValueError('Initial reference not found in tree')

            tree.root_with_outgroup(leaf)

    finally:
        if filename_or_ali != filename:
            os.remove(filename)

    # NOTE: nice fasttree trims sequence names at the first bracket, restore them
    if VERBOSE >= 2:
        print 'Check leaf labels integrity'
    if filename_or_ali == filename:
        from Bio import AlignIO
        ali = AlignIO.read(filename_or_ali, 'fasta')
    else:
        ali = filename_or_ali
    seq_names = set(seq.name for seq in ali)
    leaves_miss = set()
    for leaf in tree.get_terminals():
        if leaf.name in seq_names:
            seq_names.remove(leaf.name)
        else:
            leaves_miss.add(leaf)

    if len(leaves_miss):
        if VERBOSE >= 2:
            print 'Correcting leaf integrity'
        for leaf in leaves_miss:
            for name in seq_names:
                if name.split('(')[0] == leaf.name:
                    leaf.name = name
                    seq_names.remove(name)
                    break
            else:
                print 'Leaf has unexpected (truncated?) name:', leaf.name

    return tree
Exemple #60
0
def load_tree(tree_path: str, patient_zero: str):
    """Load a ML tree file (nwk format) into a Phylo tree object
    `patient_zero` is a user-specified reference name for rooting the tree"""
    tree = next(Phylo.parse(tree_path, 'newick'))
    tree.root_with_outgroup(patient_zero)
    return get_tree_coords(tree)