コード例 #1
0
ファイル: run_sampler.py プロジェクト: islamazhar/trees
def iterate(n_iters):
    """Run the sampler for `n_iters` steps, then plot and export the result.

    After every sampling step the marginal log likelihood of the sampler's
    tree is appended to `likelihoods`.  Once sampling is done the likelihood
    trace is saved, the final tree is drawn in 2D, its leaf points are
    replaced by the matching entries of `y`, and the tree is written as a PNG,
    a Newick file and a DOT file before all figures are shown.

    Args:
        n_iters: number of times `sampler.sample()` is called.
    """
    # `range` instead of the Python-2-only `xrange`; the index is unused.
    for _ in tqdm(range(n_iters)):
        sampler.sample()
        likelihoods.append(sampler.tree.marg_log_likelihood())

    # Likelihood trace.
    plt.figure()
    plt.xlabel("Iterations", fontsize=fontsize)
    plt.ylabel("Data Log Likelihood", fontsize=fontsize)
    plt.plot(likelihoods)
    plt.legend(loc='best', fontsize=12)

    plt.savefig('unconstrained-likelihoods.png', bbox_inches='tight')

    # Work on a copy so relabelling the leaves does not touch the sampler.
    final_tree = sampler.tree.copy()

    plt.figure()
    plot_tree_2d(final_tree, X, pca)

    for node in final_tree.dfs():
        if node.is_leaf():
            node.point = y[node.point]

    plt.figure()
    newick = final_tree.to_newick()
    tree = Phylo.read(StringIO(newick), 'newick')

    Phylo.draw_graphviz(tree, prog='neato')
    plt.savefig('unconstrained-tree.png', bbox_inches='tight')
    graph = Phylo.to_networkx(tree)
    with open('unconstrained-tree.nwk', 'w') as fp:
        # Same bytes as the old `print >> fp, newick,` (no trailing newline),
        # but valid on both Python 2 and Python 3.
        fp.write(newick)
    nx.write_dot(graph, 'unconstrained-tree.dot')
    plt.show()
コード例 #2
0
ファイル: run_sampler.py プロジェクト: sharadmv/trees
def iterate(n_iters):
    """Run the sampler for `n_iters` steps, then plot and export the result.

    After every sampling step the marginal log likelihood of the sampler's
    tree is appended to `likelihoods`.  Once sampling is done the likelihood
    trace is saved, the final tree is drawn in 2D, its leaf points are
    replaced by the matching entries of `y`, and the tree is written as a PNG,
    a Newick file and a DOT file before all figures are shown.

    Args:
        n_iters: number of times `sampler.sample()` is called.
    """
    # `range` instead of the Python-2-only `xrange`; the index is unused.
    for _ in tqdm(range(n_iters)):
        sampler.sample()
        likelihoods.append(sampler.tree.marg_log_likelihood())

    # Likelihood trace.
    plt.figure()
    plt.xlabel("Iterations", fontsize=fontsize)
    plt.ylabel("Data Log Likelihood", fontsize=fontsize)
    plt.plot(likelihoods)
    plt.legend(loc='best', fontsize=12)

    plt.savefig('unconstrained-likelihoods.png', bbox_inches='tight')

    # Work on a copy so relabelling the leaves does not touch the sampler.
    final_tree = sampler.tree.copy()

    plt.figure()
    plot_tree_2d(final_tree, X, pca)

    for node in final_tree.dfs():
        if node.is_leaf():
            node.point = y[node.point]

    plt.figure()
    newick = final_tree.to_newick()
    tree = Phylo.read(StringIO(newick), 'newick')

    Phylo.draw_graphviz(tree, prog='neato')
    plt.savefig('unconstrained-tree.png', bbox_inches='tight')
    graph = Phylo.to_networkx(tree)
    with open('unconstrained-tree.nwk', 'w') as fp:
        # Same bytes as the old `print >>fp, newick,` (no trailing newline),
        # but valid on both Python 2 and Python 3.
        fp.write(newick)
    nx.write_dot(graph, 'unconstrained-tree.dot')
    plt.show()
コード例 #3
0
ファイル: MainClass.py プロジェクト: Vachs/obiekt122
    def DrawSimple(self):
        """Draw the current tree with Graphviz and show the figure.

        Opens the file-selection window first when no file name has been
        chosen yet.  Nothing is drawn while `self.tree` compares equal to 0.
        """
        no_file_chosen = self.chosenFileName == ''
        if no_file_chosen:
            self.showOpenFileWindow()

        # presumably 0 is the "no tree loaded" placeholder -- confirm in the
        # class initialiser
        tree_available = self.tree != 0
        if tree_available:
            root = self.tree.root
            root.color = '#808080'
            Phylo.draw_graphviz(self.tree, node_size=2500)
            pylab.show()
コード例 #4
0
ファイル: util.py プロジェクト: sharadmv/trees
def plot_tree(tree, y):
    """Draw a relabelled copy of `tree` with the Graphviz 'neato' layout.

    The input tree is copied; on the copy every leaf's `point` is replaced by
    `y[point]`.  The copy is then serialised to Newick, re-read with
    `Phylo.read` and drawn.

    Args:
        tree: object providing `copy()`, `dfs()` and `to_newick()`.
        y: container indexed by each leaf's original `point`.
    """
    labelled = tree.copy()
    leaves = (node for node in labelled.dfs() if node.is_leaf())
    for leaf in leaves:
        leaf.point = y[leaf.point]

    handle = StringIO(labelled.to_newick())
    phylo_tree = Phylo.read(handle, 'newick')
    Phylo.draw_graphviz(phylo_tree, prog='neato')
コード例 #5
0
ファイル: util.py プロジェクト: islamazhar/trees
def plot_tree(tree, y):
    """Draw a relabelled copy of `tree` with the Graphviz 'neato' layout.

    The input tree is copied; on the copy every leaf's `point` is replaced by
    `y[point]`.  The copy is then serialised to Newick, re-read with
    `Phylo.read` and drawn.

    Args:
        tree: object providing `copy()`, `dfs()` and `to_newick()`.
        y: container indexed by each leaf's original `point`.
    """
    labelled = tree.copy()
    leaves = (node for node in labelled.dfs() if node.is_leaf())
    for leaf in leaves:
        leaf.point = y[leaf.point]

    handle = StringIO(labelled.to_newick())
    phylo_tree = Phylo.read(handle, 'newick')
    Phylo.draw_graphviz(phylo_tree, prog='neato')
コード例 #6
0
ファイル: main.py プロジェクト: ragnarlodbrok1992/obiekt122
 def showGraphvizRootedTreeWindow(self):
     """Draw the current tree with Graphviz and show the figure.

     Opens the file dialog first when no file name has been chosen yet.
     Nothing is drawn while `self.tree` compares equal to 0.
     """
     no_file_chosen = self.chosenFileName == ''
     if no_file_chosen:
         self.showOpenFileDialog()

     # presumably 0 is the "no tree loaded" placeholder -- confirm in the
     # class initialiser
     tree_available = self.tree != 0
     if tree_available:
         root = self.tree.root
         root.color = '#808080'
         Phylo.draw_graphviz(self.tree, node_size=2500)
         pylab.show()
コード例 #7
0
ファイル: MainClass.py プロジェクト: Vachs/obiekt122
    def showPathWindow(self):
        """Highlight the path between the two selected clades and show it.

        When both `self.cpath` and `self.cpath2` are non-empty, the first
        matching clades are stored in `self.start` and `self.end`, the clades
        on the traced path are coloured red, every other clade grey and the
        start clade blue, and the tree is drawn with Graphviz.  Otherwise a
        message is printed and the image 'img/wally.png' is shown instead.

        Raises:
            StopIteration: if no clade matches one of the selections.
        """
        def hide_axis_decorations():
            # Leave a clean picture: hide tick labels and tick marks.
            axes = pylab.gca().axes
            for get_labels in (axes.get_xticklabels, axes.get_yticklabels):
                for label in get_labels():
                    label.set_visible(False)
                    label.set_fontsize(0.0)
            for get_lines in (axes.get_xticklines, axes.get_yticklines):
                for tick in get_lines():
                    tick.set_visible(False)

        if self.cpath != '' and self.cpath2 != '':
            # Builtin next() works on Python 2.6+ and 3, unlike `.next()`.
            self.start = next(self.tree.find_clades(self.cpath))
            self.end = next(self.tree.find_clades(name=self.cpath2))

            # Trace the path once instead of once per clade in the tree.
            path = self.tree.trace(self.start, self.end)
            for clade in path:
                clade.color = 'red'

            for clade in self.tree.find_clades():
                if clade not in path:
                    clade.color = 'grey'

            self.start.color = 'blue'

            # Drawing
            Phylo.draw_graphviz(self.tree, node_size=2500)
            pylab.plot(0, 0)
            hide_axis_decorations()
            pylab.show()

        else:
            print("Nie wybrano punktow")

            # Show the informational image instead of a tree.
            img = pylab.imread('img/wally.png', 'rb')
            pylab.imshow(img)
            pylab.plot(0, 0)
            hide_axis_decorations()
            pylab.show()
コード例 #8
0
ファイル: main.py プロジェクト: ragnarlodbrok1992/obiekt122
    def showPathWindow(self):
        """Highlight the path between the two selected clades and show it.

        When both `self.cb1` and `self.cb2` are non-empty, the first matching
        clades are stored in `self.start` and `self.end`, the clades on the
        traced path are coloured red, every other clade grey and the start
        clade blue, and the tree is drawn with Graphviz.  Otherwise a message
        is printed and the image 'wally.png' is shown instead.

        Raises:
            StopIteration: if no clade matches one of the selections.
        """
        def hide_axis_decorations():
            # Leave a clean picture: hide tick labels and tick marks.
            axes = pylab.gca().axes
            for get_labels in (axes.get_xticklabels, axes.get_yticklabels):
                for label in get_labels():
                    label.set_visible(False)
                    label.set_fontsize(0.0)
            for get_lines in (axes.get_xticklines, axes.get_yticklines):
                for tick in get_lines():
                    tick.set_visible(False)

        if self.cb1 != '' and self.cb2 != '':
            # Builtin next() works on Python 2.6+ and 3, unlike `.next()`.
            self.start = next(self.tree.find_clades(self.cb1))
            self.end = next(self.tree.find_clades(name=self.cb2))

            # Trace the path once instead of once per clade in the tree.
            path = self.tree.trace(self.start, self.end)
            for clade in path:
                clade.color = 'red'

            for clade in self.tree.find_clades():
                if clade not in path:
                    clade.color = 'grey'

            self.start.color = 'blue'

            # Drawing
            Phylo.draw_graphviz(self.tree, node_size=2500)
            pylab.plot(0, 0)
            hide_axis_decorations()
            pylab.show()

        else:
            print("Nie wybrano punktow")

            # Show the informational image instead of a tree.
            img = pylab.imread('wally.png', 'rb')
            pylab.imshow(img)
            pylab.plot(0, 0)
            hide_axis_decorations()
            pylab.show()
コード例 #9
0
ファイル: clustalo.py プロジェクト: zmactep/ig-pipeline
def save_utree(tree_path):
    """Draw the Newick tree stored at `tree_path` and save it as an image.

    The tree is read, ladderized and drawn with Graphviz.  The figure is
    saved to `common.UTREE_MASK % prefix`, where `prefix` is `tree_path`
    without its file extension.

    Args:
        tree_path: path of the Newick tree file.

    Raises:
        Exception: whatever `plt.savefig` raises, after logging an error.
    """
    import os  # local import: the file's import block is not visible here

    logging.info("Saving unrooted tree.")
    # splitext strips only a real extension.  Slicing at rfind('.') dropped
    # the last character of extension-less paths and cut at directory dots.
    prefix_name = os.path.splitext(tree_path)[0]
    unrooted_tree = Phylo.read(tree_path, 'newick')
    unrooted_tree.ladderize()
    Phylo.draw_graphviz(unrooted_tree)
    try:
        plt.savefig(common.UTREE_MASK % prefix_name)
    except Exception:
        logging.error("Error while saving unrooted tree.")
        raise
コード例 #10
0
def main():
    """Draw the Nexus tree named on the command line and save it as a PNG.

    Exactly one command-line argument (the tree file) is expected; otherwise
    a usage message is written to stderr and the process exits.  The image
    is written next to the input file, with the extension replaced by
    '.png'.
    """
    import os  # local import: only the extension handling below needs it

    sys.stderr.write("Print the result to file\n")
    if len(sys.argv) != 2:
        sys.stderr.write("Using python %s file.tre[nexus]\n" % sys.argv[0])
        # NOTE(review): exits with status 0 on a usage error, as before.
        sys.exit(0)

    tree_file = sys.argv[1]
    layout_prog = "twopi"  # 'neato' is the alternative layout
    tree = Phylo.read(tree_file, "nexus")
    Phylo.draw_graphviz(tree, prog=layout_prog)
    # Replace only the extension: str.replace("tre", "png") also rewrote any
    # "tre" inside directory or base names (e.g. "trees/").
    image_file = os.path.splitext(tree_file)[0] + ".png"
    pylab.savefig(image_file)
コード例 #11
0
def main():
    """Draw the Nexus tree named on the command line and save it as a PNG.

    Exactly one command-line argument (the tree file) is expected; otherwise
    a usage message is written to stderr and the process exits.  The image
    is written next to the input file, with the extension replaced by
    '.png'.
    """
    import os  # local import: only the extension handling below needs it

    sys.stderr.write("Print the result to file\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s file.tre[nexus]\n' % sys.argv[0])
        # NOTE(review): exits with status 0 on a usage error, as before.
        sys.exit(0)

    tree_file = sys.argv[1]
    layout_prog = 'twopi'  # 'neato' is the alternative layout
    tree = Phylo.read(tree_file, 'nexus')
    Phylo.draw_graphviz(tree, prog=layout_prog)
    # Replace only the extension: str.replace('tre', 'png') also rewrote any
    # "tre" inside directory or base names (e.g. "trees/").
    image_file = os.path.splitext(tree_file)[0] + '.png'
    pylab.savefig(image_file)
コード例 #12
0
ファイル: run_linkage.py プロジェクト: sharadmv/trees
def plot_tree(final_tree, name):
    """Draw the subtree induced by `animal_list` and save it as '<name>.png'.

    A copy of `final_tree` gets each leaf's `point` replaced by `y[point]`;
    the copy is restricted to `animal_list`, converted to Newick, re-read
    with `Phylo.read` and drawn on a new figure with the 'neato' layout.

    Args:
        final_tree: tree providing `copy()`, `dfs()` and `induced_subtree()`.
        name: file name stem of the PNG to write.
    """
    relabelled = final_tree.copy()
    for leaf in (node for node in relabelled.dfs() if node.is_leaf()):
        leaf.point = y[leaf.point]

    subtree = relabelled.induced_subtree(animal_list)

    plt.figure()
    handle = StringIO(subtree.to_newick())
    phylo_tree = Phylo.read(handle, 'newick')

    Phylo.draw_graphviz(phylo_tree, prog='neato')
    image_path = "%s.png" % name
    plt.savefig(image_path, dpi=200, bbox_inches='tight')
コード例 #13
0
ファイル: run_linkage.py プロジェクト: islamazhar/trees
def plot_tree(final_tree, name):
    """Draw the subtree induced by `animal_list` and save it as '<name>.png'.

    A copy of `final_tree` gets each leaf's `point` replaced by `y[point]`;
    the copy is restricted to `animal_list`, converted to Newick, re-read
    with `Phylo.read` and drawn on a new figure with the 'neato' layout.

    Args:
        final_tree: tree providing `copy()`, `dfs()` and `induced_subtree()`.
        name: file name stem of the PNG to write.
    """
    relabelled = final_tree.copy()
    for leaf in (node for node in relabelled.dfs() if node.is_leaf()):
        leaf.point = y[leaf.point]

    subtree = relabelled.induced_subtree(animal_list)

    plt.figure()
    handle = StringIO(subtree.to_newick())
    phylo_tree = Phylo.read(handle, 'newick')

    Phylo.draw_graphviz(phylo_tree, prog='neato')
    image_path = "%s.png" % name
    plt.savefig(image_path, dpi=200, bbox_inches='tight')
コード例 #14
0
    def display(self, isascii=False):
        """
        Load the tree from '<self.filename>.dnd' and display it.

        The parsed Newick tree is stored on `self.tree`.  When `isascii` is
        true an ASCII rendering is printed first.  A Graphviz rendering is
        then attempted; if it fails, a warning is printed and the ASCII
        rendering is used as a fallback.
        """
        self.tree = Phylo.read('{}.dnd'.format(self.filename), 'newick')

        if isascii:
            Phylo.draw_ascii(self.tree)

        try:
            import pylab
            Phylo.draw_graphviz(self.tree)
            pylab.show()
        except Exception:
            # Still a best-effort fallback, but no longer swallows
            # KeyboardInterrupt / SystemExit as the bare `except:` did.
            print('Warning: failed to display using graphviz')
            Phylo.draw_ascii(self.tree)
コード例 #15
0
ファイル: utils.py プロジェクト: Stemweb/Stemweb
def newick2img(newick, filepath, branch_length=True, radial=True, width=800):
    '''
    Draw the tree stored in a newick file and save the figure to `filepath`.

    No locking is done here: the tree is drawn with `Phylo.draw_graphviz`
    and saved with `plt.savefig`, so callers must serialise concurrent
    calls themselves if figures must not be mixed.

    newick          path to the newick tree file

    filepath        path of the image file to create

    branch_length   accepted for compatibility; not used by this function

    radial          if true the Graphviz program 'dot' is used for the
                    layout, otherwise 'neato'

    width           accepted for compatibility; not used by this function
    '''
    layout_prog = 'neato'
    if radial:
        layout_prog = 'dot'
    tree = Phylo.read(newick, 'newick')
    Phylo.draw_graphviz(tree, prog=layout_prog, node_size=500)
    plt.savefig(filepath)
コード例 #16
0
ファイル: utils.py プロジェクト: Stemweb/Stemweb
def newick2img(newick, filepath, branch_length = True, radial = True, width = 800):
	'''
		Draw the tree stored in a newick file and save the figure to `filepath`.

		No locking is done here: the tree is drawn with `Phylo.draw_graphviz`
		and saved with `plt.savefig`, so callers must serialise concurrent
		calls themselves if figures must not be mixed.

		newick			path to the newick tree file

		filepath		path of the image file to create

		branch_length	accepted for compatibility; not used by this function

		radial			if true the Graphviz program 'dot' is used for the
						layout, otherwise 'neato'

		width			accepted for compatibility; not used by this function
	'''
	layout_prog = 'neato'
	if radial:
		layout_prog = 'dot'
	tree = Phylo.read(newick, 'newick')
	Phylo.draw_graphviz(tree, prog = layout_prog, node_size = 500)
	plt.savefig(filepath)
コード例 #17
0
def test(n=10, theta=1.0, L=10):
    """Simulate a tree and render it with several plotting back ends.

    Runs `fsmi.simulator_KingmanFiniteSites(n, theta, L)`, converts the
    result with `sim_to_tree`, prints its Newick string, prints a text plot
    via `dp.Tree`, then draws the tree with `Phylo.draw` and, after marking
    it rooted, with `Phylo.draw_graphviz` using the 'dot' layout.

    Args:
        n, theta, L: passed unchanged to the simulator.
    """
    sim = fsmi.simulator_KingmanFiniteSites(n, theta, L)
    myTree = sim_to_tree(sim)

    str_newick_sim = myTree.str_newick(True)
    # print() with a single argument behaves the same on Python 2 and 3.
    print(str_newick_sim)
    dpTree_sim = dp.Tree.get(data=str_newick_sim, schema='newick')
    dpTree_sim.print_plot()

    phyloTree = Phylo.read(StringIO(str_newick_sim), 'newick')
    print(phyloTree)

    plt.figure()
    Phylo.draw(phyloTree)
    phyloTree.rooted = True
    plt.figure()
    Phylo.draw_graphviz(phyloTree, prog='dot')
    plt.draw()
コード例 #18
0
def main(args):
    """Colour a tree by metadata and write the tree plus a colour legend.

    The tree is read from `args.tree` in `args.tree_format`, coloured with
    the mapping built from the `args.metadata` CSV, and then either drawn
    and saved as an image (`args.image_out` true) or written as phyloXML to
    `args.out`.  The legend is always written to `args.out + '.legend'`.
    `args.metadata` is closed before returning, even on error.
    """
    try:
        # Will need to make format an option here if we ever need to
        tree = Phylo.read(args.tree, args.tree_format)
        meta = csv.DictReader(args.metadata)
        color_map = color_mapping(meta, args.color_by, args.palette)
        legend = color_map['by_group']
        sequence_mapping = color_map['by_sequence']

        # Write out the tree
        tree = apply_color_mapping(tree, sequence_mapping)
        if args.image_out:
            Phylo.draw_graphviz(tree)
            # Save before show(): once the interactive window is closed the
            # figure is gone and savefig() would write an empty image.
            pylab.savefig(args.out)
            pylab.show()
        else:
            Phylo.write(tree, args.out, 'phyloxml')

        # Write out our legend
        with open(args.out + '.legend', 'w') as leg_handle:
            write_color_legend(legend, leg_handle, args.color_by)
    finally:
        # close up shop
        args.metadata.close()
コード例 #19
0
def main(args):
    """Colour a tree by metadata and write the tree plus a colour legend.

    The tree is read from `args.tree` in `args.tree_format`, coloured with
    the mapping built from the `args.metadata` CSV, and then either drawn
    and saved as an image (`args.image_out` true) or written as phyloXML to
    `args.out`.  The legend is always written to `args.out + '.legend'`.
    `args.metadata` is closed before returning, even on error.
    """
    try:
        # Will need to make format an option here if we ever need to
        tree = Phylo.read(args.tree, args.tree_format)
        meta = csv.DictReader(args.metadata)
        color_map = color_mapping(meta, args.color_by, args.palette)
        legend = color_map['by_group']
        sequence_mapping = color_map['by_sequence']

        # Write out the tree
        tree = apply_color_mapping(tree, sequence_mapping)
        if args.image_out:
            Phylo.draw_graphviz(tree)
            # Save before show(): once the interactive window is closed the
            # figure is gone and savefig() would write an empty image.
            pylab.savefig(args.out)
            pylab.show()
        else:
            Phylo.write(tree, args.out, 'phyloxml')

        # Write out our legend
        with open(args.out + '.legend', 'w') as leg_handle:
            write_color_legend(legend, leg_handle, args.color_by)
    finally:
        # close up shop
        args.metadata.close()
コード例 #20
0
ファイル: test.py プロジェクト: cswarth/hiv-sim
# Minimal script: read a phyloXML tree and show it with Graphviz.
import matplotlib

# The backend is selected here, before pylab is imported below.
matplotlib.use('GTKAgg')

from Bio import Phylo
import pylab

# Parse the tree from 'apaf.xml' (phyloXML format).
tree = Phylo.read('apaf.xml', 'phyloxml')

# Draw the tree on the current figure, then open the interactive window.
Phylo.draw_graphviz(tree)
pylab.show()
コード例 #21
0
        def out():
            """Build a UPGMA tree from the FASTA file named in `e1` and show it.

            The record ids and the first 100 residues of each sequence are
            written to 'phylo.phy'.  Each id is prefixed with its index and
            padded with '-' up to the longest id, with '.' and '_' removed.
            The file is then read back as a PHYLIP alignment, an identity
            distance matrix and a UPGMA tree are computed, and the tree is
            written to 'apaf.xml', drawn with Graphviz and shown.  The
            window `win` is destroyed afterwards.

            Raises:
                ValueError: if the FASTA file contains no records.
            """
            records = SeqIO.parse("%s" % e1.get(), "fasta")

            names = []
            sequences = []
            for record in records:
                names.append(record.id)
                sequences.append(record.seq)
                print("%s   %s" % (record.id, record.seq[0:100]))

            lengthmax = len(max(names, key=len))

            with open("phylo.phy", 'w') as handle:
                handle.write("   %s     100\n" % len(names))

                # zip keeps every id with its own sequence; the old
                # `lens2[i - 1]` lookup paired id i with sequence i - 1.
                for i, (item, seq) in enumerate(zip(names, sequences)):
                    label = "%s-%s" % (i, item)
                    if len(item) == lengthmax:
                        if i < 10:
                            # one extra '-' makes up for the one-digit index
                            label += "-"
                        print("1")
                    else:
                        label += (lengthmax - len(item)) * "-" + "-"
                        print("2")
                    label = label.replace(".", "").replace("_", "")

                    line = "%s          %s\n" % (label, seq[0:100])
                    print(line)
                    handle.write(line)

            # Read the sequences and align
            aln = AlignIO.read('phylo.phy', 'phylip')

            # Print the alignment
            print(aln)

            # Calculate the distance matrix
            calculator = DistanceCalculator('identity')
            dm = calculator.get_distance(aln)

            # Print the distance Matrix
            print('\nDistance Matrix\n===================')
            print(dm)

            # Construct the phylogenetic tree using UPGMA algorithm
            constructor = DistanceTreeConstructor()
            tree = constructor.upgma(dm)

            Phylo.write(tree, 'apaf.xml', 'phyloxml')
            tree = Phylo.read('apaf.xml', 'phyloxml')
            Phylo.draw_graphviz(tree)
            pylab.show()

            win.destroy()
コード例 #22
0
ファイル: pt.py プロジェクト: mbagg4152/pfpy
def proc():
    """Read 'agi-tree.xml' as phyloXML, draw it with Graphviz and show it."""
    agi_tree = phylo.read('agi-tree.xml', 'phyloxml')
    phylo.draw_graphviz(agi_tree)
    pylab.show()
コード例 #23
0
def Arvore2():
    """Draw the ClustalW guide tree and save it as '<model>.sequences.png'.

    Reads 'clustalw.dnd' from the 'backs' directory as Newick, marks the
    tree rooted, converts it with `as_phyloxml()`, draws it with Graphviz
    and saves the figure inside `outpath`.
    """
    # os.path.join replaces the hard-coded "backs\clustalw.dnd" literal,
    # whose "\c" is an invalid escape sequence and whose separator only
    # worked on Windows.
    tree = Phylo.read(os.path.join("backs", "clustalw.dnd"), "newick")
    tree.rooted = True
    tree = tree.as_phyloxml()
    Phylo.draw_graphviz(tree, fontsize='6')
    pylab.savefig(os.path.join(outpath, '%s.sequences.png' % model))
コード例 #24
0
            Clade(branch_length=0.102, name='A')
            Clade(branch_length=0.23, name='B')
        Clade(branch_length=0.4, name='C')
...
<img src="Phylo-draw-apaf1.png" title="fig:Rooted phylogram, via Phylo.draw" alt="Rooted phylogram, via Phylo.draw" width="500" />

# Rooted phylogram drawn with Phylo.draw
tree = Phylo.read('apaf.xml', 'phyloxml')
tree.ladderize()   # Flip branches so deeper clades are displayed at top
Phylo.draw(tree)

<img src="Phylo-apaf.png" title="Unrooted tree with colored nodes" alt="Unrooted tree with colored nodes" width="500" />

# Basic dendrogram drawn with Graphviz
import pylab
tree = Phylo.read('apaf.xml', 'phyloxml')
Phylo.draw_graphviz(tree)
pylab.show()

# Simple tree with defined branch lengths: take the first tree in the file
tree = Phylo.parse('phyloxml_examples.xml', 'phyloxml').next()
Phylo.draw_ascii(tree)

# ASCII rendering of the apaf tree
apaf = Phylo.read('apaf.xml', 'phyloxml')
Phylo.draw_ascii(apaf)

# Alternative kept for reference: convert to networkx and draw from there
#import networkx, pylab
#tree = Phylo.read('example.xml', 'phyloxml')
#net = Phylo.to_networkx(tree)
#networkx.draw(net)
#pylab.show()
コード例 #25
0
    def out():
        """Build a UPGMA tree from the FASTA file named in `e1` and show it.

        Every record id is padded with '-' to the longest id and stripped of
        '.' and '_'.  When the ids differ in length, each is also prefixed
        with its index.  The ids and the first 100 residues of each sequence
        are written to 'phylo.phy', which is read back as a PHYLIP
        alignment.  An identity distance matrix and a UPGMA tree are then
        computed, and the tree is written to 'apaf.xml', drawn with Graphviz
        and shown.  The window `win` is destroyed afterwards.
        """
        records = SeqIO.parse(e1.get(), "fasta")

        # Keep each id together with its own sequence.  The old loop reused
        # the last record's sequence for every row.
        entries = []
        for record in records:
            print(record.seq)
            entries.append((record.id, record.seq))

        id_lengths = [len(name) for name, _ in entries]
        lengthmax = max(id_lengths) if id_lengths else 0
        lengthmin = min(id_lengths) if id_lengths else 0
        number_ids = lengthmin < lengthmax

        # The file must be closed (flushed) before AlignIO reads it back.
        with open("phylo.phy", "w") as handle:
            # NOTE(review): the header announces 125 columns while rows are
            # cut to 100 residues -- confirm the intended alignment length.
            handle.write("  %s             %s\n" % (len(entries), "125"))

            for i, (name, seq) in enumerate(entries):
                label = name + (lengthmax - len(name)) * "-"
                label = label.replace(".", "").replace("_", "")
                if number_ids:
                    row = "%s%s%s    %s" % (i, "-", label, seq[0:100])
                else:
                    row = "%s    %s" % (label, seq[0:100])
                handle.write("%s\n" % row)
                print(label)

        # Read the sequences and align
        aln = AlignIO.read('phylo.phy', 'phylip')

        # Print the alignment
        print(aln)

        # Calculate the distance matrix
        calculator = DistanceCalculator('identity')
        dm = calculator.get_distance(aln)

        # Print the distance Matrix
        print('\nDistance Matrix\n===================')
        print(dm)

        # Construct the phylogenetic tree using UPGMA algorithm
        constructor = DistanceTreeConstructor()
        tree = constructor.upgma(dm)

        Phylo.write(tree, 'apaf.xml', 'phyloxml')
        tree = Phylo.read('apaf.xml', 'phyloxml')
        Phylo.draw_graphviz(tree)
        pylab.show()

        win.destroy()
コード例 #26
0
ファイル: create_online_graph.py プロジェクト: sharadmv/trees
# Likelihood trace over all iterations, including the constraint additions.
plt.figure()
plt.xlim([0, n_iters + constraint_add])
plt.xlabel("Iterations", fontsize=fontsize)
plt.ylabel("Data Log Likelihood", fontsize=fontsize)
plt.plot(likelihoods)
plt.legend(loc="best", fontsize=12)

plt.savefig("online-likelihoods.png", bbox_inches="tight")


# Work on a copy so relabelling the leaves does not touch the sampler.
final_tree = sampler.tree.copy()

plt.figure()
plot_tree_2d(final_tree, X, pca)

# Replace every leaf's point by the matching entry of `y`.
for node in final_tree.dfs():
    if node.is_leaf():
        node.point = y[node.point]

newick = final_tree.to_newick()
tree = Phylo.read(StringIO(newick), "newick")

plt.figure()
Phylo.draw_graphviz(tree, prog="neato")
plt.savefig("tree.png", bbox_inches="tight")
graph = Phylo.to_networkx(tree)
with open("tree.nwk", "w") as fp:
    # Same bytes as the old `print >> fp, newick,` (no trailing newline),
    # but valid on both Python 2 and Python 3.
    fp.write(newick)
nx.write_dot(graph, "tree.dot")
plt.show()
コード例 #27
0
# Likelihood trace over all iterations, including the constraint additions.
plt.figure()
plt.xlim([0, n_iters + constraint_add])
plt.xlabel("Iterations", fontsize=fontsize)
plt.ylabel("Data Log Likelihood", fontsize=fontsize)
plt.plot(likelihoods)
plt.legend(loc='best', fontsize=12)

plt.savefig('online-likelihoods.png', bbox_inches='tight')

# Work on a copy so relabelling the leaves does not touch the sampler.
final_tree = sampler.tree.copy()

plt.figure()
plot_tree_2d(final_tree, X, pca)

# Replace every leaf's point by the matching entry of `y`.
for node in final_tree.dfs():
    if node.is_leaf():
        node.point = y[node.point]

newick = final_tree.to_newick()
tree = Phylo.read(StringIO(newick), 'newick')

plt.figure()
Phylo.draw_graphviz(tree, prog='neato')
plt.savefig('tree.png', bbox_inches='tight')
graph = Phylo.to_networkx(tree)
with open('tree.nwk', 'w') as fp:
    # Same bytes as the old `print >> fp, newick,` (no trailing newline),
    # but valid on both Python 2 and Python 3.
    fp.write(newick)
nx.write_dot(graph, 'tree.dot')
plt.show()
コード例 #28
0
def processing(raw_fasta_path, out_dir_path):
    """Align, cluster and summarise the sequences of one FASTA file.

    Steps, all writing into `out_dir_path` (created if missing):
      * de-duplicate the input and write it under its original base name;
      * run `clustalw2` on it and read the resulting '.aln' alignment;
      * write the aligned sequences to 'alignment.txt';
      * draw the '.dnd' guide tree to 'figure_with_labels.pdf';
      * compute pairwise distances, cluster them with centroid linkage cut
        at half the maximum merge distance, and save 'dendogram.png';
      * per cluster, write 'clusters/cluster_<id>.fasta', append consensus,
        PSSM and letter frequencies to 'clusters/clusters_meta.txt', and
        save a bar chart 'clusters/frequencies_<id>.png'.

    Args:
        raw_fasta_path: path of the input FASTA file.
        out_dir_path: directory receiving all outputs.
    """
    if not os.path.exists(out_dir_path):
        logging.info("Making directory {0}".format(out_dir_path))
        os.makedirs(out_dir_path)

    deduplicated_fasta = remove_duplicates(SeqIO.parse(raw_fasta_path,
                                                       "fasta"))
    base = os.path.basename(raw_fasta_path)
    fasta_path = os.path.join(out_dir_path, base)

    logging.info("Writing FASTA in {0}".format(fasta_path))
    SeqIO.write(deduplicated_fasta, fasta_path, "fasta")

    # Multiple sequence alignment
    cline = ClustalwCommandline("clustalw2", infile=fasta_path)
    stdout, stderr = cline()
    logging.info(cline)

    clustalw_result_path = fasta_path.replace(".fasta", ".aln")

    alignment_dict = SeqIO.to_dict(
        AlignIO.read(clustalw_result_path, "clustal"))

    # writing alignment table in .txt
    with open(os.path.join(out_dir_path, "alignment.txt"), "w") as fout:
        fout.write("\n".join(
            str(record.seq) for record in alignment_dict.values()))

    # alignment tree drawing
    tree_path = fasta_path.replace(".fasta", ".dnd")
    tree = Phylo.read(tree_path, "newick")
    tree.ladderize()

    # with labels
    Phylo.draw_graphviz(tree, label_func=lambda x: x.name.replace("ID=", ""))
    plt.savefig(os.path.join(
        out_dir_path, "figure_with_labels.pdf"))  # need pygraphviz, pylab

    # Clustering
    ids = dict(enumerate(alignment_dict.keys()))
    distance_matrix = np.zeros([len(ids)] * 2)
    for i, j in itertools.combinations(range(len(ids)), r=2):
        distance_matrix[i][j] = distance_matrix[j][i] = \
            distance(alignment_dict[ids[i]], alignment_dict[ids[j]])

    # Compute and plot dendrogram
    fig = plt.figure()
    axdendro = fig.add_axes([0.09, 0.1, 0.2, 0.8])
    Y = linkage(distance_matrix, method="centroid")
    cutoff = 0.5 * max(Y[:, 2])
    clusters = fcluster(Y, cutoff, "distance")
    Z = dendrogram(Y, orientation="right", color_threshold=cutoff)
    axdendro.set_yticks([])

    # Plot distance matrix, reordered to follow the dendrogram leaves
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.8])
    index = Z["leaves"]
    distance_matrix = distance_matrix[index, :]
    distance_matrix = distance_matrix[:, index]
    im = axmatrix.matshow(distance_matrix, aspect="auto", origin="lower")
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Plot colorbar
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.8])
    plt.colorbar(im, cax=axcolor)

    # Display and save figure
    dendogram_path = os.path.join(out_dir_path, "dendogram.png")
    fig.savefig(dendogram_path)

    fasta_clusters = defaultdict(list)
    for i, cluster in enumerate(clusters):
        fasta_id = ids[i]
        fasta_clusters[cluster].append(alignment_dict[fasta_id])

    # Saving information about clusters
    clusters_dir_path = os.path.join(out_dir_path, "clusters")
    if not os.path.exists(clusters_dir_path):
        os.makedirs(clusters_dir_path)
    clusters_meta_path = os.path.join(clusters_dir_path, "clusters_meta.txt")
    # `with` guarantees the metadata file is flushed and closed; it used to
    # be left open.
    with open(clusters_meta_path, "w") as meta_file:
        for cluster_id, cluster in fasta_clusters.items():
            cluster_path = os.path.join(clusters_dir_path,
                                        "cluster_{0}.fasta".format(cluster_id))
            SeqIO.write(cluster, cluster_path, "fasta")
            summary_align = AlignInfo.SummaryInfo(
                MultipleSeqAlignment(cluster))
            consensus = summary_align.dumb_consensus()
            pssm = summary_align.pos_specific_score_matrix(
                consensus, chars_to_ignore=['-'])
            frequencies = dict.fromkeys(IUPAC.protein.letters, 0)
            frequencies.update(
                (key, len(list(group)))
                for key, group in itertools.groupby(sorted(consensus)))
            # "X" is only present when the consensus contains it; a plain
            # pop("X") raised KeyError for consensus strings without it.
            frequencies.pop("X", None)

            meta_file.write("""Cluster ID: {0}
Cluster size: {1}
Consensus:
{2}

PSSM:
{3}
Frequencies in consensus:
{4}


""".format(cluster_id, len(cluster), textwrap.fill(str(consensus)), pssm,
           pprint.pformat(frequencies)))

            fig = plt.figure()
            pos = np.arange(len(IUPAC.protein.letters))
            width = .5  # gives histogram aspect to the bar diagram

            ax = plt.axes()
            ax.set_xticks(pos + (width / 2))
            ax.set_xticklabels(IUPAC.protein.letters)

            plt.bar(pos,
                    [frequencies[letter] for letter in IUPAC.protein.letters],
                    width,
                    color='r')
            frequencies_path = os.path.join(
                clusters_dir_path, "frequencies_{0}.png".format(cluster_id))
            fig.savefig(frequencies_path)
コード例 #29
0
ファイル: clusterGraphs.py プロジェクト: glennhickey/hgvm
def compute_tree(options, mat, names):
    """ Make a UPGMA hierarchical clustering and write it as newick, png
    and graphviz dot.

    options: passed to tree_path() to obtain the newick output path; the
             png and dot paths are derived by replacing "newick" in it.
    mat:     nested mapping of distances, read as mat[name_i][name_j].
    names:   sequence of labels; defines the row order of the matrix.
    """
    # convert to a biopython lower-triangular matrix (row i has i + 1 entries)
    matrix = []
    for i in xrange(len(names)):
        row = []
        for j in xrange(i + 1):
            # tree constructor writes 0-distances as 1s for some reason
            # so we hack around here
            val = float(mat[names[i]][names[j]])
            if val == 0.:
                val = 1e-10
            elif val == 1.:
                val = 1.1
            row.append(val)
        matrix.append(row)
    dm = _DistanceMatrix(names, matrix)

    # upgma tree
    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(dm)
    robust_makedirs(os.path.dirname(tree_path(options)))
    Phylo.write(tree, tree_path(options), "newick")

    # png tree -- note : doesn't work in toil
    def f(x):
        # label function: blank label for anything whose str() contains
        # "Inner", the object itself otherwise
        if "Inner" in str(x):
            return ""
        else:
            return x
    Phylo.draw_graphviz(tree, label_func = f, node_size=1000, node_shape="s", font_size=10)
    pylab.savefig(tree_path(options).replace("newick", "png"))

    # graphviz
    # get networkx graph
    nxgraph = Phylo.to_networkx(tree)
    # make undirected
    nxgraph = nx.Graph(nxgraph)
    # push names to name labels
    nxgraph = nx.convert_node_labels_to_integers(nxgraph, label_attribute="label")
    for node_id in nxgraph.nodes():
        node = nxgraph.node[node_id]
        if "Inner" in str(node["label"]):
            # shrink "Inner" nodes to (almost) nothing and blank their label
            node["label"] = "\"\""
            node["width"] = 0.001
            node["height"] = 0.001
        else:
            node["fontsize"] = 18
    for edge_id in nxgraph.edges():
        edge = nxgraph.edge[edge_id[0]][edge_id[1]]
        # in graphviz, weight means something else, so make it a label
        weight = float(edge["weight"])
        # undo hack from above
        # NOTE(review): a weight clamped to 1. here is zeroed by the next
        # test, so every weight >= 1 ends up as 0 -- confirm this is intended
        if weight > 1:
            weight = 1.
        if weight <= 1e-10 or weight == 1.:
            weight = 0.
        edge["weight"] = None
        # label shows the weight scaled by 100 with 3 significant digits
        edge["label"] = "{0:.3g}".format(float(weight) * 100.)
        edge["fontsize"] = 14
        edge["len"] = draw_len(weight)
    nx.write_dot(nxgraph, tree_path(options).replace("newick", "dot"))
コード例 #30
0
def save_tree(tree, filename):
    """Draw `tree` with the Graphviz 'dot' layout and save it as a PNG.

    The figure is titled with `filename`, written to
    './Output/images/<filename>.png' at 100 dpi, and then closed.
    """
    Phylo.draw_graphviz(tree, prog='dot')
    plt.title(filename)
    image_path = ''.join(('./Output/images/', filename, '.png'))
    plt.savefig(image_path, dpi=100)
    plt.close()
コード例 #31
0
plt.legend(loc='best', fontsize=12)
plt.savefig('offline-scores.png', bbox_inches='tight')

# One log-likelihood curve per entry of `likelihoods`, on shared axes.
plt.figure()
plt.xlim([0, n_iters])
plt.xlabel("Iterations", fontsize=fontsize)
plt.ylabel("Data Log Likelihood", fontsize=fontsize)
for curve_name, curve in likelihoods.items():
    plt.plot(curve, label=curve_name)
plt.legend(loc='best', fontsize=12)
plt.savefig('offline-likelihoods.png', bbox_inches='tight')

# One 2D plot and one Graphviz drawing per model, saved as 'tree-<key>.png'.
for model_name, model in models.items():
    labelled = model.copy()

    plt.figure()
    plot_tree_2d(labelled, X, pca)

    # Replace every leaf's point by the matching entry of `y`.
    for leaf in (node for node in labelled.dfs() if node.is_leaf()):
        leaf.point = y[leaf.point]

    handle = StringIO(labelled.to_newick())
    phylo_tree = Phylo.read(handle, 'newick')

    # NOTE(review): no plt.figure() precedes this call, so the tree is drawn
    # on the figure opened above -- confirm that is intended.
    Phylo.draw_graphviz(phylo_tree, prog='neato')
    plt.savefig('tree-%s.png' % model_name, bbox_inches='tight')
plt.show()

コード例 #32
0
def processing(raw_fasta_path, out_dir_path):
    """Align, cluster and summarise the sequences of one FASTA file.

    Steps, all writing into `out_dir_path` (created if missing):
      * de-duplicate the input and write it under its original base name;
      * run `clustalw2` on it and read the resulting '.aln' alignment;
      * write the aligned sequences to 'alignment.txt';
      * draw the '.dnd' guide tree to 'figure_with_labels.pdf';
      * compute pairwise distances, cluster them with centroid linkage cut
        at half the maximum merge distance, and save 'dendogram.png';
      * per cluster, write 'clusters/cluster_<id>.fasta', append consensus,
        PSSM and letter frequencies to 'clusters/clusters_meta.txt', and
        save a bar chart 'clusters/frequencies_<id>.png'.

    Args:
        raw_fasta_path: path of the input FASTA file.
        out_dir_path: directory receiving all outputs.
    """
    if not os.path.exists(out_dir_path):
        logging.info("Making directory {0}".format(out_dir_path))
        os.makedirs(out_dir_path)

    deduplicated_fasta = remove_duplicates(
        SeqIO.parse(raw_fasta_path, "fasta"))
    base = os.path.basename(raw_fasta_path)
    fasta_path = os.path.join(out_dir_path, base)

    logging.info("Writing FASTA in {0}".format(fasta_path))
    SeqIO.write(deduplicated_fasta, fasta_path, "fasta")

    # Multiple sequence alignment
    cline = ClustalwCommandline("clustalw2", infile=fasta_path)
    stdout, stderr = cline()
    logging.info(cline)

    clustalw_result_path = fasta_path.replace(".fasta", ".aln")

    alignment_dict = SeqIO.to_dict(
        AlignIO.read(clustalw_result_path, "clustal"))

    # writing alignment table in .txt
    with open(os.path.join(out_dir_path, "alignment.txt"), "w") as fout:
        fout.write(
            "\n".join(
                str(record.seq) for record in alignment_dict.values()))

    # alignment tree drawing
    tree_path = fasta_path.replace(".fasta", ".dnd")
    tree = Phylo.read(tree_path, "newick")
    tree.ladderize()

    # with labels
    Phylo.draw_graphviz(tree, label_func=lambda x: x.name.replace("ID=", ""))
    plt.savefig(os.path.join(out_dir_path,
                               "figure_with_labels.pdf"))  # need pygraphviz, pylab

    # Clustering
    ids = dict(enumerate(alignment_dict.keys()))
    distance_matrix = np.zeros([len(ids)] * 2)
    for i, j in itertools.combinations(range(len(ids)), r=2):
        distance_matrix[i][j] = distance_matrix[j][i] = \
            distance(alignment_dict[ids[i]], alignment_dict[ids[j]])

    # Compute and plot dendrogram
    fig = plt.figure()
    axdendro = fig.add_axes([0.09, 0.1, 0.2, 0.8])
    Y = linkage(distance_matrix, method="centroid")
    cutoff = 0.5 * max(Y[:, 2])
    clusters = fcluster(Y, cutoff, "distance")
    Z = dendrogram(Y, orientation="right", color_threshold=cutoff)
    axdendro.set_yticks([])

    # Plot distance matrix, reordered to follow the dendrogram leaves
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.8])
    index = Z["leaves"]
    distance_matrix = distance_matrix[index, :]
    distance_matrix = distance_matrix[:, index]
    im = axmatrix.matshow(distance_matrix, aspect="auto", origin="lower")
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Plot colorbar
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.8])
    plt.colorbar(im, cax=axcolor)

    # Display and save figure
    dendogram_path = os.path.join(out_dir_path, "dendogram.png")
    fig.savefig(dendogram_path)

    fasta_clusters = defaultdict(list)
    for i, cluster in enumerate(clusters):
        fasta_id = ids[i]
        fasta_clusters[cluster].append(alignment_dict[fasta_id])

    # Saving information about clusters
    clusters_dir_path = os.path.join(out_dir_path, "clusters")
    if not os.path.exists(clusters_dir_path):
        os.makedirs(clusters_dir_path)
    clusters_meta_path = os.path.join(clusters_dir_path, "clusters_meta.txt")
    # `with` guarantees the metadata file is flushed and closed; it used to
    # be left open.
    with open(clusters_meta_path, "w") as meta_file:
        for cluster_id, cluster in fasta_clusters.items():
            cluster_path = os.path.join(clusters_dir_path,
                                        "cluster_{0}.fasta".format(cluster_id))
            SeqIO.write(cluster, cluster_path, "fasta")
            summary_align = AlignInfo.SummaryInfo(
                MultipleSeqAlignment(cluster))
            consensus = summary_align.dumb_consensus()
            pssm = summary_align.pos_specific_score_matrix(
                consensus, chars_to_ignore=['-'])
            frequencies = dict.fromkeys(IUPAC.protein.letters, 0)
            frequencies.update(
                (key, len(list(group)))
                for key, group in itertools.groupby(sorted(consensus)))
            # "X" is only present when the consensus contains it; a plain
            # pop("X") raised KeyError for consensus strings without it.
            frequencies.pop("X", None)

            meta_file.write("""Cluster ID: {0}
Cluster size: {1}
Consensus:
{2}

PSSM:
{3}
Frequencies in consensus:
{4}


""".format(cluster_id, len(cluster), textwrap.fill(str(consensus)), pssm,
           pprint.pformat(frequencies)))

            fig = plt.figure()
            pos = np.arange(len(IUPAC.protein.letters))
            width = .5     # gives histogram aspect to the bar diagram

            ax = plt.axes()
            ax.set_xticks(pos + (width / 2))
            ax.set_xticklabels(IUPAC.protein.letters)

            plt.bar(pos,
                    [frequencies[letter] for letter in IUPAC.protein.letters],
                    width, color='r')
            frequencies_path = os.path.join(
                clusters_dir_path, "frequencies_{0}.png".format(cluster_id))
            fig.savefig(frequencies_path)
コード例 #33
0
ファイル: Plot_trees.py プロジェクト: YashasviMantha/NLP
# Minimal script: read a Newick tree and draw it with the 'dot' layout.
from Bio import Phylo
# from pygraphviz import *
# from networkx.drawing import nx_agraph

# Parse the tree stored in 'Plotly/small.newick'.
tree = Phylo.read("Plotly/small.newick", 'newick')

# Draw on the current figure; nothing here saves or shows the figure.
Phylo.draw_graphviz(tree, prog='dot')