Ejemplo n.º 1
0
def test_collapse_branches():
    """Compare ASCII drawings of the tree before and after collapsing.

    The tree read from ``input_newick`` is drawn, passed through
    ``collapse_branches(tree, 95)`` and drawn again; both drawings must
    match the stored reference art exactly.
    """
    from io import StringIO

    def render(phylo_tree):
        # Draw into an in-memory buffer and drop surrounding whitespace.
        buffer = StringIO()
        Phylo.draw_ascii(phylo_tree, buffer)
        return buffer.getvalue().strip()

    expected_before = """
  _________________________________________________________________ MK088171.1
 |
_|_______________ MK071699.1
 |
 |      _______________________________________________ MH845413.2
 |_____|
       |                  _____ MH784405.1
       |_________________|
                         |______ MH784404.1
    """.strip()
    expected_after = """
  _________________________________________________________________ MK088171.1
 |
 |_______________ MK071699.1
_|
 |_____________________________________________________ MH845413.2
 |
 |                        _____ MH784405.1
 |_______________________|
                         |______ MH784404.1
    """.strip()

    tree: Tree = Phylo.read(input_newick, 'newick')
    assert render(tree) == expected_before

    collapse_branches(tree, 95)
    assert render(tree) == expected_after
Ejemplo n.º 2
0
def main():
    """Print a neighbor-joining tree of the homologs of one gene.

    Reads the gene name and the gene space from the first two command-line
    arguments, extracts the gene, BLASTs it, aligns and trims the filtered
    homologs, then draws the resulting tree on stdout.
    """
    if len(sys.argv) < 3:
        sys.exit("Usage: dirtyTree.py <gene name> <gene space>")
    gene, gene_space = sys.argv[1], sys.argv[2]

    query_fasta = gene + ".fasta"
    homologs_fasta = gene + ".homologs.fasta"
    aligned_fasta = gene + ".homologs.aligned.fasta"

    # Fetch fasta entry of the focal gene
    fastagrep(gene, gene_space, query_fasta)

    # Blast and filter results (the blast call's return value is not used)
    local_blast(query_fasta, gene_space)
    homologs = filter_blast_results(query_fasta + ".blastout")
    if len(homologs) == 0:
        sys.exit("No homologs found for %s, exiting ..." % gene)

    # Fetch sequences of homologs, align them, and trim the alignment
    fastagrep(homologs, gene_space, homologs_fasta)
    aln = muscle_align(homologs_fasta, aligned_fasta)
    aln_trimmed = strumenti.trim_alignment(aln, max_prop_missing=0.2)

    # Estimate a gene tree using the neighbor-joining algorithm
    tree = strumenti.neighbor_joining_tree(aln_trimmed, 'blosum62')

    sys.stdout.write("\nHomolog tree for %s\n" % gene)
    Phylo.draw_ascii(tree, file=sys.stdout, column_width=100)
    n_positions = aln_trimmed.get_alignment_length()
    sys.stderr.write("\n%i positions were used to estimate the tree.\n" %
                     n_positions)
Ejemplo n.º 3
0
 def test_draw_ascii(self):
     """Draw a phyloXML tree as ASCII art at the default and a custom width."""
     handle = StringIO()
     try:
         tree = Phylo.read(EX_APAF, 'phyloxml')
         Phylo.draw_ascii(tree, file=handle)
         Phylo.draw_ascii(tree, file=handle, column_width=120)
     finally:
         # Release the buffer even when reading or drawing raises.
         handle.close()
Ejemplo n.º 4
0
    def makeRootUnroot(self, mod):
        """Root or unroot the tree stored in ``self.path1`` and display it.

        Nothing happens unless ``self.path1`` is set and ``self.path2`` is
        empty.

        :param mod: 0 roots at midpoint, 1 unroots, 2 roots balanced; any
            other value leaves the tree as read.
        """
        if self.path1 != '' and self.path2 == '':
            # get files extensions
            self.fileEx1 = (os.path.splitext(self.path1)[1])[1:]
            self.fileEx2 = (os.path.splitext(self.path2)[1])[1:]

            # open tree files
            self.trees = []
            self.drzewo = []

            # first tree: read the whole file up front. The with-block closes
            # the handle even if parsing or drawing fails afterwards (the
            # original kept it open until the very end and leaked it on error).
            with open(self.path1, 'r') as self.f:
                self.miss = self.f.read()
            self.tree1 = Trees.Tree(self.miss)
            self.dre = ts.Tree(self.miss)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("# Before modification")
            print(self.tree1)

            if mod == 0:
                print("# After modification -- Rooting (at midpoint):")
                self.dre.root_midpoint()
            elif mod == 1:
                print("# After modification -- UnRooting:")
                self.dre.unroot()
            elif mod == 2:
                print("# After modification -- Rooting (balanced):")
                self.dre.root_balanced()
            print(self.dre)
            print("\nDetails about tree:")
            self.dre.display()
            Phylo.draw_ascii(self.tree1)
            self.show()
Ejemplo n.º 5
0
def dna(file_path, file_format, algorithm):
    """Build and display a distance-based tree from an alignment file.

    Args:
        file_path: Alignment file passed to ``AlignIO.read``.
        file_format: Alignment format name passed to ``AlignIO.read``.
        algorithm: ``'upgma'`` or ``'nj'`` (case-insensitive). Any other
            value reports ``'Invalid algorithm!'`` and draws nothing.
    """
    # Read the sequences and align
    aln = AlignIO.read(file_path, file_format)

    # Print the alignment
    print(aln)

    # Calculate the distance matrix
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)

    # Print the distance matrix itself (the original printed the calculator)
    print('\nDistance Matrix\n===================')
    print(dm)

    # Construct the phylogenetic tree using the chosen algorithm
    constructor = DistanceTreeConstructor()
    method = algorithm.lower()
    if method == 'upgma':
        tree = constructor.upgma(dm)
    elif method == 'nj':
        tree = constructor.nj(dm)
    else:
        click.echo('Invalid algorithm!')
        # No tree was built; returning avoids a NameError on ``tree`` below.
        return

    # Draw the phylogenetic tree
    Phylo.draw(tree)

    # Print the phylogenetic tree in the terminal
    print('\nPhylogenetic Tree\n===================')
    Phylo.draw_ascii(tree)
Ejemplo n.º 6
0
    def _calculate_gsi(self):
        """
        Build a majority-consensus tree from the gene trees and save it.

        Counts the ``.bcg`` files in the configured BCG directory, uses that
        count to scale the consensus cutoff, draws the consensus tree as
        ASCII art and writes it in Newick format.
        :return: None
        """
        LOGGER.info("Calculating Gene Support Indices (GSIs)"
                    " from the gene trees..")
        bcg_dir = os.path.join(self._dirpath, self.config.bcg_dir)
        genome_num = sum(
            1 for entry in os.listdir(bcg_dir) if entry.endswith('.bcg')
        )

        gene_trees = Phylo.parse(
            os.path.join(self._align_output_dir, "all_gene.trees"), 'newick')
        cutoff = (100 - self.config.gsi_threshold) * genome_num / 100
        consensus = Consensus.majority_consensus(gene_trees, cutoff=cutoff)
        Phylo.draw_ascii(consensus)

        gsi_name = (f'UBCG_gsi({self._bcg_num})'
                    f'{self.config.postfixes.align_tree_const}')
        ubcg_gsi_file = os.path.join(self._align_output_dir, gsi_name)
        with open(ubcg_gsi_file, 'w') as out:
            Phylo.write(consensus, out, 'newick')

        LOGGER.info("The final tree marked with GSI was written"
                    " to %s", ubcg_gsi_file)
def root(tree, clade, filename):
    """
    Root a Newick tree on the outgroup taxa listed in a
    single-column file, draw it and write it back out.

    Parameters
    ----------
    tree:
        newick tree file
    clade:
        single column file of outgroup taxa
    filename:
        output file name
    """

    # read in tree
    tree = Phylo.read(tree, 'newick')

    # read the outgroup names; the with-block closes the file, which the
    # original left open
    with open(clade) as handle:
        clade = [line.rstrip('\n') for line in handle]

    outgroup = [{'name': taxon_name} for taxon_name in clade]

    # ``Tree.root`` is the root clade attribute, not a method, so calling it
    # raised TypeError; ``root_with_outgroup`` performs the re-rooting.
    tree.root_with_outgroup(outgroup)

    Phylo.draw_ascii(tree)

    Phylo.write(tree, filename, 'newick')
Ejemplo n.º 8
0
 def printGeneTree(self):
     """
     Print a UPGMA gene tree in the terminal and draw it with Phylo.draw.

     Reads the PHYLIP alignment at ``self.newPhylip``, computes an
     'identity' distance matrix, prints it, builds a UPGMA tree from it and
     shows the tree both graphically and as ASCII art.
     input: A .phy file that contains the aligned sequences
     output: The distance matrix and tree in the terminal, plus a drawn tree
     """
     matrix_banner = '\n======================================== DISTANCE MATRIX =======================================\n'
     tree_banner = '\n========================================= GENE TREE ===========================================\n'

     # Read the .phy file and compute pairwise distances.
     alignment = AlignIO.read(self.newPhylip, 'phylip')
     distance_matrix = DistanceCalculator('identity').get_distance(alignment)
     print(matrix_banner)
     print(distance_matrix, "\n\n")

     # Build the tree with the UPGMA algorithm.
     upgma_tree = DistanceTreeConstructor().upgma(distance_matrix)
     print(tree_banner)
     Phylo.draw(upgma_tree)  # graphical drawing (matplotlib must be installed)
     Phylo.draw_ascii(upgma_tree)  # text drawing in the terminal
Ejemplo n.º 9
0
def main():
    """Run the Monte Carlo tree search and report the start and end trees.

    Writes the final tree in Newick format to ``args.o``, prints the
    parsimony score and drawings of the original and final trees, then
    shows a histogram of the scores collected during the search.
    """
    args = parse_arguments()

    msa = ParsimonyTree.read_msa(args.a)
    i_tree = ParsimonyTree.read_tree(args.n)

    # Neighbourhood function: SPR wins over TBR; NNI is the fallback.
    if args.spr:
        nb_f = ParsimonyTree.get_spr_neighbors
    elif args.tbr:
        nb_f = ParsimonyTree.get_tbr_neighbors
    else:
        nb_f = ParsimonyTree.get_nni_neighbors

    mcmc = MonteCarlo(msa, i_tree, nb_f, args.r, args.p)
    f_tree = mcmc.get_tree()

    with open(args.o, "w") as outfile:
        Phylo.write(f_tree, outfile, "newick")

    separator = "\n=========================\n"
    for title, shown_tree in (("Original Tree", i_tree),
                              ("Final Tree", f_tree)):
        print(separator)
        print(title)
        print("Score:", ParsimonyTree.get_parsimony_score(msa, shown_tree))
        Phylo.draw(shown_tree)
        Phylo.draw_ascii(shown_tree)

    print(separator)
    print("Histogram of Parsimony Scores")
    plt.title("Histogram of Parsimony Scores")
    plt.hist(mcmc.get_scores())
    plt.show()
Ejemplo n.º 10
0
def build_phylogeny_trees():
    """Align every homologous-gene file and build a UPGMA tree from each.

    For each file in ``out/homologous_gene_sequences/`` the Clustal Omega
    command line is run, the aligned FASTA is read back, an 'identity'
    distance matrix is computed, and the UPGMA tree is drawn, printed and
    written in Nexus format.
    """
    source_dir = "out/homologous_gene_sequences/"
    aligned_dir = "out/aligned_homologous_gene_sequences/"

    for gene_file in os.listdir(source_dir):
        unaligned_path = source_dir + gene_file
        aligned_path = aligned_dir + gene_file

        command = ClustalOmegaCommandline(infile=unaligned_path, outfile=aligned_path, verbose=True, auto=True)
        os.system(str(command))

        alignment = AlignIO.read(aligned_path, 'fasta')

        # Distance matrix, then UPGMA tree
        distances = DistanceCalculator('identity').get_distance(alignment)
        upgma_tree = DistanceTreeConstructor().upgma(distances)

        Phylo.draw(upgma_tree)

        print('\nPhylogenetic Tree\n', gene_file)
        Phylo.draw_ascii(upgma_tree)
        nexus_path = 'out/phylogenetic_trees/{}_tree.nex'.format(gene_file)
        Phylo.write([upgma_tree], nexus_path, 'nexus')
Ejemplo n.º 11
0
def genTaxTree(resolver, namesdict, logger, taxonomy=None, draw=False):
    """Return Phylo from TaxonNamesResolver class.

    Only query names (with whitespace replaced by '_') that are keys of
    ``namesdict`` are kept; the others are reported through
    ``logger.debug``.  An ``outgroup`` tip is added to the taxonomic tree
    string before it is parsed as Newick.

    :param resolver: object whose ``retrieve`` method returns the query
        names, classification paths and path ranks
    :param namesdict: mapping whose keys are the resolved names to keep
    :param logger: logger receiving the list of unresolved names
    :param taxonomy: optional taxonomy passed to ``TaxDict``; its length + 1
        is the outgroup branch length (22.0 when not given)
    :param draw: draw the tree as ASCII art when true
    :return: the parsed tree
    """
    ranks = resolver.retrieve('classification_path_ranks')
    qnames = resolver.retrieve('query_name')
    lineages = resolver.retrieve('classification_path')
    # replace whitespace with '_' for taxon tree; the raw string avoids the
    # invalid "\s" escape of the original literal
    qnames = [re.sub(r"\s", "_", e) for e in qnames]
    resolved_names_bool = [e in namesdict for e in qnames]
    ranks = [ranks[ei] for ei, e in enumerate(resolved_names_bool) if e]
    lineages = [lineages[ei] for ei, e in enumerate(resolved_names_bool) if e]
    # split the names into resolved identifiers and unresolved leftovers
    idents = [name for name, ok in zip(qnames, resolved_names_bool) if ok]
    unresolved_names = [name for name, ok in zip(qnames, resolved_names_bool)
                        if not ok]
    # each name is preceded by a space, exactly as the original loop built it
    statement = "Unresolved names: " + "".join(
        " " + each for each in unresolved_names)
    logger.debug(statement)
    # make taxdict
    taxdict = TaxDict(idents=idents, ranks=ranks, lineages=lineages,
                      taxonomy=taxonomy)
    # make treestring
    treestring = taxTree(taxdict)
    # default_taxonomy + 1 in tnr when no taxonomy is supplied
    d = len(taxonomy) + 1 if taxonomy else 22
    # add outgroup
    treestring = '({0},outgroup:{1});'.format(treestring[:-1], float(d))
    tree = Phylo.read(StringIO(treestring), "newick")
    if draw:
        Phylo.draw_ascii(tree)
    return tree
Ejemplo n.º 12
0
    def makeRootUnroot(self, mod):
        """Root or unroot the tree stored in ``self.path1`` and display it.

        Nothing happens unless ``self.path1`` is set and ``self.path2`` is
        empty.

        :param mod: 0 roots at midpoint, 1 unroots, 2 roots balanced; any
            other value leaves the tree as read.
        """
        if self.path1 != '' and self.path2 == '':
            # get files extensions
            self.fileEx1 = (os.path.splitext(self.path1)[1])[1:]
            self.fileEx2 = (os.path.splitext(self.path2)[1])[1:]

            # open tree files
            self.trees = []
            self.drzewo = []

            # first tree: read the whole file up front. The with-block closes
            # the handle even if parsing or drawing fails afterwards (the
            # original kept it open until the very end and leaked it on error).
            with open(self.path1, 'r') as self.f:
                self.miss = self.f.read()
            self.tree1 = Trees.Tree(self.miss)
            self.dre = ts.Tree(self.miss)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("# Before modification")
            print(self.tree1)

            if mod == 0:
                print("# After modification -- Rooting (at midpoint):")
                self.dre.root_midpoint()
            elif mod == 1:
                print("# After modification -- UnRooting:")
                self.dre.unroot()
            elif mod == 2:
                print("# After modification -- Rooting (balanced):")
                self.dre.root_balanced()
            print(self.dre)
            print("\nDetails about tree:")
            self.dre.display()
            Phylo.draw_ascii(self.tree1)
            self.show()
Ejemplo n.º 13
0
def upgma_tree_constructor(x):
    """Print and draw the UPGMA tree built from ``x``.

    ``x`` is handed to ``DistanceCalculator("identity").get_distance``
    (presumably a multiple sequence alignment -- confirm with the caller).
    """
    tree_builder = DistanceTreeConstructor()
    distances = DistanceCalculator("identity").get_distance(x)
    tree = tree_builder.upgma(distances)
    print(tree)
    Phylo.draw_ascii(tree)
Ejemplo n.º 14
0
def main():
    """Read the Newick tree at ``args.newickFN`` and display it.

    ``args.draw`` selects the graphical drawing; otherwise the tree is
    drawn as ASCII art.
    """
    args = get_args()
    phyloTree = Phylo.read(args.newickFN, 'newick')
    render = Phylo.draw if args.draw else Phylo.draw_ascii
    render(phyloTree)
Ejemplo n.º 15
0
def nj_tree_constructor(x):
    """Print and draw the neighbor-joining tree built from ``x``.

    ``x`` is handed to ``DistanceCalculator("identity").get_distance``
    (presumably a multiple sequence alignment -- confirm with the caller).
    """
    tree_builder = DistanceTreeConstructor()
    distances = DistanceCalculator("identity").get_distance(x)
    tree = tree_builder.nj(distances)
    print(tree)
    Phylo.draw_ascii(tree)
Ejemplo n.º 16
0
    def showAscii(self):
        """Append an ASCII drawing of ``self.tree`` to ``self.textt``.

        The drawing is written to ``/tmp/ascii.txt`` and read back from it.
        """
        # Close (and therefore flush) the write handle before the file is
        # read back; the original never closed it explicitly.
        with open('/tmp/ascii.txt', 'w') as self.tmpf:
            Phylo.draw_ascii(self.tree, self.tmpf)

        with open('/tmp/ascii.txt', 'r') as self.tmpf:
            self.textt += "\n" + self.tmpf.read()
Ejemplo n.º 17
0
def tree_to_str(input_file):
    """Return the ASCII-art drawing of the Newick tree in ``input_file``."""
    parsed_tree = Phylo.read(input_file, "newick")
    buffer = StringIO()
    Phylo.draw_ascii(parsed_tree, buffer)
    rendered = buffer.getvalue()
    buffer.close()
    return rendered
Ejemplo n.º 18
0
 def __str__(self):  # override for print function
     """Print the tree details and return the two file names as text.

     Side effects: draws ``self.phylo_tree`` as ASCII art, prints every
     node with its entry in ``self.node_to_conf`` (sorted by node) and
     prints ``self.dup_events``.
     """
     Phylo.draw_ascii(self.phylo_tree)
     for node in sorted(self.node_to_conf):
         print(node, self.node_to_conf[node])
     print(' \nDuplication events: ', self.dup_events)
     summary_parts = [
         'Tree newick file: ', self.newicktree, '\n',
         'Tree duplos file: ', self.duploslist, '\n',
     ]
     return ''.join(summary_parts)
Ejemplo n.º 19
0
def prettyprint_tree(tree, file=None):
    """Draw a tree given as a collection of clades.

    The clades are sorted from largest to smallest, folded into a nested
    dictionary with ``create_tree_dict`` and converted to a BioPython tree,
    which is drawn as ASCII art and then, on a best-effort basis, with
    ``Phylo.draw``.

    :param tree: iterable of clades (each must support ``len``)
    :param file: handle passed to ``Phylo.draw_ascii`` (its default applies
        when ``None``)
    """
    # Convert the "tree" object (list of clades) to a BioPython tree
    # to take advantage of their output methods
    def create_ntree(tree):
        ntree = BaseTree.Clade()
        for key in tree:
            el = tree[key]
            if len(el.values()) > 0:
                ntree.clades.append(create_ntree(el))
            else:
                ntree.clades.append(BaseTree.Clade(name=list(key)[0]))
        return ntree

    # Sort the clades from largest to smallest
    new_tree = sorted(tree, key=lambda x: -len(x))
    # Build a dictionary representation of the tree
    tree_dict = {}
    for clade in new_tree:
        tree_dict = create_tree_dict(tree_dict, clade)
    # Convert the dictionary representation to a BioPython Tree object
    ntree = BaseTree.Tree(create_ntree(tree_dict))
    # Use the BioPython print method
    Phylo.draw_ascii(ntree, file=file)
    try:
        Phylo.draw(ntree)
    except Exception:
        # Graphical drawing stays optional, but unlike the original bare
        # ``except:`` this no longer swallows KeyboardInterrupt/SystemExit.
        pass
Ejemplo n.º 20
0
    def showAscii(self):
        """Append an ASCII drawing of ``self.tree`` to ``self.textt``.

        The drawing is written to ``/tmp/ascii.txt`` and read back from it.
        """
        # Close (and therefore flush) the write handle before the file is
        # read back; the original never closed it explicitly.
        with open('/tmp/ascii.txt', 'w') as self.tmpf:
            Phylo.draw_ascii(self.tree, self.tmpf)

        with open('/tmp/ascii.txt', 'r') as self.tmpf:
            self.textt += "\n" + self.tmpf.read()
Ejemplo n.º 21
0
def clustalo(file, malign, treef):
    """Run Clustal Omega through the EBI REST service and save its outputs.

    Posts ``MultipleAlign/<file>`` as a job, waits 20 seconds, then saves
    the Clustal alignment to ``malign``, the Newick tree to
    ``Phylotree.txt`` and an ASCII drawing of that tree to ``treef``.

    NOTE(review): the job is assumed to be finished after the fixed sleep;
    the job status is never polled.

    :param file: name of the sequence file inside ``MultipleAlign/``
    :param malign: path of the alignment output file
    :param treef: path of the ASCII tree output file
    """
    # Every file is handled in a with-block so it is closed even when a
    # request or the tree parsing raises.
    with open('MultipleAlign/' + file, 'rb') as f:
        payload = {'email': '*****@*****.**', 'sequence': f.read()}

    r = requests.post(
        "http://www.ebi.ac.uk/Tools/services/rest/clustalo/run/",
        data=payload,
    )
    print(r.text)
    time.sleep(20)

    # ``response`` replaces the original local ``re``, which shadowed the
    # regular-expression module name.
    url = 'http://www.ebi.ac.uk/Tools/services/rest/clustalo/result/' + r.text + '/aln-clustal'
    response = requests.get(url)
    with open(malign, 'w') as f:
        print(response.text, file=f)

    url = 'http://www.ebi.ac.uk/Tools/services/rest/clustalo/result/' + r.text + '/phylotree'
    response = requests.get(url)
    with open('Phylotree.txt', 'w') as f:
        print(response.text, file=f)

    with open(treef, 'w') as f:
        tree = Phylo.read('Phylotree.txt', "newick")
        Phylo.draw_ascii(tree, file=f)
Ejemplo n.º 22
0
    def add_mutants(self, reco_event, irandom):
        """Generate mutated sequences for ``reco_event`` along a randomly chosen tree.

        Picks one entry of ``self.treeinfo``, splits it into the Newick tree
        and the per-region branch-length ratios that follow it, rescales the
        tree for each region, runs ``run_bppseqgen`` on every eroded region
        and on the ``vd`` and ``dj`` insertions, and appends the concatenated
        v + vd + d + dj + j sequences to ``reco_event.final_seqs``.

        :param reco_event: event object providing ``eroded_seqs``,
            ``insertions``, ``genes`` and ``final_seqs``; ``final_seqs`` must
            be empty on entry (asserted)
        :param irandom: value passed as ``seed`` to every ``run_bppseqgen`` call
        """
        chosen_treeinfo = self.treeinfo[random.randint(0, len(self.treeinfo)-1)]
        chosen_tree = chosen_treeinfo.split(';')[0] + ';'
        branch_length_ratios = {}  # NOTE a.t.m (and probably permanently) the mean branch lengths for each region are the *same* for all the trees in the file, I just don't have a better place to put them while I'm passing from TreeGenerator to here than at the end of each line in the file
        for tmpstr in chosen_treeinfo.split(';')[1].split(','):  # looks like e.g.: (t2:0.003751736951,t1:0.003751736951):0.001248262937;v:0.98,d:1.8,j:0.87, where the newick trees has branch lengths corresponding to the whole sequence  (i.e. the weighted mean of v, d, and j)
            region = tmpstr.split(':')[0]
            assert region in utils.regions
            ratio = float(tmpstr.split(':')[1])
            if self.args.branch_length_multiplier != None:  # multiply the branch lengths by some factor
                # if self.args.debug:
                # print '    adding branch length factor %f ' % self.args.branch_length_multiplier
                ratio *= self.args.branch_length_multiplier
            branch_length_ratios[region] = ratio

        if self.args.debug:  # NOTE should be the same for t[0-9]... but I guess I should check at some point
            print '  using tree with total depth %f' % treegenerator.get_leaf_node_depths(chosen_tree)['t1']  # kind of hackey to just look at t1, but they're all the same anyway and it's just for printing purposes...
            Phylo.draw_ascii(Phylo.read(StringIO(chosen_tree), 'newick'))
            print '    with branch length ratios ', ', '.join([ '%s %f' % (region, branch_length_ratios[region]) for region in utils.regions])

        # one rescaled tree per region, keyed by region name
        scaled_trees = self.get_rescaled_trees(chosen_tree, branch_length_ratios)
        # NOTE would be nice to parallelize this
        mutes = {}
        for region in utils.regions:
            mutes[region] = self.run_bppseqgen(reco_event.eroded_seqs[region], scaled_trees[region], reco_event.genes[region], reco_event, seed=irandom, is_insertion=False)
        # the vd insertion is mutated along the v tree and the dj insertion along the j tree
        mutes['vd'] = self.run_bppseqgen(reco_event.insertions['vd'], scaled_trees['v'], 'vd_insert', reco_event, seed=irandom, is_insertion=True)  # NOTE would be nice to use a better mutation model for the insertions
        mutes['dj'] = self.run_bppseqgen(reco_event.insertions['dj'], scaled_trees['j'], 'dj_insert', reco_event, seed=irandom, is_insertion=True)

        assert len(reco_event.final_seqs) == 0
        for iseq in range(len(mutes['v'])):
            seq = mutes['v'][iseq] + mutes['vd'][iseq] + mutes['d'][iseq] + mutes['dj'][iseq] + mutes['j'][iseq]  # build final sequence
            seq = reco_event.revert_conserved_codons(seq)  # if mutation screwed up the conserved codons, just switch 'em back to what they were to start with
            reco_event.final_seqs.append(seq)  # set final sequnce in reco_event

        assert not utils.are_conserved_codons_screwed_up(reco_event)
Ejemplo n.º 23
0
 def initUI(self):
     """Build the read-only text view and fill it with the ASCII tree.

     The drawing of ``self.tree`` is written to ``/tmp/ascii.txt`` and read
     back into ``self.data`` before the window is shown.
     """
     # field for drawing Ascii tree
     self.textEdit = QtGui.QTextEdit()
     self.textEdit.setReadOnly(True)
     self.textEdit.setFontFamily('Courier')
     self.textEdit.setWordWrapMode(True)
     #self.textEdit.setStyleSheet('')

     # layout
     self.layout = QtGui.QVBoxLayout(self)
     self.layout.addWidget(self.textEdit)
     self.setLayout(self.layout)

     # print tree: close (and flush) the write handle before reading the
     # file back; the original never closed it explicitly
     with open('/tmp/ascii.txt', 'w') as self.tmpf:
         Phylo.draw_ascii(self.tree, self.tmpf)

     with open('/tmp/ascii.txt', 'r') as self.tmpf:
         self.data = self.tmpf.read()
         self.textEdit.setText(self.data)

     self.setGeometry(200, 200, 700, 400)
     self.setWindowTitle('Tekstowe wyswietlanie')
     self.show()
Ejemplo n.º 24
0
 def test_draw_ascii(self):
     """Draw a phyloXML tree as ASCII art at the default and a custom width."""
     handle = StringIO()
     try:
         tree = Phylo.read(EX_APAF, 'phyloxml')
         Phylo.draw_ascii(tree, file=handle)
         Phylo.draw_ascii(tree, file=handle, column_width=120)
     finally:
         # Release the buffer even when reading or drawing raises.
         handle.close()
Ejemplo n.º 25
0
def rand_tree(tips, brl_avg=1, brl_std=None, verbose='T'):
    """
    Create a random tree to do NNI moves on.

    :param tips: taxa passed to ``Tree.randomized``
    :param brl_avg: value passed as ``branch_length``
    :param brl_std: value passed as ``branch_stdev``
    :param verbose: ``'T'`` prints the Newick string and an ASCII drawing
    :return: ``Tree`` object built from the cleaned Newick string
    """
    # Create random tree (named so it does not shadow this function)
    random_phylo = Phylo.BaseTree.Tree.randomized(taxa=tips,
                                                  branch_length=brl_avg,
                                                  branch_stdev=brl_std)

    # Convert to newick string, strip trailing whitespace
    rand_newick = random_phylo.format('newick').strip()

    # Remove root branch length of 0; the dot is escaped so only a literal
    # ":0.00000;" matches
    rand_newick2 = re.sub(r':0\.00000;', ';', rand_newick)

    # Remove node names, use this to read into dendropy if needed.
    # Raw string: "\)" and "\d" are invalid escapes in a normal literal.
    no_nodes_dp = re.sub(r'\)n\d+:', '):', rand_newick2)

    # Remove trailing ";" use this to read into readTree
    no_nodes = no_nodes_dp.strip(';')

    # Print stuff if you want to.
    if verbose == 'T':
        # Print newick string
        print(no_nodes)
        # View tree
        Phylo.draw_ascii(random_phylo)

    # Convert to readTree Tree object
    tree_random = Tree(no_nodes)
    return tree_random
Ejemplo n.º 26
0
def run(settings: dict):
    """Write an ASCII drawing of the pipeline's Newick tree to a text file.

    Both files live under ``<data_dir>/interim/`` and share the pipeline
    name as prefix: ``_tree.nwk`` is read and ``_tree.txt`` is written.
    """
    LOG.info('Visualizing phylo tree')
    prefix = f"{settings['data_dir']}/interim/{settings['pipeline']}"
    newick_path = f"{prefix}_tree.nwk"
    ascii_path = f"{prefix}_tree.txt"

    tree = Phylo.read(newick_path, 'newick')
    with open(ascii_path, 'w') as out:
        Phylo.draw_ascii(tree, out)
Ejemplo n.º 27
0
    def HapScorer(self, fn, mode, refid):
        """Store each haplotype's tree distance to the two patient alleles.

        In ``"phylo"`` mode the sequences in ``fn`` are aligned with
        ``plugins/clustalo``, the guide tree it writes is rooted on
        ``refid``, and for every named clade whose name does not contain
        ``"Patient"`` the distances to ``Patient_allele1`` and
        ``Patient_allele2`` are inserted into ``patienthaps``.  Any other
        mode only commits.

        :param fn: input FASTA path (expected under ``alignments/``)
        :param mode: ``"phylo"`` or anything else for the standard mode
        :param refid: name of the clade used as outgroup
        """
        if mode == "phylo":
            # phylogenetic tree
            of = fn.replace("alignments/", "alignments/aligned/")

            # str.strip(".fasta") removes any of the characters ".fast" from
            # BOTH ends of the path (e.g. the leading "a" of "alignments"),
            # so cut the extension off explicitly instead.
            stem = of[:-len(".fasta")] if of.endswith(".fasta") else of
            tn = stem + "_tree.dnd"

            # NOTE(review): the two handles are never used; they are kept so
            # that unreadable/unwritable paths still fail before clustalo runs.
            with open(fn, "rb") as infile, open(of, "wb") as outfile:
                s.check_call("plugins/clustalo -i %s -o %s --auto --force --guidetree-out=%s" %(fn, of, tn),  shell=True)

            tree = Phylo.read(tn, "newick")

            # unique clade names in traversal order
            names = []
            for clade in tree.find_clades():
                if clade.name and clade.name not in names:
                    names.append(clade.name)

            tree.root_with_outgroup({refid})

            Phylo.draw_ascii(tree)

            for hap in names:
                if "Patient" in hap:
                    continue
                # distance from this haplotype to each of the two patient alleles
                distances = {}
                for i in range(1, 3):
                    distances["al%i" % i] = tree.distance("Patient_allele%i" % i, hap)
                item = (hap, distances["al1"], distances["al2"])
                self.insertValues("patienthaps", item)

            self.conn.commit()

        else:
            # standard mode
            pass

        self.conn.commit()
Ejemplo n.º 28
0
def phylogeny(filename):
    """Print, draw as ASCII art and plot the Newick tree in ``filename``."""
    tree = Phylo.read(filename, "newick")
    print(tree)
    # draw_ascii writes the drawing itself and returns None, so wrapping it
    # in print only added a stray "None" line.
    Phylo.draw_ascii(tree)
    tree.rooted = True
    Phylo.draw(tree)
        
#alignments("alinhamentos.phy")
#phylogeny("filogenia.dnd")
Ejemplo n.º 29
0
def view_phylo(tree_object):
    '''
    Visualize tree with biopython.

    The Newick string produced by ``tree_object.newick(tree_object.root)``
    is parsed with Bio.Phylo, printed and drawn as ASCII art.
    '''
    newick_buffer = StringIO(tree_object.newick(tree_object.root))
    parsed = Phylo.read(newick_buffer, "newick")
    print("Tree")
    print(tree_object.newick(tree_object.root))
    Phylo.draw_ascii(parsed)
Ejemplo n.º 30
0
def trim_tree(Infiles):
    """Read each Newick tree file in ``Infiles`` and print it.

    For every file a separator, the file name, the tree's text form and an
    ASCII drawing are printed.
    """
    separator = "---------------------------"
    for treefile in Infiles:
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print(separator)
        print("Tree File = {}".format(treefile))
        parsed = Phylo.read(treefile, 'newick')
        print(parsed)
        Phylo.draw_ascii(parsed)
Ejemplo n.º 31
0
def trim_tree(Infiles):
    """Read each Newick tree file in ``Infiles`` and print it.

    For every file a separator, the file name, the tree's text form and an
    ASCII drawing are printed.
    """
    separator = "---------------------------"
    for treefile in Infiles:
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print(separator)
        print("Tree File = {}".format(treefile))
        parsed = Phylo.read(treefile, 'newick')
        print(parsed)
        Phylo.draw_ascii(parsed)
Ejemplo n.º 32
0
def genTaxTree(resolver, by_ids=False, draw=False):
    """Generate Newick tree from TaxonNamesResolver class.

    Arguments:
     resolver = TaxonNamesResolver class
     by_ids = Use taxon IDs instead of names (logical)
     draw = Draw ascii tree (logical)

    Return:
     (Newick Tree Object, [shared lineage])

    Note: the lineage and rank lists returned by ``resolver.retrieve`` are
    reversed in place.
    """
    # Indentation is normalised to spaces: the original mixed tabs and
    # spaces, which Python 3 rejects with a TabError.
    if by_ids:
        idents = resolver.retrieve('taxon_id')
        lineages = resolver.retrieve('classification_path_ids')
    else:
        idents = resolver.retrieve('name_string')
        lineages = resolver.retrieve('classification_path')
    ranks = resolver.retrieve('classification_path_ranks')
    for lineage in lineages:
        lineage.reverse()
    for rank in ranks:
        rank.reverse()
    # make lineages of same ranks
    all_ranks = [e2 for e1 in ranks for e2 in e1]
    rank_freq = collections.Counter(all_ranks).items()
    shared_ranks = [e for e, f in rank_freq if f == len(idents)]
    line_bool = [[1 if e2 in shared_ranks else 0 for e2 in e1] for e1 in ranks]
    lineages = [[lineages[i1][i2] for i2, e2 in enumerate(e1) if e2 == 1]
                for i1, e1 in enumerate(line_bool)]
    all_lines = [e2 for e1 in lineages for e2 in e1]
    line_freq = collections.Counter(all_lines).items()
    shared_lineage = [e for e, f in line_freq if f == len(idents)]
    # TODO: if shared lineage is empty... drop radically different taxa
    # create line_obj, a list of (ident, lineage) tuples; list() is needed
    # because the pairs are iterated repeatedly and appended to, which a
    # Python 3 zip iterator does not support
    line_obj = list(zip(idents, lineages))
    for i in range(len(lineages[0])):
        for uniq in set([each[1][i] for each in line_obj]):
            # find shared taxonomic groups
            new_node = [each[0] for each in line_obj if each[1][i] == uniq]
            if len(new_node) > 1:
                # extract shared lineage
                lineage = [each[1] for each in line_obj
                           if each[0] == new_node[0]]
                # remove shareds from line_obj
                line_obj = [each for each in line_obj
                            if not each[0] in new_node]
                # convert to strings
                new_node = [str(each) for each in new_node]
                # add new node to line_obj
                new_node = ('(' + ','.join(new_node) + ')', lineage[0])
                line_obj.append(new_node)
        if len(line_obj) < 1:
            break
    tree = Phylo.read(StringIO(line_obj[0][0] + ';'), "newick")
    if draw:
        Phylo.draw_ascii(tree)
    return (tree, shared_lineage)
Ejemplo n.º 33
0
 def clustal_tree(self, ids_seqs):
     '''
     Build the phylogenetic tree of all the manager's sequences with the
     help of the ClustalW program and draw it as ASCII art.
     '''
     self.write_fasta(ids_seqs, file_name='All_seqs.fasta')
     run_clustalw = ClustalwCommandline('clustalw2', infile='All_seqs.fasta')
     run_clustalw()
     guide_tree = Phylo.read('All_seqs.dnd', 'newick')
     Phylo.draw_ascii(guide_tree)
Ejemplo n.º 34
0
def grow_tree_recover(m, n, k, proba_bounds, verbose=True):
    """Generate a random discrete tree and try to recover its topology.

    A reference tree is created with ``random_discrete_tree``, observations
    are taken from its root, and a tree is estimated from them.  Whether
    the two topologies are equal is printed; with ``verbose`` both trees
    are drawn as well.
    """
    truth = random_discrete_tree(m, n, k, proba_bounds=proba_bounds)
    if verbose:
        truth.root.ascii()

    observations, labels = truth.root.observe()
    estimate = estimate_tree_topology_multiclass(observations, labels=labels)
    if verbose:
        Phylo.draw_ascii(estimate)

    # The F-score is computed but its value is not used here.
    NoahClade.tree_Fscore(estimate, truth)
    same_topology = NoahClade.equal_topology(estimate, truth)
    print(same_topology)
Ejemplo n.º 35
0
def arbol_parsimonia(archivo, formato):
    """Build and draw a parsimony tree for the alignment in ``archivo``.

    The first tree returned by ``arboles`` (the NJ one) is the starting
    tree for an NNI search scored with ``ParsimonyScorer``.
    """
    alignment = AlignIO.read(archivo, formato)
    nj_start, _upgma = arboles(archivo, formato)
    searcher = NNITreeSearcher(ParsimonyScorer())
    builder = ParsimonyTreeConstructor(searcher, nj_start)
    parsimony_tree = builder.build_tree(alignment)
    print("Arbol Parsimonia")
    Phylo.draw_ascii(parsimony_tree)
Ejemplo n.º 36
0
def drawTree(treeFile):
  '''
   - Prints an ASCII dendrogram of the Newick tree stored in treeFile
  '''

  heading = '\nThe phylogenetic tree for the cluster representatives is shown below:\n'
  print(heading)
  Phylo.draw_ascii(Phylo.read(treeFile, 'newick'))
  print('\n')
  '''
Ejemplo n.º 37
0
 def draw_tree_alignment(self):
     """Show the ASCII guide tree in the alignment text widget.

     Runs only when ``self.alignment_in_window`` equals ``True`` and
     ``self.data_loaded`` is truthy.  The tree in
     ``./tmp_files/tmp_amino_acids.dnd`` is drawn into
     ``./tmp_files/tmp_ascii_tree``, whose lines are then inserted into
     ``self.align_text``.
     """
     if self.alignment_in_window == True and self.data_loaded:
         self.clear_align_window()
         tree = Phylo.read(r"./tmp_files/tmp_amino_acids.dnd", "newick")
         with open(r"./tmp_files/tmp_ascii_tree", "w") as fh:
             Phylo.draw_ascii(tree, file=fh, column_width=70)

         # Read the drawing back inside a with-block so the handle is
         # closed; the original iterated over an anonymous open() call.
         with open(r"./tmp_files/tmp_ascii_tree") as fh:
             for line in fh:
                 self.align_text.insert(tkinter.INSERT, line)
         self.tree_in_window = True
Ejemplo n.º 38
0
def phylo_tree(dnd, draw):
    """
    Takes a dnd file and draws a phylogenetic tree. If draw is False draws ascii tree.

    :param dnd: Newick (.dnd) tree file to read
    :param draw: True for the graphical drawing, False for ASCII art
    """
    from Bio import Phylo
    # Read the file the caller asked for; the original ignored ``dnd`` and
    # always opened the hard-coded "lab3.dnd".
    tree = Phylo.read(dnd, "newick")
    if draw:
        Phylo.draw(tree)
    else:
        Phylo.draw_ascii(tree)
def tree():  # get the phylogenetic trees in ASCII format
    """Draw the PhyML tree of every entry in ``interesting_list``.

    Tree number ``k`` (counting from 1) is read from
    ``Malign<k>.phy_phyml_tree.txt``.  Any error stops the loop and prints
    ``"Creating Tree error!"``.
    """
    from Bio import Phylo
    try:
        for number, label in enumerate(interesting_list, start=1):
            align_tree = Phylo.read("Malign" + str(number) + ".phy_phyml_tree.txt", "newick")
            print("Happy tree " + str(number) + "! " + label)
            Phylo.draw_ascii(align_tree)
    except Exception:
        # ``Exception`` instead of a bare except, so Ctrl-C and SystemExit
        # are no longer reported as a tree error.
        print("Creating Tree error!")
Ejemplo n.º 40
0
    def draw(self):
        """
        Visualize the phylo tree.

        Keeps only the strictly positive entries of each row of
        ``self.distMat``, builds a UPGMA tree from them and draws it as
        ASCII art.
        """
        positive_rows = [
            [value for value in row if value > 0]
            for row in self.distMat.tolist()
        ]
        tree_builder = DistanceTreeConstructor()
        matrix = DistanceMatrix(self.names, positive_rows)
        Phylo.draw_ascii(tree_builder.upgma(matrix))
Ejemplo n.º 41
0
def draw_tree(NewickFile, Results_Dir=''):
    """Write a ladderized ASCII drawing of a Newick tree to ``Tree.txt``.

    :param NewickFile: Newick tree file to read
    :param Results_Dir: prefix prepended to ``"Tree.txt"`` (include the
        trailing path separator)
    """
    tree = Phylo.read(NewickFile, 'newick')
    tree.rooted = True
    tree.ladderize()
    # The with-block closes (and flushes) the output file, which the
    # original left open.
    with open(Results_Dir + "Tree.txt", "w") as TreeFile:
        TreeFile.write('\n' +
                       ' Simple plot of Neighbor Joining Tree '.center(80, '-') +
                       '\n\n')
        Phylo.draw_ascii(tree, file=TreeFile)
Ejemplo n.º 42
0
def get_phylo_tree(fasta_file):
    """Run ClustalW on ``fasta_file`` and save an ASCII drawing of its tree.

    :param fasta_file: input FASTA path; the tree is read from
        ``<fasta_file>.dnd``
    :return: path of the written ``<fasta_file>.tree`` file, or an error
        message string when anything fails
    """
    try:
        cline = ClustalwCommandline("clustalw2", infile=fasta_file)
        cline()
        tree = Phylo.read("%s.dnd" % fasta_file, "newick")
        representation_file = '%s.tree' % fasta_file
        with open(representation_file, 'w') as output_file:
            Phylo.draw_ascii(tree, output_file)
        return representation_file
    except Exception as e:
        # "except Exception as e" works on Python 2.6+ and 3; the original
        # "except Exception, e" is a syntax error on Python 3.
        return 'Error generating phylo tree: %s' % str(e)
Ejemplo n.º 43
0
def main(args):
    '''
    Process the command line, parse the input files and display the output.

    Args:
        args: sequence of at least three items: MSA file, tree file and
            cutoff percent (converted with float()).

    Returns:
        0 on completion.

    Raises:
        ArgError: fewer than three arguments were supplied.
        ParseError: the MSA file contained no records.
    '''
    if len(args) < 3:
        raise ArgError("Must supply 3 arguments: msa_file tree_file cutoff_percent")

    # would like to do some more error checking here!
    msa_file = args[0]
    tree_file = args[1]
    cutoff_percent = float(args[2])

    # hardcoded MSA_FORMAT ('fasta') for now
    msa_list = list(SeqIO.parse(msa_file, MSA_FORMAT))

    # this could be empty, let's check to make sure it's not
    if not msa_list:
        raise ParseError("No MSAs found in msa file '%s'" % msa_file)

    # Phylo's IO will raise an exception if it's a bad file
    tree = Phylo.read(tree_file, INPUT_PHYLO_FORMAT)

    helper = KerfHelper(msa_list, tree)
    sub_trees = do_kerf_split(tree, helper, cutoff_percent)

    # write the output to named files in the cwd
    write_output = True
    # draw the output to the screen
    draw_output = True

    if write_output:
        write_output_files(msa_list, sub_trees, helper)

    if draw_output:
        # print('') emits one blank line on both Python 2 and Python 3
        print('')

        for sub_tree in sub_trees:
            Phylo.draw_ascii(sub_tree)
            print('')
            print('-' * 80)

        # Summary is printed once, after every sub-tree has been drawn.
        print('')
        print("%d trees" % len(sub_trees))

    return 0
Ejemplo n.º 44
0
    def align(self):
        """Align ``self.fasta_file`` with ClustalW and print the results.

        Runs the "./clustalw2" executable, then reads and prints the
        alignment from ``self.fasta_base + ".aln"`` and draws the tree from
        ``self.fasta_base + ".dnd"`` as ASCII art.

        Raises:
            AssertionError: "./clustalw2" is not an existing file.
        """
        clustalw2 = "./clustalw2"
        assert os.path.isfile(clustalw2), "Clustal W executable missing"

        cline = ClustalwCommandline(clustalw2, infile=self.fasta_file)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Aligning fasta files..")
        cline()

        alignment = AlignIO.read(self.fasta_base + ".aln", "clustal")
        print(alignment)

        tree = Phylo.read(self.fasta_base + ".dnd", "newick")
        Phylo.draw_ascii(tree)
Ejemplo n.º 45
0
def main():
    """Build a binary tree from shuffled letters, check lookups and draw it.

    The letters 'a'..'g' are shuffled and printed, folded into a tree with
    ``binary_tree_add`` (starting from ``None``), three membership checks
    are asserted, and ``str(tree)`` is parsed as Newick and drawn as ASCII.
    """
    # reduce is a builtin only on Python 2; functools provides it on 2.6+ and 3.
    from functools import reduce

    pattern = list('abcdefg')
    shuffle(pattern)
    print(''.join(pattern))

    tree = reduce(binary_tree_add, pattern, None)

    assert binary_tree_find(tree, 'a') == True
    assert binary_tree_find(tree, 'g') == True
    assert binary_tree_find(tree, 'z') == False

    output_tree = Phylo.read(StringIO(str(tree)), "newick")
    Phylo.draw_ascii(output_tree)
Ejemplo n.º 46
0
def prettyprint_tree(tree):
    """Draw a nested-dict tree as ASCII art and then with ``Phylo.draw``.

    Args:
        tree: Mapping whose values are either nested mappings (internal
            nodes) or anything else (leaves). A leaf is named after the
            first element obtained by iterating its key.
    """
    def create_ntree(subtree):
        # Convert one level of the nested mapping into a Clade.
        clade = BaseTree.Clade()
        for key in subtree:
            child = subtree[key]
            # isinstance also recurses into dict subclasses (OrderedDict,
            # defaultdict), which an exact type comparison treats as leaves.
            if isinstance(child, dict):
                clade.clades.append(create_ntree(child))
            else:
                clade.clades.append(BaseTree.Clade(name=list(key)[0]))
        return clade

    ntree = BaseTree.Tree(create_ntree(tree))
    Phylo.draw_ascii(ntree)
    Phylo.draw(ntree)
    return
Ejemplo n.º 47
0
def tree(fas, RefPro, clean):
    """Build a PhyML tree for ``phy/<fas>.phy`` and draw it as ASCII art.

    PhyML is run only when the tree file
    ``phy/<fas>.phy_phyml_tree.txt`` does not exist or ``clean`` is truthy;
    the tree is read and drawn only in that case.

    Args:
        fas: Base name of the alignment inside the "phy/" directory.
        RefPro: If truthy the data type passed to PhyML is 'aa', else 'nt'.
        clean: If truthy, rebuild even when the tree file already exists.

    Any exception is reported on stdout instead of being propagated.
    """
    try:
        tree_path = 'phy/' + fas + ".phy_phyml_tree.txt"
        if not os.path.exists(tree_path) or clean:
            phytype = 'aa' if RefPro else 'nt'
            cmdline = PhymlCommandline(input='phy/' + fas + ".phy", datatype=phytype, alpha='e', bootstrap=10)
            print(str(cmdline) + '\n')
            cmdline()
            egfr_tree = Phylo.read(tree_path, "newick")
            Phylo.draw_ascii(egfr_tree)
    except Exception as e:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('WARNING: BAD TREE')
        print(e)
Ejemplo n.º 48
0
    def add_mutants(self, reco_event, irandom):
        """Simulate mutated sequences for <reco_event> along a randomly chosen tree.

        One line is picked at random from self.treeinfo. Its newick tree is
        rescaled per region with the branch length ratios stored after the
        tree, one command per region is prepared with
        self.prepare_bppseqgen() and run through utils.run_cmds(), and the
        per-leaf v + d + j sequences are appended to reco_event.final_seqs
        (which must be empty on entry). Finally self.add_shm_indels() is
        called.

        <irandom> is passed on to self.prepare_bppseqgen() as the seed.
        """
        chosen_treeinfo = self.treeinfo[random.randint(0, len(self.treeinfo)-1)]
        chosen_tree = chosen_treeinfo.split(';')[0] + ';'
        branch_length_ratios = {}  # NOTE a.t.m (and probably permanently) the mean branch lengths for each region are the *same* for all the trees in the file, I just don't have a better place to put them while I'm passing from TreeGenerator to here than at the end of each line in the file
        for tmpstr in chosen_treeinfo.split(';')[1].split(','):  # looks like e.g.: (t2:0.003751736951,t1:0.003751736951):0.001248262937;v:0.98,d:1.8,j:0.87, where the newick trees has branch lengths corresponding to the whole sequence  (i.e. the weighted mean of v, d, and j)
            region = tmpstr.split(':')[0]
            assert region in utils.regions
            ratio = float(tmpstr.split(':')[1])
            if self.args.mutation_multiplier is not None:  # multiply the branch lengths by some factor
                ratio *= self.args.mutation_multiplier
            branch_length_ratios[region] = ratio

        if self.args.debug:  # NOTE should be the same for t[0-9]... but I guess I should check at some point
            # single-argument print(...) calls give identical output on python 2 and 3
            print('  using tree with total depth %f' % treegenerator.get_leaf_node_depths(chosen_tree)['t1'])  # kind of hackey to just look at t1, but they're all the same anyway and it's just for printing purposes...
            if len(re.findall('t', chosen_tree)) > 1:  # if more than one leaf
                Phylo.draw_ascii(Phylo.read(StringIO(chosen_tree), 'newick'))
            else:
                print('    one leaf')
            ratio_strs = ['%s %f' % (reg, branch_length_ratios[reg]) for reg in utils.regions]
            # two spaces after "ratios": the python 2 print statement put a separator space after the label's own trailing space
            print('    with branch length ratios  ' + ', '.join(ratio_strs))

        scaled_trees = self.get_rescaled_trees(chosen_tree, branch_length_ratios)
        treg = re.compile('t[0-9][0-9]*')
        n_leaf_nodes = len(treg.findall(chosen_tree))
        cmdfos = []
        for region in utils.regions:
            simstr = reco_event.eroded_seqs[region]
            if region == 'd':
                simstr = reco_event.insertions['vd'] + simstr + reco_event.insertions['dj']
            cmdfos.append(self.prepare_bppseqgen(simstr, scaled_trees[region], n_leaf_nodes, reco_event.genes[region], reco_event, seed=irandom))

        utils.run_cmds([cfo for cfo in cmdfos if cfo is not None], sleep=False)  # shenanigan is to handle zero-length regional seqs

        mseqs = {}
        for region, cmdfo in zip(utils.regions, cmdfos):  # cmdfos holds exactly one entry per region, in the same order
            if cmdfo is None:
                mseqs[region] = ['' for _ in range(n_leaf_nodes)]  # return an empty string for each leaf node
            else:
                mseqs[region] = self.read_bppseqgen_output(cmdfo, n_leaf_nodes)

        assert len(reco_event.final_seqs) == 0
        for iseq in range(n_leaf_nodes):
            seq = mseqs['v'][iseq] + mseqs['d'][iseq] + mseqs['j'][iseq]
            seq = reco_event.revert_conserved_codons(seq)  # if mutation screwed up the conserved codons, just switch 'em back to what they were to start with
            reco_event.final_seqs.append(seq)  # set final sequnce in reco_event

        self.add_shm_indels(reco_event)
Ejemplo n.º 49
0
    def display(self, isascii=False):
        """
        Load the tree from ``<self.filename>.dnd`` and display it.

        The tree is stored in ``self.tree``. When ``isascii`` is truthy an
        ASCII rendering is printed first. A graphviz rendering is then
        attempted; if that fails a warning is printed and the ASCII
        rendering is used as a fallback, unless it was already printed.
        """
        self.tree = Phylo.read('{}.dnd'.format(self.filename), 'newick')

        if isascii:
            Phylo.draw_ascii(self.tree)

        try:
            import pylab
            Phylo.draw_graphviz(self.tree)
            pylab.show()
        except Exception:
            # Catch ordinary errors only, so Ctrl-C and sys.exit() propagate.
            print('Warning: failed to display using graphviz')
            # Avoid printing the same ASCII tree a second time.
            if not isascii:
                Phylo.draw_ascii(self.tree)
Ejemplo n.º 50
0
def filter(input, informat, outformat, tip_labels):
    """Prune a tree down to the tips listed in a file and write the result.

    Args:
        input: Path of the tree file to read; also the prefix of the output
            file name.
        informat: Tree format name passed to ``Phylo.read``.
        outformat: Tree format name passed to ``Phylo.write``; also part of
            the output file name.
        tip_labels: Path of a text file with one tip name to keep per line.

    The pruned tree is drawn as ASCII art and written to
    ``<input>_subtree_pruned.<outformat>.tre``.
    """
    tree = Phylo.read(input, informat)
    # Read the labels once into a set: O(1) membership tests, and the file
    # is closed before any pruning starts.
    with open(tip_labels, 'r') as tip_names:
        tips_to_keep = {line.rstrip('\n') for line in tip_names}
    tips_to_prune = [tip.name for tip in tree.get_terminals()
                     if tip.name not in tips_to_keep]
    for tip_name in tips_to_prune:
        tree.prune(tip_name)
    with open(input+'_subtree_pruned.'+outformat+'.tre', 'w') as output_handle:
        # NOTE(review): output_tree is not defined in this function; it is
        # presumably a module-level list -- confirm. It receives the string
        # form of the file handle, not the tree.
        output_tree.append(str(output_handle))
        Phylo.draw_ascii(tree)
        Phylo.write(tree, output_handle, outformat)
def trim_tree(absenteeList, TreeFile, Inclusive):
    """Collapse away species from the phylogenetic tree that
    are not found in this sequence file. Output the tree file.

    Besides its arguments the function reads the module-level names
    ``CladeList``, ``Model`` and ``TreeOutFileName``; pruned taxa are also
    removed from ``CladeList`` in place.

    Args:
        absenteeList: Taxa to prune from the tree.
        TreeFile: Path (or handle) of the Newick tree to read.
        Inclusive: Anything other than 'no' also marks the terminal
            branches of a multi-species foreground clade with "#1".
    """
    # Single-argument print(...) calls give identical output on Python 2 and 3.
    print("\nReading the Tree...")
    #parse the tree using Phylo
    tree = Phylo.read(TreeFile, 'newick')
    print("Here is the starting tree:")
    Phylo.draw_ascii(tree)
    terminals = tree.get_terminals()
    print("\nFound the following {} taxa in the tree:".format(len(terminals)))
    print(terminals)
    #prune away taxa that are not included for this sequence file
    for taxon in absenteeList:
        tree.prune(taxon)
        if CladeList != "none" and taxon in CladeList:
            CladeList.remove(taxon)
    print("\nPruned away these species:")
    print(absenteeList)
    print("\nHere is the tree with the missing taxa pruned away:\n")
    Phylo.draw_ascii(tree)
    #unless you have a clock, PAML requires that your tree is unrooted, ie has a trifurcation at first node. So do that here
    root = tree.get_nonterminals()[0]
    if root.is_bifurcating():
        first_node = tree.get_nonterminals()[1]
        tree.collapse(first_node)
    #add notations to the tree to identify the 'foreground' branches
    #these are assigned to a monophyletic group of species assigned with the argument -clade
    #by default add "#1" to the branch leading to the clade. Change -inc from 'no' to make it inclusive,
    #adding #1 to the branch leading to the clade as well as all terminal branches.
    if Model == "2":
        print("\nAssigning the foreground branches in the tree based on the species given in the clade file...")
        print("These species make up the forground clade:")
        for spp in CladeList:
            print(spp)
        #identifying the foreground clade works differently depending on whether there are multiple species or just one
        several_species = len(CladeList) > 1
        if several_species:
            #add #1 to the node representing the common ancestor to your clade of interest, identifying it as the foreground lineage for the branch sites model
            tree.common_ancestor(CladeList).name = "#1"
        #terminal taxa are marked when the foreground is inclusive, or when only one
        #member of the clade list is left and it is the sole representative of the lineage
        if not several_species or Inclusive != 'no':
            for leaf in tree.get_terminals():
                if leaf.name in CladeList:
                    leaf.name = leaf.name + "#1"
    #if RunMode is not 2 just output the pruned tree as is
    print("\nOutputting the following revised tree for the species content of the sequence file")
    print("it should have a trifurcation at the base unless you are using a clock\n")
    Phylo.draw_ascii(tree)
    Phylo.write(tree, TreeOutFileName, "newick")
Ejemplo n.º 52
0
def view_tree(phyfile):
    """Show the PhyML tree belonging to ``phyfile`` as ASCII art and as a plot.

    The tree is read from ``phyfile + "_phyml_tree.txt"`` in Newick format,
    printed with a column width of 300, then drawn with ``Phylo.draw``
    without confidence values.
    """
    tree_path = phyfile + "_phyml_tree.txt"
    phyml_tree = Phylo.read(tree_path, "newick")
    Phylo.draw_ascii(phyml_tree, column_width=300)
    Phylo.draw(phyml_tree, do_show=True, show_confidence=False)
def tree(q):
    """Read the Clustal tree file for ``q`` and print it as ASCII art.

    The Newick file ``<q>.dnd`` at the root of drive ``D:`` is read.

    Returns:
        Whatever ``Phylo.draw_ascii`` returns.
    """
    # Raw string: same path as before, without the invalid escape sequence.
    guide_tree = Phylo.read(r"D:\{}.dnd".format(q), "newick")
    return Phylo.draw_ascii(guide_tree)
Ejemplo n.º 54
0
def createNewickTreeDrawGraphviz(name,name1):
    """Read a Newick tree and write its ASCII drawing to a file.

    Args:
        name: Path (or handle) of the Newick tree to read.
        name1: Path of the output file; it is created or truncated.
    """
    tree = Phylo.read(name, "newick")
    # The context manager closes the file even if drawing raises.
    with open(name1, 'w+') as target:
        Phylo.draw_ascii(tree, target)
 def draw_tree(self):
     """Print ``self.tree`` as ASCII art using ``Phylo.draw_ascii``."""
     Phylo.draw_ascii(self.tree)
Ejemplo n.º 56
0
			try:
				chars[names[col]].append(charvals[values.index(s[col])])
			except KeyError,ValueError:
				continue
		
	for key in chars:
		print key
		print len(chars[key])
	phyfilename = 'Afro'+'.phy'
	phyfile = open(phyfilename,'w')
	phyfile.write(str(len(chars.keys())) + ' ' + str(len(chars[chars.keys()[0]])) + '\n')
	for key in chars.keys():
		newkey = ''.join(key.split('-'))
		newkey += ''.join([' ']*int(20-len(newkey)))
		phyfile.write(newkey + ' ' + ''.join(chars[key]) + '\n')
		
	phyfile.close()
	t0 = time.time()
	aln = AlignIO.read(open(phyfilename), 'phylip-relaxed')
	scorer = ParsimonyScorer()
	searcher = NNITreeSearcher(scorer)
	constructor = ParsimonyTreeConstructor(searcher)
	pars_tree = constructor.build_tree(aln)
	timed = time.time()-t0
	print arg
	Phylo.draw_ascii(pars_tree)
	try:
		Phylo.draw(pars_tree)
	except:
		pass
Ejemplo n.º 57
0
            # Append the mat_list entry at the position of the smallest
            # value in ances_taxa[key][i] (first occurrence on ties).
            temp = ances_taxa[key][i]
            min_idx = temp.index(min(temp))
            sequence=sequence+mat_list[min_idx]
    # Store the assembled sequence for this key.
    ancestor[key] = sequence

#####################################################################################################
# Create the output report: tree sequence, tree structure, leaf nodes,
# leaf sequences and cost matrices, each section closed by a rule line.
outfile = file_path[:-19]+"out.txt"
_rule = "--------------------------------------------------------------------------------------------------------------------------\n"
with open(outfile, "w") as f_out:
    f_out.write("\tSankoff Algorithm on RNA Family "+file_path[:-19]+"\n")
    f_out.write(_rule)
    f_out.write("\tTree Sequence\n\n")
    f_out.write("\t"+treeseq+"\n")
    f_out.write(_rule)
    f_out.write("\tTree Structure\n")
    Phylo.draw_ascii(tree, file=f_out)
    f_out.write("\tp\t[lc, rc]\n\n")
    # Iterating a dict yields its keys on Python 2 and 3 alike.
    for key in tree_dict:
        f_out.write("\t"+str(key)+"\t"+str(tree_dict[key])+"\n")
    f_out.write(_rule)
    f_out.write("\tLeaf Nodes\n\n")
    for key in name_map:
        f_out.write("\t"+str(key)+"\t"+name_map[key]+"\n")
    f_out.write(_rule)
    f_out.write("\tLeaf Sequence\n\n")
    for key in tree_taxa:
        f_out.write("\t"+tree_taxa[key]+"\t"+str(key)+"\n")
    f_out.write(_rule)
    f_out.write("\tCost Matrix\n\n")
    pprint(mat_list, stream=f_out)
    pprint(cost_mat, stream=f_out)
#    for rec in gaps:
#        print rec + "\t" + str(gaps[rec])

# Calculate all distances, then repeatedly drop the taxon chosen by
# tree_distances() until the number of remaining taxa is taxa_to_keep.

all_distances = calc_all_distances(tree)

# now do the repeated drop
num_taxa = len(taxa)
while num_taxa > taxa_to_keep:
    # Single-argument print(...) calls give identical output on Python 2 and 3.
    print(num_taxa)
    to_prune = tree_distances(taxa, all_distances)
    tree.prune(to_prune)
    taxa = tree.get_terminals()
    num_taxa = len(taxa)

for tip in taxa:
    print(tip.name)
Phylo.draw_ascii(tree)

# Map each alignment record id to its sequence string.
aln_seqs = {record.id: str(record.seq) for record in alignment}

# With a third command-line argument, write the remaining taxa as FASTA.
if len(sys.argv) > 3:
    with open(sys.argv[3] + "_reduced.fasta", "w") as outh:
        for tip in taxa:
            outh.write(">" + str(tip.name) + "\n" + str(aln_seqs[tip.name]) + "\n")