예제 #1
0
 def test_treefile_source(self):
     """
     Round-trip a Newick file through PhyTrees.from_treefile() and
     PhyTrees.write(), then check the written tree file and report file.
     """
     source_path = 'Newick/f002.trees.newick'
     self.assertTrue(os.path.isfile(source_path))
     tree_db = PhyTrees.from_treefile(source_path, 'newick')
     written_path = 'tmp_test.newick'
     report_path = 'tmp_test.rep'
     # Register both outputs in files_to_clean (presumably removed by the
     # test fixture afterwards)
     for tmp_path in (written_path, report_path):
         self.files_to_clean.add(tmp_path)
     tree_db.write(written_path)
     self.assertTrue(os.path.isfile(written_path))
     # The written file must hold as many trees as the source file
     n_source = len(list(Phylo.parse(source_path, 'newick')))
     n_written = len(list(Phylo.parse(written_path, 'newick')))
     self.assertEqual(n_source, n_written)
     # Every report line must be the tree count, the history header or a
     # history entry (timestamp, source path, format)
     history_entry = re.compile(
         r"""\d\d\d\d/\d\d/\d\d\ \d\d:\d\d:\d\d[ ]+
                                    [ ]+.*Tests/Newick/f002\.trees\.newick
                                    \ +newick""", re.VERBOSE)
     with open(report_path, 'r') as report:
         for line in report:
             recognised = ('Num. trees: 9' in line
                           or 'History:' in line
                           or history_entry.match(line) is not None)
             self.assertTrue(recognised)
def make_paml_trees(alignment_loc, tree_loc, test=False):
    """
    Write one pruned species tree per alignment file.

    For every folder in FOLDERS, each ``aln_*`` alignment is scanned for the
    species it contains (lines starting with a lower-case letter; the first
    word, cut to 10 characters, is the species name).  The topology in
    'yeast_tree_topology.txt' is pruned of every entry of SPECIES_NAMES that
    is missing from the alignment and written in Newick format as
    ``tre_<gene>``, where ``<gene>`` is the alignment file name without its
    first four characters.

    Arguments:
        alignment_loc: directory with the alignments; read directly when
            ``test`` is true, otherwise one sub-directory per folder.
        tree_loc: output directory, with the same layout as alignment_loc.
        test: flat-directory mode; also makes the function return the
            content of the last tree file written.

    Returns:
        The lines of the last tree file written when ``test`` is true and at
        least one tree was written; an empty list otherwise.

    Raises:
        ValueError: if the topology file contains no tree.
    """
    sample_output = []
    file_loc = None

    for folder in FOLDERS:
        if test:
            pattern = os.path.join(alignment_loc, 'aln_*')
            out_dir = tree_loc
        else:
            pattern = os.path.join(alignment_loc, folder, 'aln_*')
            out_dir = os.path.join(tree_loc, folder)

        # Create the directory the trees are really written to; tolerate the
        # error only when the directory already exists.
        try:
            os.makedirs(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise

        for item in glob.glob(pattern):
            with open(item) as f_in:
                lines = f_in.readlines()
            if not lines:
                # An empty alignment only invalidates its own gene; keep
                # processing the remaining alignments of this folder.
                continue

            species = [line.split()[0][:10]
                       for line in lines if line[0].islower()]
            cut_nodes = set(SPECIES_NAMES).difference(species)

            # The topology is re-read for every alignment because pruning
            # modifies the tree in place.
            tree = None
            for tree in Phylo.parse('yeast_tree_topology.txt', "newick"):
                for node in cut_nodes:
                    tree.prune(node)
            if tree is None:
                raise ValueError(
                    "no tree found in 'yeast_tree_topology.txt'")

            # As before, only the last tree of the topology file is written.
            gene_name = os.path.basename(item)[4:]
            file_loc = os.path.join(out_dir, "tre_%s" % gene_name)
            Phylo.write(tree, file_loc, "newick")

    if test and file_loc is not None:
        with open(file_loc, 'r') as f_in:
            sample_output = f_in.readlines()
    return sample_output
예제 #3
0
def fool_around_with_trees():
    """
    Exploratory tour of Bio.Phylo using the trees stored in 'ape.tree'.

    Selects the tree named 'STATE_0', prints a few measurements for it,
    draws it in several ways and finally draws three consensus trees built
    from all trees of the file.  Returns an empty tuple.
    """
    for candidate in Phylo.parse('ape.tree', 'nexus'):
        if candidate.name != 'STATE_0':
            continue
        print('success')
        tree_state0 = candidate

    # Path between the root and 'human' (4 nodes, called clades here)
    print(tree_state0.get_path('human'))
    branch_total = tree_state0.total_branch_length()
    print('total_branch length=>' + str(branch_total))
    clade_depths = tree_state0.depths()
    print('depths=>' + str(clade_depths))
    # This value matched the author's manual calculation
    human_orangutan = tree_state0.distance('human', 'orangutan')
    print('distance human urangutan=>' + str(human_orangutan))

    # Plain drawing: works
    tree_state0.ladderize()
    Phylo.draw(tree_state0)

    # Author's note: Phylo.draw_graphviz() followed by pylab.show() did not
    # work for this tree.

    # ASCII drawing: works
    Phylo.draw_ascii(tree_state0)

    # Network graph: works, but the figure is not very clear
    graph = Phylo.to_networkx(tree_state0)
    networkx.draw(graph)
    pylab.show()

    # Author's note: reading a single '[&R] (...)' tree string through
    # StringIO with the 'nexus' format did not work.

    # Consensus trees: work, but are not very informative
    all_trees = list(Phylo.parse('ape.tree', 'nexus'))
    strict_tree = strict_consensus(all_trees)
    majority_tree = majority_consensus(all_trees, 0.5)
    adam_tree = adam_consensus(all_trees)
    for consensus in (strict_tree, majority_tree, adam_tree):
        Phylo.draw(consensus)

    return ()
예제 #4
0
파일: tree.py 프로젝트: alliblk/augur
 def check_newick(self, newick_file):
     """
     Check that the first tree of a Newick file covers exactly our sequences.

     Arguments:
         newick_file: file name or handle passed to Phylo.parse().

     Returns:
         True when the terminal names of the first tree equal the keys of
         self.sequence_lookup; False otherwise, including when the file
         cannot be read or parsed.
     """
     try:
         # next() works on Python 2 and 3; the generator method .next()
         # only exists on Python 2.
         tree = next(Phylo.parse(newick_file, 'newick'))
         tip_names = set(x.name for x in tree.get_terminals())
         # Compare explicitly instead of asserting: asserts are removed
         # when Python runs with -O.
         return tip_names == set(self.sequence_lookup.keys())
     except Exception:
         # Best-effort validation: any failure means "not a usable tree"
         return False
예제 #5
0
 def test_convert_phyloxml_filename(self):
     """Write phyloxml to a given filename."""
     trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
     # mkstemp() creates the file itself; mktemp() is deprecated because
     # another process can claim the returned name before it is used.
     handle, tmp_filename = tempfile.mkstemp()
     os.close(handle)
     try:
         count = Phylo.write(trees, tmp_filename, "phyloxml")
     finally:
         # Remove the temporary file even when writing fails
         os.remove(tmp_filename)
     self.assertEqual(13, count)
예제 #6
0
 def test_convert_phyloxml_filename(self):
     """Write phyloxml to a given filename."""
     trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
     # mkstemp() creates the file itself; mktemp() is deprecated because
     # another process can claim the returned name before it is used.
     handle, tmp_filename = tempfile.mkstemp()
     os.close(handle)
     try:
         count = Phylo.write(trees, tmp_filename, "phyloxml")
     finally:
         # Remove the temporary file even when writing fails
         os.remove(tmp_filename)
     self.assertEqual(13, count)
예제 #7
0
    def RootTree_cnv(self, OriNwk, Root):
        """
        Re-root a Newick tree on ``Root`` and move that taxon to the front.

        The Newick text is written to 'test.nwk', re-rooted with Bio.Phylo
        and written to 'newtree.nwk'.  The first line of that file is then
        edited as plain text: the characters between ',<Root>:0.00000' and
        the ':' preceding it (comma included) are taken as the branch
        length, '(<Root>:<branch length>' is put in front of the tree and
        the original '):<branch length><Root>:0.00000' fragment is removed.

        Arguments:
            OriNwk: Newick text of the tree to re-root.
            Root: name of the taxon to use as outgroup.

        Returns:
            The rewritten Newick line, terminated by a newline.
        """
        with open('test.nwk', 'w') as OutF:
            OutF.write(OriNwk)

        trees = list(Phylo.parse('test.nwk', 'newick'))
        for tree in trees:
            # root_with_outgroup() changes the tree in place
            tree.root_with_outgroup({'name': Root})

        Phylo.write(trees, 'newtree.nwk', "newick")

        with open('newtree.nwk', 'r') as InF:
            Tree = InF.readlines()[0].strip()
        Len = len(Tree)
        Posi = Tree.find(',' + Root + ':0.00000')
        # Same position expressed as a negative index, so that walking off
        # the start of the string raises IndexError instead of wrapping.
        PosRev = -1 * (Len - Posi)

        # Collect characters backwards until the preceding ':' is reached
        LastBraLen = ''
        while Tree[PosRev] != ':':
            LastBraLen += Tree[PosRev]
            PosRev = PosRev - 1
        BraLen = LastBraLen[::-1]
        NewTree = '(' + Root + ':' + BraLen + Tree[2:].replace(
            '):' + BraLen + Root + ':0.00000', '') + '\n'
        return NewTree
예제 #8
0
파일: preprocess.py 프로젝트: jwayne/mol455
def convert_boottrees(fname_trees):
    """
    Split a multi-tree Newick file into one Newick file per tree.

    Tree number ``i`` (counting from 0) is written to
    ``<fname_trees>.codeml-<i>``.

    Returns:
        The names of the files written, in input order.
    """
    written = []
    for index, boot_tree in enumerate(Phylo.parse(fname_trees, "newick")):
        target = "%s.codeml-%d" % (fname_trees, index)
        Phylo.write(boot_tree, target, "newick")
        written += [target]
    return written
예제 #9
0
    def from_treefile ( cls, treefile, fileformat ) :
        """
        Build a PhyTrees object from every tree stored in 'treefile'. A
        relative 'treefile' path is resolved through get_abspath() (against
        the current working directory).

        Arguments :
            treefile  ( string )
                Input tree file.
            fileformat  ( string )
                Input file format.

        Raises :
            IOError
                If the path or the file provided doesn't exist.

        * The file format must be supported by Bio.Phylo.
        * If the file format provided doesn't correspond to the actual file
        format, an empty tree list will be created.
        """
        filepath = get_abspath(treefile)
        loaded_trees = [tree  for tree in Phylo.parse(filepath, fileformat)]
        # The report starts with a single entry describing this load
        timestamp = datetime.now().strftime('%Y/%m/%d %H:%M:%S')
        first_entry = (timestamp, filepath, fileformat)
        return ( cls(loaded_trees, [first_entry]) )
예제 #10
0
    def __init__(self, taxon_term_table, panther_tree_nhx, slim_terms=None):
        """
        Load the taxon/term constraint table and the species tree.

        Arguments:
            taxon_term_table: path of a tab-separated table.  The header row
                names the taxa from its second column on; every other row
                holds a term followed by one value per taxon.
            panther_tree_nhx: path of the species tree, parsed as Newick
                (only the first tree is used).
            slim_terms: optional path of a file with one term per line; when
                it yields at least one term, only those terms are kept.
        """
        self.term_constraint_lists = {}
        self.taxon_indexes = {}
        self.slim_terms = []
        self.tree = None

        if slim_terms:
            # Get list of slim terms to filter for
            with open(slim_terms) as slim_file:
                self.slim_terms = [t.rstrip() for t in slim_file]
        # Set copy for O(1) membership tests; the attribute stays a list
        wanted_terms = set(self.slim_terms)

        with open(taxon_term_table) as t3f:
            headers = t3f.readline().rstrip().split("\t")
            for index, taxon in enumerate(headers[1:]):
                self.taxon_indexes[taxon] = index

            for row in t3f:
                if not row.strip():
                    continue  # ignore blank lines
                # Drop the line terminator so the last column is clean, the
                # same way the header row is stripped.
                cols = row.rstrip("\r\n").split("\t")
                go_term = cols[0]
                if not wanted_terms or go_term in wanted_terms:
                    self.term_constraint_lists[go_term] = cols[1:]

        logger.debug("taxon_indexes: {}".format(len(self.taxon_indexes)))
        logger.debug("term_constraint_lists: {}".format(len(self.term_constraint_lists)))

        # Parse species_tree
        self.tree = next(Phylo.parse(panther_tree_nhx, "newick"))
        self.tree.clade.name, self.tree.clade.id = extract_clade_name(self.tree.clade.comment)
        name_children(self.tree.clade)
예제 #11
0
def parse(file):
    """
    Parse the first Newick tree of ``file`` into the module-level globals.

    Sets three globals:
        rootnode: a Node built from the clade at depth 0.
        maxDepth: the largest depth when every branch counts as length 1.
        nodes: a list with one Node per clade of the tree.

    Arguments:
        file: file name or handle passed to Phylo.parse().

    Returns:
        None.
    """
    global rootnode, maxDepth, nodes

    tree = next(Phylo.parse(file, "newick"))

    # Dictionary of pairs (clade : depth), counting each branch as 1
    levels = tree.depths(unit_branch_lengths=True)

    # The root is the clade whose depth is 0
    root = list(levels.keys())[list(levels.values()).index(0)]
    rootnode = Node(root.name, levels[root], root.count_terminals(),
                    root.clades)
    maxDepth = max(levels.values())

    # Build one Node per clade.  The early return that used to sit inside
    # this loop stopped it after the first clade.
    nodes = [
        Node(clade.name, levels.get(clade, 0), clade.count_terminals(),
             clade.clades)
        for clade in tree.find_clades()
    ]
 def newTreeInProject(self, treename, treefile, projectTitle, treetype):
     import phyloimport_algorithm, root_phylotree_algorithm
     collectionName = self.returnCollectionForObjectByName(
         projectTitle, 'PhyloTree', treename)
     #collectionName = self.prefixString+projectTitle+self.separatorString+"PhyloTree"+self.separatorString+treename
     treeCollection = self.db[collectionName]
     print "uploading tree to collection: ", collectionName
     print "treetype is: ", treetype
     # create the new collection in mongo for this tree
     trees = Phylo.parse(treefile, treetype)
     #print "length of trees list: ",len(trees)
     for tree in trees:
         #process tree
         phyloimport_algorithm.recursive_clade(tree, treeCollection)
         root_phylotree_algorithm.addRootToTree(treeCollection)
         # add a tree record entry to the 'PyloTree' array in the project record
         self.db[self.projectCollectionName].update(
             {"name": projectTitle},
             {'$push': {
                 u'PhyloTree': {
                     treename: treefile
                 }
             }})
         self.db[self.projectCollectionName].update(
             {"name": projectTitle},
             {'$addToSet': {
                 u'datatypes': u'PhyloTree'
             }})
예제 #13
0
def read_trees(fname, species_polyploid, tree_repeats):
    """
    Load the input gene trees.

    Args:
        fname: tree file (one newick string per line)
        species_polyploid: list of Species objects
        tree_repeats: number of trees per locus

    Returns:
        list of parsed trees
    """
    # strip trailing whitespace from every line
    with open(fname, "r") as f:
        newick_lines = [raw.rstrip() for raw in f]

    # drop the blank lines at the end of the file
    while newick_lines and not newick_lines[-1]:
        newick_lines.pop()

    # append marker IDs, then parse the text through an in-memory handle
    renamed = rename_polyploids(newick_lines, species_polyploid,
                                tree_repeats)
    handle = io.StringIO("\n".join(renamed))
    return list(Phylo.parse(handle, "newick"))
예제 #14
0
    def from_phytrees ( cls, phytrees_file ) :
        """
        Rebuild a PhyTrees object from previously saved PhyTrees tree and
        report files. The report file is expected next to the tree file,
        with the extension replaced by '.rep'. A relative 'phytrees_file'
        path is resolved through get_abspath().

        Arguments :
            phytrees_file  ( string )
                Tree file generated by PhyTrees.write().

        Raises :
            ValueError
                If the number of trees read doesn't match the number stored in
                the report document.
        """
        data_filepath = get_abspath(phytrees_file)
        base_path = os.path.splitext(data_filepath)[0]
        report_filepath = base_path + '.rep'
        tree_list = [tree  for tree in Phylo.parse(data_filepath, 'newick')]
        report = []
        with open(report_filepath, 'r') as report_file :
            # First line: "<label>: <number of trees>"
            count_line = report_file.readline()
            expected = int(count_line.split(':')[-1])
            if ( expected != len(tree_list) ) :
                message = 'The number of trees at report file doesn\'t match ' \
                          'the number of trees loaded'
                raise ValueError(message)
            # Skip the "History:" line
            report_file.readline()
            # Remaining lines: three fields separated by four spaces
            for entry in report_file.readlines() :
                date_time, filepath, fileformat = entry.strip().split('    ')
                report += [(date_time, filepath, fileformat)]
        return ( cls(tree_list, report) )
예제 #15
0
def main(nexusfile, reftree, burnin=10):
    """
    Tally the tree topologies of a Nexus tree sample after the burn-in.

    Arguments:
        nexusfile: path of the Nexus file holding the sampled trees.
        reftree: reference tree handed to represent_clades().
        burnin: percentage of leading trees to discard (default 10).

    NOTE(review): several results (taxon labels, MAP topology and its
    probability, clades) are computed but neither used nor returned in the
    visible part of this function.
    """
    # Using the Nexus module
    data = Nexus.Nexus(nexusfile)
    taxlabels = data.structured[1].commandlines[1].options.split()
    nb2taxlabels = data.translate
    # Using the Phylo module
    trees = list(Phylo.parse(nexusfile, 'nexus'))

    N0 = len(trees)

    # Integer division: a slice index must be an int on Python 3
    trees = trees[N0 * burnin // 100 + 1:]
    N = N0 * (100 - burnin) // 100

    topologies = Counter()
    topo_groups = defaultdict(list)

    for tree in trees:
        # Ensure all equivalent topologies will be represented the same way
        biophylo_leaf_sort(tree, tree.root)
        topo = biophylo_topology(tree, tree.root)
        topologies[topo] += 1
        # Group the trees by topology (a defaultdict has no append())
        topo_groups[topo].append(tree)

    MAP_topology, MAP_count = topologies.most_common(1)[0]
    MAP_proba = float(MAP_count) / sum(topologies.values())

    clades = represent_clades(reftree, BioPhylo.get_children,
                              BioPhylo.get_label)
예제 #16
0
파일: phyloK2.py 프로젝트: PoonLab/kamphir
    def load_trees_from_file(self, handle):
        """
        Parse a file containing Newick tree strings.

        Each tree is rotated, normalized, optionally stripped of polytomies
        and annotated according to the instance settings, then appended to
        self.trees.  The kernel matrix and the cached delta values are reset
        afterwards.

        Arguments:
            handle: file name or handle passed to Phylo.parse().
        """
        self.trees = []

        for t in Phylo.parse(handle, 'newick'):
            if self.rotate == 'ladder':
                t.ladderize()
            elif self.rotate == 'random':
                scramble(t)

            if self.rotate2 != 'none':
                # NOTE(review): the bare names `subtree` and `rotate2` used
                # here before are not defined in this method; this assumes
                # the instance attributes were meant - confirm that
                # self.subtree exists.
                gravitate(t, subtree=self.subtree, mode=self.rotate2)

            if self.normalize != 'none':
                self.normalize_tree(t, mode=self.normalize)
            if self.resolve_poly:
                collapse_polytomies(t)

            self.annotate_tree(t)
            self.trees.append(t)

        # NOTE(review): this needs self.ntrees to be iterable; if it is a
        # tree count, range(self.ntrees) is required - confirm.
        self.kmat = [[0 for i in self.ntrees] for j in self.ntrees]
        self.is_kmat_computed = False
        self.delta_values = {}
예제 #17
0
 def check_newick(self, newick_file):
     """
     Check that the first tree of a Newick file covers exactly our sequences.

     Arguments:
         newick_file: file name or handle passed to Phylo.parse().

     Returns:
         True when the terminal names of the first tree equal the keys of
         self.sequence_lookup; False otherwise, including when the file
         cannot be read or parsed.
     """
     try:
         # next() works on Python 2 and 3; the generator method .next()
         # only exists on Python 2.
         tree = next(Phylo.parse(newick_file, 'newick'))
         tip_names = set(x.name for x in tree.get_terminals())
         # Compare explicitly instead of asserting: asserts are removed
         # when Python runs with -O.
         return tip_names == set(self.sequence_lookup.keys())
     except Exception:
         # Best-effort validation: any failure means "not a usable tree"
         return False
예제 #18
0
    def _calculate_gsi(self):
        """
        Build a majority consensus tree from the gene trees and save it.

        The number of '.bcg' files in the bcg directory scales the consensus
        cutoff.  The consensus tree is printed as ASCII art and written in
        Newick format to the alignment output directory.
        :return: None
        """
        LOGGER.info("Calculating Gene Support Indices (GSIs)"
                    " from the gene trees..")
        bcg_dir = os.path.join(self._dirpath, self.config.bcg_dir)
        genome_num = sum(
            1 for entry in os.listdir(bcg_dir) if entry.endswith('.bcg'))

        nwk_file = os.path.join(self._align_output_dir, "all_gene.trees")
        gene_trees = Phylo.parse(nwk_file, 'newick')
        cutoff = (100-self.config.gsi_threshold) * genome_num/100
        consensus = Consensus.majority_consensus(gene_trees, cutoff=cutoff)
        Phylo.draw_ascii(consensus)

        gsi_name = (f'UBCG_gsi({self._bcg_num})'
                    f'{self.config.postfixes.align_tree_const}')
        ubcg_gsi_file = os.path.join(self._align_output_dir, gsi_name)
        with open(ubcg_gsi_file, 'w') as out_handle:
            Phylo.write(consensus, out_handle, 'newick')

        LOGGER.info("The final tree marked with GSI was written"
                    " to %s", ubcg_gsi_file)
예제 #19
0
파일: tree_edit.py 프로젝트: crosenth/bioy
def action(args):
    """
    Rename the leaves of the first tree of ``args.tree`` and write it out.

    Each leaf goes through these steps, in order:
      1. with ``args.info`` (rows of seqname,newname): leaves whose name is
         not listed are left untouched, the others take their new name;
      2. with ``args.remove_word``: every match of that pattern is removed
         and the name is stripped;
      3. ``args.add_prefix`` and ``args.add_suffix`` are added;
      4. for newick trees, spaces become underscores.

    The tree is written to ``args.out`` in the ``args.tree_type`` format.
    """
    # next() works on Python 2 and 3; generators lost .next() in Python 3
    tree = next(Phylo.parse(args.tree, args.tree_type))

    info = None
    if args.info:
        reader = DictReader(args.info, fieldnames=['seqname', 'newname'])
        info = {row['seqname']: row['newname'] for row in reader}

        # for newick trees :s will be replaced by |s
        if args.tree_type == 'newick':
            info = {s.replace(':', '|'): n for s, n in info.items()}

    for leaf in tree.get_terminals():
        name = leaf.name

        if info is not None:
            if name not in info:
                continue
            name = info[name]

        if args.remove_word:
            name = re.sub(args.remove_word, '', name).strip()

        name = args.add_prefix + name + args.add_suffix

        # do this last
        if args.tree_type == 'newick':
            name = name.replace(' ', '_')

        leaf.name = name

    Phylo.write(tree, args.out, args.tree_type)
예제 #20
0
def action(args):
    """
    Rename the leaves of the first tree of ``args.tree`` and write it out.

    Each leaf goes through these steps, in order:
      1. with ``args.info`` (rows of seqname,newname): leaves whose name is
         not listed are left untouched, the others take their new name;
      2. with ``args.remove_word``: every match of that pattern is removed
         and the name is stripped;
      3. ``args.add_prefix`` and ``args.add_suffix`` are added;
      4. for newick trees, spaces become underscores.

    The tree is written to ``args.out`` in the ``args.tree_type`` format.
    """
    # next() works on Python 2 and 3; generators lost .next() in Python 3
    tree = next(Phylo.parse(args.tree, args.tree_type))

    info = None
    if args.info:
        reader = DictReader(args.info, fieldnames=['seqname', 'newname'])
        info = {row['seqname']: row['newname'] for row in reader}

        # for newick trees :s will be replaced by |s
        if args.tree_type == 'newick':
            info = {s.replace(':', '|'): n for s, n in info.items()}

    for leaf in tree.get_terminals():
        name = leaf.name

        if info is not None:
            if name not in info:
                continue
            name = info[name]

        if args.remove_word:
            name = re.sub(args.remove_word, '', name).strip()

        name = args.add_prefix + name + args.add_suffix

        # do this last
        if args.tree_type == 'newick':
            name = name.replace(' ', '_')

        leaf.name = name

    Phylo.write(tree, args.out, args.tree_type)
예제 #21
0
파일: phyloK2.py 프로젝트: ArtPoon/kamphir
 def load_trees_from_file (self, handle):
     """
     Parse a file containing Newick tree strings.

     Each tree is rotated, normalized, optionally stripped of polytomies
     and annotated according to the instance settings, then appended to
     self.trees.  The kernel matrix and the cached delta values are reset
     afterwards.

     Arguments:
         handle: file name or handle passed to Phylo.parse().
     """
     self.trees = []

     for t in Phylo.parse(handle, 'newick'):
         if self.rotate == 'ladder':
             t.ladderize()
         elif self.rotate == 'random':
             scramble(t)

         if self.rotate2 != 'none':
             # NOTE(review): the bare names `subtree` and `rotate2` used
             # here before are not defined in this method; this assumes
             # the instance attributes were meant - confirm that
             # self.subtree exists.
             gravitate(t, subtree=self.subtree, mode=self.rotate2)

         if self.normalize != 'none':
             self.normalize_tree(t, mode=self.normalize)
         if self.resolve_poly:
             collapse_polytomies(t)

         self.annotate_tree(t)
         self.trees.append(t)

     # NOTE(review): this needs self.ntrees to be iterable; if it is a
     # tree count, range(self.ntrees) is required - confirm.
     self.kmat = [[0 for i in self.ntrees] for j in self.ntrees]
     self.is_kmat_computed = False
     self.delta_values = {}
    def newTreeInProjectFromString(self,treename,treestring,projectTitle, description,treetype):
        import phyloimport_algorithm, root_phylotree_algorithm

        collectionName = self.prefixString+projectTitle+self.separatorString+"PhyloTree"+self.separatorString+treename
        treeCollection = self.db[collectionName]
        treeCollection.drop()
        print "uploading tree to collection: ",collectionName
        print "treetype is: ",treetype

        # if the project does not exist, create it
        projectCollectionName = self.prefixString + 'projects'
        if self.db[projectCollectionName].find_one({"name": projectTitle}) == None:
            self.newProject(projectTitle)

        # create the new collection in mongo for this tree.  The tree is encoded
        # in a string, so it needs to be processed slightly different than from a file
        from StringIO import StringIO
        handle = StringIO(treestring)
        trees = Phylo.parse(handle, treetype)
        #print "length of trees list: ",len(trees)
        for tree in trees:
            phyloimport_algorithm.recursive_clade(tree, treeCollection)
            # add a tree record entry to the 'PyloTree' array in the project record
            self.db[self.projectCollectionName].update({"name": projectTitle}, { '$push': {u'PhyloTree': {treename:str(description)}}})
            self.db[self.projectCollectionName].update({"name": projectTitle}, { '$addToSet': {u'datatypes': u'PhyloTree'}})
            # make sure the tree is rooted, so viewers work
            root_phylotree_algorithm.addRootToTree(treeCollection)

            # emit a signal so the GUI knows to update
            if (self.QtGuiEnabled):
                self.datatypeListChangedSignal.emit();
                self.datasetListChangedSignal.emit();
예제 #23
0
def main():
    """
    Demo: show the example tree, compare it with a second tree and
    highlight the common ancestor of the clades named "C" and "D".
    """
    # first tree of the example file
    tree_object = next(Phylo.parse(PATH_EXAMPLE, 'newick'))

    # basic information about the tree, then a drawing of it
    summary = tree_info(tree_object)
    print(summary)
    Phylo.draw(tree_object)

    # distances between the two trees, loaded with a shared namespace
    tns = dendropy.TaxonNamespace()
    tre_one, tre_two = (
        Tree.get_from_path(path, 'newick', taxon_namespace=tns)
        for path in (PATH_EXAMPLE, PATH_BIF)
    )
    euclidean_distance = treecompare.euclidean_distance(tre_one, tre_two)
    robinson_distance = treecompare.robinson_foulds_distance(tre_one, tre_two)
    print("Robinson Foulds distance: ", robinson_distance)
    print("Euclidean distance: ", euclidean_distance)

    # common ancestor of "C" and "D", coloured blue and drawn on its own
    ancestor = tree_object.common_ancestor({"name": "C"}, {"name": "D"})
    ancestor.color = "blue"
    print("COMMON ANCESTOR: ", ancestor)
    Phylo.draw(ancestor)
예제 #24
0
def main():
    """
    Command-line entry point: report tips of each tree that lie within a
    maximum path length of each other.

    Reads the Newick trees of the 'tree' argument, calls find_short_edges()
    on each with the 'cutoff' argument and writes one CSV row per result to
    'outfile'.  An existing output file is only replaced with --overwrite.
    """
    parser = argparse.ArgumentParser(
        description='Generate clusters of tips from a tree that have a path length within '
                    'a maximum distance of each other.'
    )
    parser.add_argument('tree', help='<input> file containing Newick tree string.')
    parser.add_argument('cutoff', type=float, help='Maximum patristic distance.')
    parser.add_argument('outfile', default=None, help='<output> file to write results in CSV format.')
    parser.add_argument('--minimize', help='Report no more than one nearest neighbour per tip.', action='store_true')
    parser.add_argument('--keep_ties', help='If more than one tip has the same patristic distance, '
                        'report all as nearest neighbours.', action='store_true')
    parser.add_argument('--overwrite', help='Overwrite existing output file.', action='store_true')
    args = parser.parse_args()

    assert args.cutoff > 0, 'Cutoff %f must be greater than 0.' % (args.cutoff, )

    if os.path.exists(args.outfile) and not args.overwrite:
        # Written through sys.stdout so the message is identical on
        # Python 2 and Python 3.
        sys.stdout.write('Output file %s already exists, use --overwrite.\n'
                         % (args.outfile, ))
        sys.exit()

    # The with-block closes the output file even if a tree fails to parse
    with open(args.outfile, 'w') as outfile:
        # NOTE(review): the header names five columns but each row below
        # writes four (no 'is.tie' value) - confirm the intended format.
        outfile.write('tree,tip1,tip2,dist,is.tie\n')

        for treenum, tree in enumerate(Phylo.parse(args.tree, 'newick')):
            results = find_short_edges(tree, args.cutoff)
            # items() exists on both Python 2 and 3, unlike iteritems()
            for key, dist in results.items():
                outfile.write('%d,%s,%s,%f\n' % (treenum, key[0], key[1], dist))
예제 #25
0
def nexus2nhx(infile, outfile):
    """
    Convert the trees of a Nexus file to Newick with reformatted comments.

    Designed to work with Beast/treeannotator output: the comment of every
    node is run through beast_comment_parser() and NHX_comment_formatter().
    """
    with open(outfile, 'w') as nhx_handle:
        for beast_tree in Phylo.parse(infile, 'nexus'):
            every_node = (beast_tree.get_terminals()
                          + beast_tree.get_nonterminals())
            for clade in every_node:
                parsed = beast_comment_parser(clade.comment)
                clade.comment = NHX_comment_formatter(parsed)
            Phylo.write(beast_tree, nhx_handle, 'newick')
예제 #26
0
 def __init__(self, filename):
     """
     Load the first Newick tree of 'filename' and read the run settings
     from the file name.

     The base name is split on '_'; its 2nd, 5th and 6th fields are taken
     as the '-'-separated genes, the method and the (integer) pops value.
     """
     first_tree = next(Phylo.parse(filename,'newick'))
     self.data = np.array(getConfidence(first_tree.root))
     name_fields = os.path.basename(filename).split('_')[:6]
     _, genes, _, _, method, pops = name_fields
     self.method = method
     self.pops = int(pops)
     self.genes = genes.split('-')
     # short label: "<genes>/<pops>"
     self.repr = f'{genes}/{self.pops}'
예제 #27
0
 def test_parse(self):
     """Extract and count phylogenetic trees using Phylo.parse."""
     for filename in nexml_files:
         path = os.path.join("NeXML", filename)
         # files missing from tree_counts are expected to hold one tree
         expected = tree_counts.get(filename, 1)
         parsed = [tree for tree in Phylo.parse(path, "nexml")]
         self.assertEqual(len(parsed), expected,
                          msg="Failed parser test for %s" % path)
예제 #28
0
def get_species_names_in_tree(trees_filename):
	"""Return the set of terminal names found in the trees of a Newick file."""
	logger.debug("Getting species names from trees %s" % trees_filename)
	return {
		leaf.name
		for tree in Phylo.parse(trees_filename, "newick")
		for leaf in tree.get_terminals()
	}
 def test_majority_consensus(self):
     """
     Compare majority_consensus() with the two reference trees: the first
     with default arguments, the second with 1 as second argument.
     """
     expected_trees = Phylo.parse('./TreeConstruction/majority_ref.tre', 'newick')
     for extra_args in ((), (1,)):
         expected = next(expected_trees)
         observed = Consensus.majority_consensus(self.trees, *extra_args)
         self.assertTrue(Consensus._equal_topology(observed, expected))
예제 #30
0
 def test_majority_consensus(self):
     """
     Compare majority_consensus() with the two reference trees: the first
     with default arguments, the second with 1 as second argument.
     """
     expected_trees = Phylo.parse('./TreeConstruction/majority_ref.tre', 'newick')
     for extra_args in ((), (1,)):
         expected = next(expected_trees)
         observed = Consensus.majority_consensus(self.trees, *extra_args)
         self.assertTrue(Consensus._equal_topology(observed, expected))
예제 #31
0
def load_tree_impl(file):
    """
    Load a tree, save a drawing of it and return summary figures.

    The format is looked up in phylo_formats by file suffix.  When
    Phylo.read() raises ValueError, the first tree yielded by Phylo.parse()
    is used instead.

    Returns:
        A tuple (path of the saved '.png' drawing, number of terminals,
        total branch length).
    """
    source = pathlib.Path(file)
    try:
        tree = Phylo.read(file, phylo_formats[source.suffix])
    except ValueError:
        tree = next(Phylo.parse(file, phylo_formats[source.suffix]))
    Phylo.draw(tree, do_show=False)
    image_path = source.with_suffix(".png")
    plt.savefig(image_path)
    converted = tree.as_phyloxml()
    terminal_count = converted.count_terminals()
    branch_total = converted.total_branch_length()
    return image_path, terminal_count, branch_total
예제 #32
0
 def test_convert_phyloxml_binary(self):
     """Try writing phyloxml to a binary handle; fail on Py3."""
     trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
     running_py2 = sys.version_info[0] < 3
     with tempfile.NamedTemporaryFile(mode="wb") as out_handle:
         if not running_py2:
             # on Python 3 the binary handle must be rejected
             with self.assertRaises(TypeError):
                 Phylo.write(trees, out_handle, "phyloxml")
             return
         count = Phylo.write(trees, out_handle, "phyloxml")
         self.assertEqual(13, count)
예제 #33
0
def mbtrees(input_file, output_file, **kwargs):
    """
    Extract a subset of the trees in a MrBayes Nexus file as Newick.

    A tree at position ``i`` (from 0) is kept when ``i`` is a multiple of
    ``kwargs["downsample"]`` and greater than ``kwargs["burnin"]``.
    """
    from Bio import Phylo
    with open(output_file, "w") as newick_out:
        sampled_trees = Phylo.parse(input_file, format="nexus")
        for index, sampled in enumerate(sampled_trees):
            if index % kwargs["downsample"] != 0:
                continue
            if index > kwargs["burnin"]:
                Phylo.write(sampled, newick_out, format="newick")
예제 #34
0
 def test_include(self):
     """
     Check that PhyTrees.include() adds the trees of a second file and
     records that file in the report.
     """
     newick_path = 'Newick/f002.trees.newick'
     nexus_path = 'Nexus/f005.trees.nexus'
     for path in (newick_path, nexus_path):
         self.assertTrue(os.path.isfile(path))
     tree_db = PhyTrees.from_treefile(newick_path, 'newick')
     tree_db.include(nexus_path, 'nexus')
     # The database must hold the trees of both files
     n_newick = len(list(Phylo.parse(newick_path, 'newick')))
     n_nexus = len(list(Phylo.parse(nexus_path, 'nexus')))
     self.assertEqual(n_newick + n_nexus, len(tree_db))
     # One report entry per file, in loading order: (_, path, format)
     expected_report = (
         ('Tests/Newick/f002.trees.newick', 'newick'),
         ('Tests/Nexus/f005.trees.nexus', 'nexus'),
     )
     for index, (path_tail, fmt) in enumerate(expected_report):
         self.assertIn(path_tail, tree_db._report[index][1])
         self.assertIn(fmt, tree_db._report[index][2])
예제 #35
0
 def test_include ( self ) :
     """
     Check that PhyTrees.include() adds the trees of a second file and
     records that file in the report.
     """
     newick_path = 'Newick/f002.trees.newick'
     nexus_path = 'Nexus/f005.trees.nexus'
     for path in (newick_path, nexus_path) :
         self.assertTrue(os.path.isfile(path))
     tree_db = PhyTrees.from_treefile(newick_path, 'newick')
     tree_db.include(nexus_path, 'nexus')
     # The database must hold the trees of both files
     n_newick = len(list(Phylo.parse(newick_path, 'newick')))
     n_nexus = len(list(Phylo.parse(nexus_path, 'nexus')))
     self.assertEqual(n_newick + n_nexus, len(tree_db))
     # One report entry per file, in loading order: (_, path, format)
     expected_report = (
         ('Tests/Newick/f002.trees.newick', 'newick'),
         ('Tests/Nexus/f005.trees.nexus', 'nexus'),
     )
     for index, (path_tail, fmt) in enumerate(expected_report) :
         self.assertIn(path_tail, tree_db._report[index][1])
         self.assertIn(fmt, tree_db._report[index][2])
예제 #36
0
    def RootTree_rootBottom(self, OriNwk, RootTaxa):
        """
        Re-root a Newick tree on ``RootTaxa`` using Bio.Phylo.

        The Newick text is written to 'test.nwk', every tree in it is
        re-rooted with RootTaxa as outgroup and the result is written to
        'test1.nwk'.

        Arguments:
            OriNwk: Newick text of the tree to re-root.
            RootTaxa: name of the taxon to use as outgroup.

        Returns:
            The first line of 'test1.nwk', line terminator included.
        """
        with open('test.nwk', 'w') as OutF:
            OutF.write(OriNwk)
        trees = list(Phylo.parse('test.nwk', 'newick'))
        for tree in trees:
            # root_with_outgroup() changes the tree in place
            tree.root_with_outgroup({'name': RootTaxa})
        Phylo.write(trees, 'test1.nwk', "newick")

        # Close the result file instead of leaking the handle
        with open('test1.nwk', 'r') as InF:
            return InF.readlines()[0]
예제 #37
0
 def test_convert_phyloxml_binary(self):
     """Try writing phyloxml to a binary handle; fail on Py3."""
     trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
     running_py2 = sys.version_info[0] < 3
     with tempfile.NamedTemporaryFile(mode="wb") as out_handle:
         if not running_py2:
             # on Python 3 the binary handle must be rejected
             with self.assertRaises(TypeError):
                 Phylo.write(trees, out_handle, "phyloxml")
             return
         count = Phylo.write(trees, out_handle, "phyloxml")
         self.assertEqual(13, count)
예제 #38
0
def Tree_Filter(tree_url, value):
    """
    Tell whether any tree downloaded from a URL has an element named like value.

    The response body of a GET request to ``tree_url`` is parsed as Newick;
    each tree is searched with ``find_any`` using the name criterion
    ``.*<value>.*``.

    :param tree_url: URL whose response text contains the Newick tree(s)
    :param value: text interpolated into the name criterion
    :return: True as soon as one tree yields a match, False otherwise
    """
    newick_text = requests.get(tree_url).text
    name_criterion = ".*{0}.*".format(value)
    return any(
        tree.find_any(name=name_criterion) is not None
        for tree in Phylo.parse(StringIO(newick_text), "newick")
    )
예제 #39
0
    def __init__(self, tree_file):
        """
        Load the tree stored on the first line of ``tree_file``.

        Only the first line of the file is read; it is parsed as Newick and
        the first resulting tree is stored in ``self.tree``.
        """
        with open(tree_file) as handle:
            first_line = handle.readline()

        # The stock newick parser is used here rather than the
        # PantherNewickIOParser the original author also experimented with.
        # Original author's note: leaves parse clean due to not having the
        # species name in 'S:'.
        newick_handle = StringIO(first_line)
        self.tree: Newick.Tree = next(Phylo.parse(newick_handle, "newick"))
예제 #40
0
파일: test_Phylo.py 프로젝트: ajm/biopython
    def test_convert_phyloxml_to_newick_branch_length_only(self):
        """Write phyloxml with bootstrap values to newick format using branch_length_only=True"""
        trees = Phylo.parse(EX_APAF, "phyloxml")
        # mkstemp() creates the file atomically (mktemp() is deprecated and
        # race-prone); only the path is needed, so the descriptor is closed.
        descriptor, tmp_filename = tempfile.mkstemp()
        os.close(descriptor)

        try:
            Phylo.write(trees, tmp_filename, "newick", branch_length_only=True)
        except TypeError:
            self.fail("Phylo.write raised TypeError with branch_length_only=True")
        finally:
            # Always clean up, including when the write fails
            os.remove(tmp_filename)
예제 #41
0
def detect_type(filename):
    """
    Guess the content type of a file.

    :param filename: File to read and detect the format
    :return: the MIME type reported by ``magic`` when it is not
        "text/plain"; otherwise "fasta", "phylip" (strict or relaxed
        phylip), "nhx" (parsable as newick), or "txt" when no parser
        yields anything

    Formats are probed in that order with biopython SeqIO, then Phylo; a
    parser that raises is treated as "not this format".
    """
    mimetype = magic.from_file(filename, mime=True)
    if mimetype != "text/plain":
        return mimetype

    # (format name given to SeqIO, label reported to the caller)
    sequence_probes = (
        ("fasta", "fasta"),
        ("phylip", "phylip"),
        ("phylip-relaxed", "phylip"),
    )
    for seq_format, label in sequence_probes:
        try:
            record_count = sum(1 for _ in SeqIO.parse(filename, seq_format))
        except Exception:
            continue
        if record_count > 0:
            return label

    # Last resort: try to read the file as Newick trees
    try:
        tree_count = sum(1 for _ in Phylo.parse(filename, 'newick'))
    except Exception:
        tree_count = 0
    if tree_count > 0:
        return "nhx"

    return "txt"
예제 #42
0
def load_tree(tree_filename):
    """
    Collect the tip names of every tree in a Newick file.

    Returns None when ``tree_filename`` is None; otherwise returns the set
    of names produced by ``get_tip_names`` for the root of each tree, after
    printing a short progress report.
    """
    if tree_filename is None:
        return None

    print('\nLoading tree:')
    whitelist_assemblies = set()
    for tree in Phylo.parse(tree_filename, 'newick'):
        whitelist_assemblies.update(get_tip_names(tree.root))
    summary = '    found {:,} assemblies in {}'
    print(summary.format(len(whitelist_assemblies), tree_filename))
    return whitelist_assemblies
예제 #43
0
 def test_majority_consensus(self):
     """Compare majority_consensus() output with the reference topologies."""
     reference_trees = list(
         Phylo.parse("./TreeConstruction/majority_ref.tre", "newick"))
     # Call without a second argument: expected to match the first reference
     default_result = Consensus.majority_consensus(self.trees)
     self.assertTrue(
         Consensus._equal_topology(default_result, reference_trees[0]))
     # Call with 1 as second argument: expected to match the second reference
     result_with_one = Consensus.majority_consensus(self.trees, 1)
     self.assertTrue(
         Consensus._equal_topology(result_with_one, reference_trees[1]))
예제 #44
0
def checkLength(treeFile):
    """
    Tell whether the average branch length of a phyloXML tree is at least 1.

    Only the first tree of ``treeFile`` is examined; its branch lengths are
    obtained from ``getAllBranchLength``.

    Args:
        treeFile: path (or handle) of a phyloXML file.

    Returns:
        False when the average branch length is below 1, True otherwise.

    Raises:
        ZeroDivisionError: if ``getAllBranchLength`` returns no values.
    """
    # The next() builtin works on Python 2.6+ and 3, whereas the
    # generator's .next() method only exists on Python 2.
    tree = next(Phylo.parse(treeFile, 'phyloxml'))
    branchList = getAllBranchLength(tree)
    # Builtin sum()/len() replace the manual index and counter bookkeeping
    # (the old code also shadowed the builtin 'sum').
    avg = sum(branchList) / len(branchList)
    return not avg < 1
예제 #45
0
 def test_treefile_source(self):
     """
     Round-trip a Newick file through PhyTrees.from_treefile() and
     PhyTrees.write(), then inspect the generated report file.
     """
     infile = 'Newick/f002.trees.newick'
     self.assertTrue(os.path.isfile(infile))
     tree_db = PhyTrees.from_treefile(infile, 'newick')
     outfile = 'tmp_test.newick'
     outrepfile = 'tmp_test.rep'
     for generated in (outfile, outrepfile):
         self.files_to_clean.add(generated)
     tree_db.write(outfile)
     self.assertTrue(os.path.isfile(outfile))
     # Input and output must contain the same number of trees
     trees_read = len(list(Phylo.parse(infile, 'newick')))
     trees_written = len(list(Phylo.parse(outfile, 'newick')))
     self.assertEqual(trees_read, trees_written)
     # Every report line is either a header line or a dated source entry
     with open(outrepfile, 'r') as repfile:
         for line in repfile:
             is_header = ('Num. trees: 9' in line) or ('History:' in line)
             is_source_entry = bool(re.match(r"""\d\d\d\d/\d\d/\d\d\ \d\d:\d\d:\d\d[ ]+
                                    [ ]+.*Tests/Newick/f002\.trees\.newick
                                    \ +newick""", line, re.VERBOSE))
             self.assertTrue(is_header or is_source_entry)
예제 #46
0
def __main__():
    """
    Build one HTML page per tree file contained in a zip archive.

    The archive path is taken from ``sys.argv[1]`` and extracted into the
    current directory. Each archive member is checked with ``checkLength``;
    when the check fails, the first tree of the member is passed through
    ``changeLength`` and saved as '<stem>_adj.xml', which then replaces the
    member as the page's tree file. Pages are named 'tree_<index>.html' and
    produced by ``writeHeaderFile`` followed by ``writeEndofFile``.
    """
    # The context manager closes the archive even if extraction fails
    with zipfile.ZipFile(sys.argv[1], 'r') as aZip:
        aZip.extractall('.')
        member_names = aZip.namelist()

    # enumerate() supplies the page index (the old counter shadowed 'id')
    for index, treeFile in enumerate(member_names):
        if not checkLength(treeFile):
            # next() builtin: works on Python 2.6+ and 3, unlike .next()
            tree = next(Phylo.parse(treeFile, 'phyloxml'))
            newTree = changeLength(tree)
            newTName = treeFile.split('.')[0] + '_adj.xml'
            Phylo.write(newTree, newTName, 'phyloxml')
            treeFile = newTName
        outputFileName = 'tree_' + str(index) + '.html'
        writeHeaderFile(outputFileName)
        writeEndofFile(outputFileName, treeFile)
 def newTreeInProject(self,treename,treefile,projectTitle, treetype):
     import phyloimport_algorithm, root_phylotree_algorithm
     collectionName = self.returnCollectionForObjectByName(projectTitle, 'PhyloTree', treename)
     #collectionName = self.prefixString+projectTitle+self.separatorString+"PhyloTree"+self.separatorString+treename
     treeCollection = self.db[collectionName]
     print "uploading tree to collection: ",collectionName
     print "treetype is: ",treetype
     # create the new collection in mongo for this tree
     trees = Phylo.parse(treefile, treetype)
     #print "length of trees list: ",len(trees)
     for tree in trees:
         #process tree
         phyloimport_algorithm.recursive_clade(tree, treeCollection)
         root_phylotree_algorithm.addRootToTree(treeCollection)
         # add a tree record entry to the 'PyloTree' array in the project record
         self.db[self.projectCollectionName].update({"name": projectTitle}, { '$push': {u'PhyloTree': {treename:treefile}}})
         self.db[self.projectCollectionName].update({"name": projectTitle}, { '$addToSet': {u'datatypes': u'PhyloTree'}})
예제 #48
0
 def test_strict_consensus(self):
     """Compare strict_consensus() output with the reference topologies."""
     reference_trees = list(
         Phylo.parse('./TreeConstruction/strict_refs.tre', 'newick'))
     # (input trees, index of the matching reference tree)
     cases = (
         (self.trees, 0),        # three trees
         (self.trees[:2], 1),    # tree 1 and tree 2
         (self.trees[::2], 2),   # tree 1 and tree 3
     )
     for input_trees, reference_index in cases:
         consensus_tree = Consensus.strict_consensus(input_trees)
         self.assertTrue(Consensus._equal_topology(
             consensus_tree, reference_trees[reference_index]))
    def newTreeInProject(self,treename,treefile,projectTitle, treetype):
        collectionName = self.prefixString+projectTitle+"_"+"PhyloTree"+"_"+treename
        treeCollection = self.db[collectionName]
        print "uploading tree to collection: ",collectionName
        print "treetype is: ",treetype
        # create the new collection in mongo for this tree
        trees = Phylo.parse(treefile, treetype)
        #print "length of trees list: ",len(trees)
        for tree in trees:
            #process tree
            phyloimport_algorithm.recursive_clade(tree, treeCollection)
            # add a tree record entry to the 'PyloTree' array in the project record
            self.db.ar_projects.update({"name": projectTitle}, { '$push': {u'PhyloTree': {treename:treefile}}})
            self.db.ar_projects.update({"name": projectTitle}, { '$addToSet': {u'datatypes': u'PhyloTree'}})

        # emit a signal so the GUI knows to update
        self.datatypeListChangedSignal.emit(); 
        self.datasetListChangedSignal.emit();               
def readconvert(filesuffix, treeformat_input, treeformat_output, namesfile):
    """
    Rename the tips of every tree file in the current directory.

    Args:
        filesuffix: extension of the tree files to process; any dots in it
            are removed before globbing for '*.<suffix>'.
        treeformat_input: format name used to parse the tree files.
        treeformat_output: format name used to write the renamed trees.
        namesfile: path of a tab-separated file whose first column is the
            current tip name and whose second column is the replacement.

    Tips whose name is missing from the table keep their name and are
    reported on stdout. Output goes to the input file name with '.tree'
    replaced by '_tipsrenamed.tree'.
    """
    idtable = {}
    # 'with' closes the names file, which the previous version leaked
    with open(namesfile, "r") as names_handle:
        for line in names_handle:
            stripped = line.rstrip()
            if not stripped:
                # Blank lines (e.g. a trailing one) carry no mapping
                continue
            fields = stripped.split("\t")
            idtable[fields[0]] = fields[1]

    suffix = str(filesuffix.replace('.', ''))
    for tree_path in glob.glob('*.' + suffix):
        out_path = tree_path.replace('.tree', '_tipsrenamed.tree')
        for t in Phylo.parse(tree_path, treeformat_input):
            for node in t.get_terminals():
                name = node.name
                if name in idtable:
                    node.name = idtable[name]
                else:
                    # Single parenthesised argument: same output on
                    # Python 2 and Python 3
                    print(name + ' not in table')
            # NOTE(review): every tree of a file is written to the same
            # path, so only the last one appears to survive for multi-tree
            # inputs; kept as-is, confirm whether that is intended.
            Phylo.write(t, out_path, treeformat_output)
예제 #51
0
 def test_adam_consensus(self):
     """Compare adam_consensus() output with the reference topologies."""
     reference_trees = list(
         Phylo.parse("./TreeConstruction/adam_refs.tre", "newick"))
     # (input trees, index of the matching reference tree)
     cases = (
         (self.trees, 0),        # three trees
         (self.trees[:2], 1),    # tree 1 and tree 2
         (self.trees[::2], 2),   # tree 1 and tree 3
     )
     for input_trees, reference_index in cases:
         consensus_tree = Consensus.adam_consensus(input_trees)
         self.assertTrue(Consensus._equal_topology(
             consensus_tree, reference_trees[reference_index]))
예제 #52
0
 def test_bioseqs_source(self):
     """
     Load a saved tree file with PhyTrees.from_phytrees() and compare its
     len() and first report entry with the files on disk.
     """
     infile = 'PhyTrees/f002.trees.newick'
     inrepfile = 'PhyTrees/f002.trees.rep'
     for required in (infile, inrepfile):
         self.assertTrue(os.path.isfile(required))
     tree_db = PhyTrees.from_phytrees(infile)
     # The object must contain as many trees as the Newick file
     trees_on_disk = list(Phylo.parse(infile, 'newick'))
     self.assertEqual(len(tree_db), len(trees_on_disk))
     # The report file must agree with the object's report
     with open(inrepfile, 'r') as repfile:
         count_line = repfile.readline().strip()  # Num. trees: 9
         self.assertEqual(len(tree_db), int(count_line[-2:]))
         repfile.readline()  # History: (header line, skipped)
         first_source = repfile.readline().strip()  # first source entry
         expected_entry = tuple(first_source.split('    '))
         self.assertEqual(tree_db._report[0], expected_entry)
 def newTreeInProjectFromString(self,treename,treestring,projectTitle, description,treetype):
     collectionName = self.prefixString+projectTitle+"_"+"PhyloTree"+"_"+treename
     treeCollection = self.db[collectionName]
     print "uploading tree to collection: ",collectionName
     print "treetype is: ",treetype
     # create the new collection in mongo for this tree.  The tree is encoded 
     # in a string, so it needs to be processed slightly different than from a file
     from StringIO import StringIO
     handle = StringIO(treestring)
     trees = Phylo.parse(handle, treetype)
     #print "length of trees list: ",len(trees)
     for tree in trees:
         phyloimport_algorithm.recursive_clade(tree, treeCollection)
         # add a tree record entry to the 'PyloTree' array in the project record
         self.db.ar_projects.update({"name": projectTitle}, { '$push': {u'PhyloTree': {treename:str(description)}}})
         self.db.ar_projects.update({"name": projectTitle}, { '$addToSet': {u'datatypes': u'PhyloTree'}})
         # make sure the tree is rooted, so viewers work
         root_phylotree_algorithm.addRootToTree(treeCollection)
         # emit a signal so the GUI knows to update
         self.datatypeListChangedSignal.emit(); 
         self.datasetListChangedSignal.emit();               
예제 #54
0
def consensus(outdir, min_freq=0.5, is_rooted=True,
              trees_splits_encoded=False):
    """Generate a rooted consensus tree.

    Reads '<outdir>/distribution.tre', prunes every tip that is not present
    in all trees, and writes the consensus built by dendropy to
    '<outdir>/consensus.tre'.

    Returns False (without writing anything) when fewer than three tips are
    shared by all trees, True otherwise. ``is_rooted`` and
    ``trees_splits_encoded`` are accepted but currently not used.
    """
    distribution_path = os.path.join(outdir, 'distribution.tre')
    phylogenies = list(Phylo.parse(distribution_path, 'newick'))
    # Tip names per tree, in the same order as 'phylogenies'
    all_tip_names = [[tip.name for tip in phylogeny.get_terminals()]
                     for phylogeny in phylogenies]
    # A tip seen fewer times than there are trees is missing somewhere
    counted = Counter(name for names in all_tip_names for name in names)
    n_trees = len(phylogenies)
    to_drop = [name for name in counted if counted[name] < n_trees]
    if len(counted) - len(to_drop) < 3:
        return False
    for tip_names, phylogeny in zip(all_tip_names, phylogenies):
        for tip_name in tip_names:
            if tip_name in to_drop:
                phylogeny.prune(tip_name)
    # Hand the pruned trees over to dendropy through a scratch file
    scratch_path = '.for_consensus.tre'
    with open(scratch_path, "w") as scratch_handle:
        Phylo.write(phylogenies, scratch_handle, 'newick')
    trees = dp.TreeList()
    trees.read_from_path(scratch_path, "newick", rooting='force-rooted')
    os.remove(scratch_path)
    # https://groups.google.com/forum/#!topic/dendropy-users/iJ32ibnS5Bc
    split_distribution = dp.SplitDistribution(
        taxon_namespace=trees.taxon_namespace)
    summarizer = dp.calculate.treesum.TreeSummarizer()
    summarizer.count_splits_on_trees(trees,
                                     split_distribution=split_distribution)
    consensus_tree = summarizer.tree_from_splits(split_distribution,
                                                 min_freq=min_freq)
    consensus_tree.write_to_path(os.path.join(outdir, 'consensus.tre'),
                                 "newick")
    return True
    def standard_test(self, informat, outformat, params):
        """
        Shared procedure behind every consensus-tree test.

        Arguments :
            informat  ( string )
                Format of the input file; also selects the input file
                '<Informat>/f002.trees.<informat>'.
            outformat  ( string )
                Format requested for the output file.
            params  ( string )
                Arguments passed to the consensus tree tool.
        """
        input_dir = informat.capitalize()
        infile = '{}/f002.trees.{}'.format(input_dir, informat)
        outfile = 'tmp_test.tree'
        self.add_file_to_clean(outfile)
        # The input must exist and hold the expected nine trees
        self.assertTrue(os.path.isfile(infile))
        input_trees = list(Phylo.parse(infile, informat))
        self.assertEqual(len(input_trees), 9)
        # Run the consensus tool
        PhyloAssemble.get_consensus_tree(
            consense_exe, infile, informat,
            args=params, outfile=outfile, outfile_format=outformat)
        # The tool must have produced the output file
        self.assertTrue(os.path.isfile(outfile))
예제 #56
0
 def test_newick_read_multiple(self):
     """Parse a Nexus file holding several trees and check their sizes."""
     parsed_trees = list(Phylo.parse(EX_NEXUS, 'nexus'))
     self.assertEqual(len(parsed_trees), 3)
     # Every tree in the file is expected to have nine terminals
     for tip_count in (len(t.get_terminals()) for t in parsed_trees):
         self.assertEqual(tip_count, 9)
예제 #57
0
 def setUp(self):
     """Parse the EX_PHYLO phyloxml file once and keep all of its trees."""
     parsed = Phylo.parse(EX_PHYLO, 'phyloxml')
     self.phylogenies = list(parsed)
예제 #58
0
 def test_convert_phyloxml_text(self):
     """Write the phyloxml examples to a text-mode handle."""
     source_trees = Phylo.parse("PhyloXML/phyloxml_examples.xml", "phyloxml")
     with tempfile.NamedTemporaryFile(mode="w") as text_handle:
         written = Phylo.write(source_trees, text_handle, "phyloxml")
         # Phylo.write reports how many trees it wrote
         self.assertEqual(13, written)