Ejemplo n.º 1
0
class NewickReader:
	"""
		Thin wrapper around Bio.Nexus.Trees for reading newick files.

		Many of the author's newick taxon labels are plain ncRNA <db_id>s, and
		database lookup for those IDs is meant to be supported as well (the
		methods defined here only cover reading the tree and measuring distances).

		Attributes:
			filename -- path of the newick file that was read.
			tree     -- the object returned by Tree(<file content>).
	"""
	def __init__(self, filename):
		"""
			Read the whole newick file at `filename` and parse it with Tree.

			Any error raised by open()/read() or by the Tree constructor
			propagates to the caller unchanged.
		"""
		self.filename = filename

		# The context manager closes the handle even when read() raises,
		# which the previous open()/read()/close() sequence did not guarantee.
		with open(self.filename, 'r') as f:
			chunk = f.read()
		self.tree = Tree(chunk)

	def distance(self, taxon1, taxon2):
		"""
			Return self.tree.distance() between the nodes found for the two taxa.

			Note that here "taxon" simply means whatever the terminal nodes' data are.
			Since most of the newick files are labeled with <db_id>, it could just be ex: '34969'.

			Each label is resolved with self.tree.search_taxon() and the two
			results are handed to self.tree.distance() as-is; no check is made
			here that a label was actually found.
		"""
		id1 = self.tree.search_taxon(taxon1)
		id2 = self.tree.search_taxon(taxon2)
		return self.tree.distance(id1, id2)
        # Fragment of a larger routine (its header is outside this view).
        # Walks every taxon of `mytreeobj` and, per label prefix, keeps only
        # the taxon with the shortest branch length; everything else is
        # appended to `badtaxa`.
        alltaxa = mytreeobj.get_taxa()
        # Taxa rejected by the loop below; how the list is consumed is not
        # visible here.
        badtaxa = []
        # Maps label prefix -> (taxon, branchlength) of the best taxon so far.
        slowest_inparalogs = {}

        for taxon in alltaxa:
            # Taxa that are not among the values of `oldid_newid` are rejected.
            # NOTE(review): `.values()` membership is a linear scan per taxon.
            if taxon not in oldid_newid.values():
                badtaxa.append(taxon)

            else:
                # Grouping key: the text before the first '_bpgseq' marker
                # (presumably a species identifier -- TODO confirm).
                sp = taxon.split('_bpgseq')[0]

                if sp in slowest_inparalogs:
                    (old_taxon, old_brlen) = slowest_inparalogs[sp]
                    new_brlen = mytreeobj.node(
                        mytreeobj.search_taxon(taxon)).get_data().branchlength

                    # Strict '<': on a tie the previously stored taxon is kept.
                    if new_brlen < old_brlen:
                        # The branch length is looked up again here rather
                        # than reusing `new_brlen`.
                        slowest_inparalogs[sp] = (
                            taxon, mytreeobj.node(mytreeobj.search_taxon(
                                taxon)).get_data().branchlength)
                        badtaxa.append(old_taxon)

                    else:
                        badtaxa.append(taxon)

                else:
                    # First taxon seen for this prefix becomes the current best.
                    slowest_inparalogs[sp] = (taxon,
                                              mytreeobj.node(
                                                  mytreeobj.search_taxon(taxon)
                                              ).get_data().branchlength)
    # Fragment of a larger routine (its header is outside this view).
    # Walks every taxon of `mytreeobj` and, per label prefix, keeps only the
    # taxon with the shortest branch length; everything else is appended to
    # `badtaxa`.
    alltaxa = mytreeobj.get_taxa()
    # Taxa rejected by the loop below; how the list is consumed is not
    # visible here.
    badtaxa = []
    # Maps label prefix -> (taxon, branchlength) of the best taxon so far.
    slowest_inparalogs = {}

    for taxon in alltaxa:
        # Taxa that are not among the values of `oldid_newid` are rejected.
        # NOTE(review): `.values()` membership is a linear scan per taxon.
        if taxon not in oldid_newid.values():
            badtaxa.append(taxon)

        else:
            # Grouping key: the text before the first '_seqh' marker
            # (presumably a species identifier -- TODO confirm).
            sp = taxon.split('_seqh')[0]

            if sp in slowest_inparalogs:
                (old_taxon, old_brlen) = slowest_inparalogs[sp]
                new_brlen = mytreeobj.node(
                    mytreeobj.search_taxon(taxon)).get_data().branchlength

                # Strict '<': on a tie the previously stored taxon is kept.
                if new_brlen < old_brlen:
                    # The branch length is looked up again here rather than
                    # reusing `new_brlen`.
                    slowest_inparalogs[sp] = (taxon,
                                              mytreeobj.node(
                                                  mytreeobj.search_taxon(taxon)
                                              ).get_data().branchlength)
                    badtaxa.append(old_taxon)

                else:
                    badtaxa.append(taxon)

            else:
                # First taxon seen for this prefix becomes the current best.
                slowest_inparalogs[sp] = (
                    taxon, mytreeobj.node(
                        mytreeobj.search_taxon(taxon)).get_data().branchlength)