Esempio n. 1
0
def writeNewick(species, distance, output):
    '''
    Build a neighbour-joining tree and write its topology as Newick text.

    species  -- list of species names; underscores in names are replaced
                by spaces in the emitted tree.
    distance -- triangular distance matrix rows matching `species`, in the
                layout Bio.Phylo.TreeConstruction._DistanceMatrix accepts.
    output   -- path of the file to write; when empty or None the tree is
                written to standard out.

    Inner-node labels and all branch lengths are stripped, so only the
    topology and the leaf names remain in the output.
    '''
    import re
    import tempfile
    import Bio.Phylo.TreeConstruction as TreeConstruction
    from Bio import Phylo

    distanceMatrix = TreeConstruction._DistanceMatrix(species, distance)
    treeConstructor = TreeConstruction.DistanceTreeConstructor(method='nj')
    njTree = treeConstructor.nj(distanceMatrix)

    # Serialise through an anonymous temporary file: it needs no unique-name
    # bookkeeping, does not touch the working directory, and is removed
    # automatically even if writing fails.
    with tempfile.TemporaryFile(mode='w+') as scratch:
        Phylo.write(njTree, scratch, "newick")
        scratch.seek(0)
        tree = scratch.read()

    # Drop the auto-generated inner node labels together with their lengths.
    tree = re.sub(r"Inner[0-9]+:[-0-9\.]+", "", tree)
    # Drop the leaf branch lengths; neighbour joining can yield negative
    # lengths, so an optional sign has to be accepted here as well.
    tree = re.sub(r":-?[0-9\.]+", "", tree)
    tree = tree.replace("_", " ")

    # Decide on the destination from the argument itself rather than from a
    # module-level `args` object, so the function works for any caller.
    if output:
        with open(output, 'w') as outputFile:
            outputFile.write(tree)
    else:
        sys.stdout.write(tree)
    def _make_nj_tree(self, treesams, dm):
        """
        **PRIVATE**

        Build a neighbour-joining tree for the given samples and return it
        as a newick string.

        Parameters
        ----------
        treesams: dict
            {sam name: samid, sam name: samid, ...}
        dm: str or None
            if not None, path of a file that receives the lower triangle of
            the distance matrix as comma separated rows (sample name first,
            diagonal omitted); an existing file is overwritten

        Returns
        -------
        nwkstring: str
            tree as newick string
        """

        # Materialise the names once: the Biopython matrix classes insist on
        # a real list (a dict view is rejected on Python 3), and one fixed
        # list keeps the row order and the label order in step.
        aSampleNames = list(treesams)
        iNofSams = len(aSampleNames)
        # n*(n-1) is always even, so floor division is exact here.
        logging.info("Calculating %i distances. Patience!", ((iNofSams**2) - iNofSams) // 2)

        dist_mat = get_distance_matrix(self.cur, list(treesams.values()))

        # Row i holds the distances to samples 0..i-1 followed by the
        # zero of the diagonal (lower-triangular layout).
        aSimpleMatrix = []
        for i, sample_1 in enumerate(aSampleNames):
            sid1 = treesams[sample_1]
            mat_line = [dist_mat[sid1][treesams[sample_2]]
                        for sample_2 in aSampleNames[:i]]
            mat_line.append(0)
            aSimpleMatrix.append(mat_line)

        if dm is not None:
            # Open once in 'w' mode: this replaces any previous file and
            # avoids re-opening the file for every single row.
            with open(dm, 'w') as f:
                for sample_1, mat_line in zip(aSampleNames, aSimpleMatrix):
                    f.write("%s\n" % ','.join([sample_1] + [str(x) for x in mat_line[:-1]]))
            logging.info("Distance matrix written to file: %s", dm)

        logging.info("Bulding tree.")
        oDistMat = TreeConstruction._DistanceMatrix(aSampleNames, aSimpleMatrix)
        constructor = TreeConstruction.DistanceTreeConstructor()
        oTree = constructor.nj(oDistMat)

        # Round-trip through a file in a private temp dir to obtain the
        # newick text; the directory is removed even if writing fails.
        td = tempfile.mkdtemp()
        try:
            tmpfile = os.path.join(td, 'tree.nwk')
            Phylo.write(oTree, tmpfile, 'newick')
            with open(tmpfile, 'r') as f:
                nwkstring = f.read()
        finally:
            shutil.rmtree(td)

        return nwkstring
Esempio n. 3
0
 def NJ(self, f=min):
     """Draw a neighbour-joining tree of the languages as ASCII art.

     The matrix returned by ``self.distanceMatrix()`` is symmetrised in
     place: every pair ``m[i][j]`` / ``m[j][i]`` is replaced by ``f`` of
     the two values. Its lower triangle (diagonal included) is then handed
     to Biopython for tree construction.
     """
     matrix = self.distanceMatrix()
     size = len(self.languages)

     # Walk the upper triangle and mirror the merged value across it.
     for row in range(size):
         for col in range(row, size):
             merged = f(matrix[row][col], matrix[col][row])
             matrix[row][col] = merged
             matrix[col][row] = merged

     lower_triangle = []
     for row in range(size):
         lower_triangle.append([matrix[row][col] for col in range(row + 1)])

     labels = [language.name for language in self.languages]
     distances = TreeConstruction._DistanceMatrix(labels, lower_triangle)
     builder = TreeConstruction.DistanceTreeConstructor(method='nj')
     Phylo.draw_ascii(builder.nj(distances))
Esempio n. 4
0
def make_nj_tree(dist_mat, dArgs, aSampleNames):
    '''
    Build a neighbour joining tree with Biopython.Phylo and save it as newick.

    Parameters
    ----------
    dist_mat: dict
        nested dict of pairwise distances, looked up as dist_mat[a][b]
    dArgs: dict
        input argument dictionary; dArgs['tree'] is where the tree is written
    aSampleNames: list
        sample names, in the order used for the matrix rows

    Returns
    -------
    always 0
    the tree itself is written to dArgs['tree'] in newick format
    '''

    # Lower-triangular layout: row i carries the distances to the samples
    # 0..i-1 and ends with the 0 of the diagonal.
    lower_triangle = [
        [0 if col == row else dist_mat[row_name][col_name]
         for col, col_name in enumerate(aSampleNames)
         if col <= row]
        for row, row_name in enumerate(aSampleNames)
    ]

    matrix = TreeConstruction._DistanceMatrix(aSampleNames, lower_triangle)
    builder = TreeConstruction.DistanceTreeConstructor()
    nj_tree = builder.nj(matrix)
    Phylo.write(nj_tree, dArgs['tree'], 'newick')
    logging.info("Tree file written.")

    return 0
Esempio n. 5
0
    def UPGMA(self, f=min):
        """Draw a UPGMA tree of the languages and return the distance matrix.

        Asymmetric 'distances' are reconciled with ``f``: every pair
        ``m[i][j]`` / ``m[j][i]`` of the matrix from ``self.distanceMatrix()``
        is replaced, in place, by ``f`` of the two values. The lower triangle
        (diagonal included) is then given to Biopython, the resulting tree is
        printed as ASCII art, and the symmetrised matrix is returned.
        """
        matrix = self.distanceMatrix()
        size = len(self.languages)

        # Walk the upper triangle and mirror the merged value across it.
        for row in range(size):
            for col in range(row, size):
                merged = f(matrix[row][col], matrix[col][row])
                matrix[row][col] = merged
                matrix[col][row] = merged

        lower_triangle = []
        for row in range(size):
            lower_triangle.append([matrix[row][col] for col in range(row + 1)])

        labels = [language.name for language in self.languages]
        distances = TreeConstruction._DistanceMatrix(labels, lower_triangle)
        builder = TreeConstruction.DistanceTreeConstructor(method='upgma')
        Phylo.draw_ascii(builder.upgma(distances))

        return matrix
Esempio n. 6
0
				scores[pdbs[i]] = {}
				PhyloM.append([])
				scoresM.append([])
			scores[pdbs[i]][pdbs[j]] = 0
			scoresM[i].append(0)
		
# print_matrix is defined elsewhere; presumably it prints the labelled score table.
print_matrix("Scores", scores, pdbs)
# Wrap the score rows in a Biopython matrix; each label is the text after the
# last '/' of the pdb entry (the whole entry when it contains no '/').
scoresM = TreeConstruction._Matrix([x[x.rfind('/')+1:] for x in pdbs], scoresM)

# Turn scores into distances, for j <= i only (lower triangle incl. diagonal):
#   distance(a, b) = (score(a, a) + score(b, b)) / 2 - score(a, b)
# NOTE(review): leng is set outside this view; presumably len(pdbs) - confirm.
distances = {}
for i in range(leng):
	distances[pdbs[i]] = {}
	for j in range(i+1):
		distances[pdbs[i]][pdbs[j]] = (scores[pdbs[i]][pdbs[i]]+scores[pdbs[j]][pdbs[j]])/2.0 - scores[pdbs[i]][pdbs[j]]
		# Row i of PhyloM receives the values for j = 0..i, in that order.
		PhyloM[i].append(distances[pdbs[i]][pdbs[j]])
# From here on PhyloM is the Biopython distance matrix, no longer the raw row lists.
PhyloM = TreeConstruction._DistanceMatrix([x[x.rfind('/')+1:] for x in pdbs], PhyloM)
print_matrix("Distances", distances, pdbs)

# Build the UPGMA tree and show it as ASCII art.
tree = TreeConstruction.DistanceTreeConstructor().upgma(PhyloM)
Phylo.draw_ascii(tree)
# ladderize() runs after the ASCII rendering above, so it can only affect
# later uses of `tree`.
tree.ladderize()
#Phylo.draw_graphviz(tree, node_size=0)
def hide_inner(node):
	"""Return the label to draw for `node`, or None to leave it unlabelled.

	Nodes whose name starts with "Inner" get no label. Nodes without any
	name (`node.name` is None) are treated the same way instead of raising
	AttributeError.
	"""
	name = node.name
	if name is None or name.startswith("Inner"):
		return None
	return name
try:
	Phylo.draw(tree, label_func=hide_inner, do_show=False)
	#show()
	savefig(multy_png)
                scoresM.append([])
            scores[pdbs[i]][pdbs[j]] = 0
            scoresM[i].append(0)

# print_matrix is defined elsewhere; presumably it prints the labelled score table.
print_matrix("Scores", scores, pdbs)
# Wrap the score rows in a Biopython matrix; each label is the text after the
# last '/' of the pdb entry (the whole entry when it contains no '/').
scoresM = TreeConstruction._Matrix([x[x.rfind('/') + 1:] for x in pdbs],
                                   scoresM)

# Turn scores into distances, for j <= i only (lower triangle incl. diagonal):
#   distance(a, b) = (score(a, a) + score(b, b)) / 2 - score(a, b)
# NOTE(review): leng is set outside this view; presumably len(pdbs) - confirm.
distances = {}
for i in range(leng):
    distances[pdbs[i]] = {}
    for j in range(i + 1):
        distances[pdbs[i]][pdbs[j]] = (scores[pdbs[i]][pdbs[i]] + scores[
            pdbs[j]][pdbs[j]]) / 2.0 - scores[pdbs[i]][pdbs[j]]
        # Row i of PhyloM receives the values for j = 0..i, in that order.
        PhyloM[i].append(distances[pdbs[i]][pdbs[j]])
# From here on PhyloM is the Biopython distance matrix, no longer the raw row lists.
PhyloM = TreeConstruction._DistanceMatrix([x[x.rfind('/') + 1:] for x in pdbs],
                                          PhyloM)
print_matrix("Distances", distances, pdbs)

# Build the UPGMA tree and show it as ASCII art.
tree = TreeConstruction.DistanceTreeConstructor().upgma(PhyloM)
Phylo.draw_ascii(tree)
# ladderize() runs after the ASCII rendering above, so it can only affect
# later uses of `tree`.
tree.ladderize()


#Phylo.draw_graphviz(tree, node_size=0)
def hide_inner(node):
    """Return the label to draw for `node`, or None to leave it unlabelled.

    Nodes whose name starts with "Inner" get no label. Nodes without any
    name (`node.name` is None) are treated the same way instead of raising
    AttributeError.
    """
    name = node.name
    if name is None or name.startswith("Inner"):
        return None
    return name