def _defaultTexaNames(count):
    """Return `count` unique default labels: A..Z, then AA, AB, ... ."""
    import itertools

    letters = string.ascii_uppercase
    names = []
    width = 1
    while len(names) < count:
        for combo in itertools.product(letters, repeat=width):
            if len(names) == count:
                break
            names.append("".join(combo))
        width += 1
    return names


def nj(distMat, texaName = None, precision = 5, dspTree = True, saveFlag = True):
    """Build an unrooted tree from a distance matrix by neighbor joining.

    Clusters are merged pairwise, using the Q criterion computed below,
    until only two clusters remain; those two are then joined by a final
    edge to close the Newick string.

    Parameters
    ----------
    distMat : array-like
        Square matrix of pairwise distances; converted to a float ndarray
        and validated by ``util.checkInput``.
    texaName : sequence of str, optional
        Labels for the taxa, in matrix order.  When omitted (or empty),
        labels ``A, B, C, ...`` are generated (continuing with ``AA, AB, ...``
        past 26 taxa).
    precision : int
        Forwarded to ``util.findMinInd`` and ``util.newickStringFormat``.
    dspTree : bool
        When true, draw the tree with ``Phylo.draw`` and with R's ``ape``
        package through ``rpy2``.
    saveFlag : bool
        When true, write the Newick string to ``result.nw`` in the current
        working directory.

    Returns
    -------
    str
        The Newick string of the final tree (terminated by ``;``).

    Raises
    ------
    ValueError
        If ``texaName`` is given but has fewer entries than the matrix has
        rows (node indices would otherwise collide with cluster indices).
    """
    # check input distance matrix format
    distMat = np.array(distMat, dtype=float)
    util.checkInput(distMat)

    # Initialization
    # Real copy of the input, kept for the estimate-error calculation
    # (a plain slice of an ndarray would only be a view).
    distMatInput = distMat.copy()
    numTexa = distMat.shape[0]
    if texaName:
        if len(texaName) < numTexa:
            raise ValueError(
                "texaName has %d labels but the distance matrix has %d taxa"
                % (len(texaName), numTexa))
        newick = list(texaName)
    else:
        newick = _defaultTexaNames(numTexa)
        texaName = newick[:]
    nodeName = newick[:]           # cluster represented by each node
    nodePT = list(range(numTexa))  # node pointer: matrix row -> node index
    edgeLengths = []

    # combine clusters till the number of clusters (nc) is two
    for nc in range(numTexa, 2, -1):
        colSum = np.sum(distMat, axis=0)  # sums of columns
        # Q criterion (Studier & Keppler).  The diagonal is set to +inf so a
        # node is never paired with itself.
        # NOTE(review): np.matrix makes Q a matrix, exactly as before; the
        # type expected by util.findMinInd is not visible here, so it is kept.
        Q = ((nc - 2) * distMat
             - np.tile(np.matrix(colSum).T, (1, nc))
             - np.tile(colSum, (nc, 1))
             + np.diag(np.repeat(np.inf, nc)))

        # indices of the pair to join; the index arithmetic below requires
        # i < j
        i, j = util.findMinInd(Q, nodePT, numTexa, precision)
        nodeJoin = [nodePT[i], nodePT[j]]

        # Update node pointer, node name, node ID
        nodeName.append(
            "(" + nodeName[nodePT[i]] + "," + nodeName[nodePT[j]] + ")")
        # rows/columns that survive the merge (everything except i and j)
        newNodeInd = (list(range(0, i))
                      + list(range(i + 1, j))
                      + list(range(j + 1, nc)))
        nodePT = [nodePT[k] for k in newNodeInd]
        nodePT.append(len(nodeName) - 1)

        # compute distance from i and j to the new node (ij)
        branchLen = (np.dot(colSum[[i, j]], np.array([[1, -1], [-1, 1]]))
                     / (2 * (nc - 2))
                     + distMat[i, j] / 2)
        edgeLengths.extend(branchLen)

        # update newick string
        newickStr = util.newickStringFormat(
            newick, nodeJoin, branchLen, precision)
        newick.append(newickStr)

        # compute distance between the new node (ij) and remaining nodes
        # excluding i and j and construct new distance matrix.
        # Negative distances are intentionally not clamped to zero.
        dist = (np.sum(distMat[np.ix_([i, j], newNodeInd)], axis=0)
                - distMat[i, j]) / 2
        dist = np.array([dist])
        distMatTop = np.concatenate(
            (distMat[np.ix_(newNodeInd, newNodeInd)], dist.T), axis=1)
        distMatBottom = np.append(dist, 0)
        # update distance matrix: the merged node becomes the last row/column
        distMat = np.vstack((distMatTop, [distMatBottom]))

    # join the last two clusters; their distance is split evenly in the
    # Newick string and counted once in the total length
    nodeName.append(
        "(" + nodeName[nodePT[0]] + "," + nodeName[nodePT[1]] + ")")
    newickStr = util.newickStringFormat(
        newick, nodePT, distMat[[0, 1], [1, 0]] / 2, precision) + ";"
    newick.append(newickStr)
    edgeLengths.append(distMat[0, 1])

    # Single pre-formatted strings so the output is the same under the
    # Python 2 print statement and the Python 3 print function.
    print("Tree Building\n" + str(nodeName))
    print("\nNewick Tree\n" + str(newick[-1]))

    if dspTree:
        # rpy2 is imported lazily so it is only required when plotting
        import rpy2.robjects.packages as rp
        tree = Phylo.read(StringIO(newick[-1]), "newick")
        Phylo.draw(tree)
        ape = rp.importr('ape')
        t = ape.read_tree(text = newick[-1])
        ape.plot_phylo(t, type = 'unrooted')

    # NOTE(review): assumed to run regardless of dspTree; the nesting was not
    # recoverable from the collapsed source - confirm.
    util.estimateError(newick[-1], distMatInput, texaName)
    print("Total length of the tree L =  " + str(sum(edgeLengths)))

    if saveFlag:
        with open("result.nw", 'w') as f:
            f.write(newick[-1])
    return newick[-1]