def updateEDMSTI(d, seq, tree, num, clusteringType):
    """Perform one merge step of the selected clustering method.

    The pair to merge is taken from the ``'key'`` entry of the result of the
    method's ``getMaxSimilarityFor*`` helper. The two labels are removed
    from ``seq``, a new label is built for the merged cluster, two edges are
    appended to ``tree``, and a new distance dictionary is assembled from
    the surviving pairs and completed by the method's ``getDistUFor*``
    helper.

    Args:
        d: Mapping indexed by ``(label, label)`` tuples; read for the merged
            pair and for every ordered pair of surviving labels.
        seq: List of current cluster labels (strings). Modified in place:
            the merged pair is removed and the new label is appended.
        tree: List of edges. Two dicts with keys ``'1'``, ``'2'`` and
            ``'dist'`` are appended.
        num: List indexed with positions taken from ``seq`` at call time.
            Only appended to here (``numF + numG + 1``); the entries of the
            merged pair are NOT removed.
            NOTE(review): presumably the caller prunes ``num`` using the
            returned ``iF``/``iG`` - confirm against the call site.
        clusteringType: One of ``'NJ'``, ``'UPGMA'`` or ``'WPGMA'``.

    Returns:
        Tuple ``(newD, seq, num, tree, iF, iG)`` where ``newD`` is the new
        distance mapping and ``iF``/``iG`` are the positions the merged
        labels had in ``seq`` before they were removed.

    Raises:
        ValueError: If ``clusteringType`` is not a supported method (raised
            before any argument is modified), or if a selected label is not
            present in ``seq``.
    """
    # Fail early and explicitly; previously an unknown method surfaced as an
    # UnboundLocalError on ``maxSimilarity``.
    if clusteringType not in ('NJ', 'UPGMA', 'WPGMA'):
        raise ValueError(
            "clusteringType must be 'NJ', 'UPGMA' or 'WPGMA', got "
            + repr(clusteringType))

    if clusteringType == 'NJ':
        maxSimilarity = NJUtils.getMaxSimilarityForNJ(d, seq)
    elif clusteringType == 'UPGMA':
        maxSimilarity = UPGMAUtils.getMaxSimilarityForUPGMA(d)
    else:  # 'WPGMA'
        maxSimilarity = WPGMAUtils.getMaxSimilarityForWPGMA(d)

    pair = maxSimilarity['key']

    f = pair[0]
    iF = seq.index(f)
    numF = num[iF]

    g = pair[1]
    if f == g:
        # Identical labels: the partner is the next occurrence after f.
        iG = seq.index(g, iF + 1)
    else:
        iG = seq.index(g)
    numG = num[iG]

    # Label of the merged cluster: "(f:<distance>:g)".
    u = ''.join(('(', f, ':', str(d[pair]), ':', g, ')'))

    seq.remove(f)
    seq.remove(g)

    if clusteringType == 'NJ':
        delta = NJUtils.getDeltaDistForNJ(d, f, g, u, seq)
    elif clusteringType == 'UPGMA':
        delta = UPGMAUtils.getDeltaDistForUPGMA(d, f, g, u)
    else:  # 'WPGMA'
        delta = WPGMAUtils.getDeltaDistForWPGMA(d, f, g, u)

    tree.append({'1': u, '2': f, 'dist': delta[u, f]})
    tree.append({'1': u, '2': g, 'dist': delta[u, g]})

    # Carry over the distances between all surviving labels (both orders).
    # Positions are compared, not labels, so duplicate labels are kept.
    newD = {}
    for i, first in enumerate(seq):
        for j, second in enumerate(seq):
            if i != j:
                newD[first, second] = d[first, second]

    # Let the method-specific helper add the distances involving u.
    if clusteringType == 'NJ':
        newD = NJUtils.getDistUForNJ(d, newD, f, g, u, seq)
    elif clusteringType == 'UPGMA':
        newD = UPGMAUtils.getDistUForUPGMA(d, newD, f, g, u, seq, numF, numG)
    else:  # 'WPGMA'
        newD = WPGMAUtils.getDistUForWPGMA(d, newD, f, g, u, seq)

    seq.append(u)
    num.append(numF + numG + 1)
    return newD, seq, num, tree, iF, iG
def _defaultTaxonNames(count):
    """Return ``count`` distinct labels: A..Z, then AA, AB, ... (column style)."""
    letters = string.ascii_uppercase
    base = len(letters)
    names = []
    for index in range(count):
        label = ""
        remaining = index
        while True:
            remaining, digit = divmod(remaining, base)
            label = letters[digit] + label
            if remaining == 0:
                break
            remaining -= 1  # bijective numbering: there is no "zero" letter
        names.append(label)
    return names


def nj(distMat, texaName = None, precision = 5, dspTree = True, saveFlag = True):
    """Build a neighbor-joining tree and return it as a Newick string.

    Clusters are joined pairwise, using the Q criterion computed below,
    until two clusters remain; those two are then connected by splitting
    their remaining distance in half.

    Args:
        distMat: Square distance matrix; converted with
            ``np.array(distMat, dtype=float)`` and validated by
            ``util.checkInput``.
        texaName: Optional sequence of taxon labels. When ``None`` or empty,
            labels A, B, ..., Z, AA, AB, ... are generated.
        precision: Forwarded to ``util.findMinInd`` and
            ``util.newickStringFormat``.
        dspTree: When true, the tree is drawn with ``Phylo`` and with the R
            package ``ape`` (imported through ``rpy2``).
        saveFlag: When true, the Newick string is written to ``result.nw``
            in the current working directory.

    Returns:
        The Newick string of the final tree, terminated by ``;``.

    Raises:
        ValueError: If the matrix describes fewer than two taxa.
    """
    # Check input distance matrix format.
    distMat = np.array(distMat, dtype=float)
    util.checkInput(distMat)

    numTexa = distMat.shape[0]
    if numTexa < 2:
        raise ValueError("nj needs at least two taxa, got %d" % numTexa)

    # Independent copy kept for the error estimate (a plain [:] slice of an
    # ndarray would only be a view).
    distMatInput = distMat.copy()

    if texaName is not None and len(texaName) > 0:
        newick = list(texaName)
    else:
        newick = _defaultTaxonNames(numTexa)
        texaName = newick[:]

    nodeName = newick[:]             # cluster represented by each node
    nodePT = list(range(numTexa))    # node pointer: matrix row -> node index
    edgeLengths = []

    # Combine clusters until the number of clusters (nc) is two.
    for nc in range(numTexa, 2, -1):
        colSum = np.sum(distMat, axis=0)  # sums of columns

        # Q criterion (Studier & Keppler). The diagonal is set to +inf so a
        # node is never paired with itself. The np.matrix operand is kept so
        # util.findMinInd receives the same type as before.
        Q = ((nc - 2) * distMat
             - np.tile(np.matrix(colSum).T, (1, nc))
             - np.tile(colSum, (nc, 1))
             + np.diag(np.repeat(np.inf, nc)))

        # Indices of the pair to join.
        i, j = util.findMinInd(Q, nodePT, numTexa, precision)
        nodeJoin = [nodePT[i], nodePT[j]]

        # Update node names and node pointer.
        nodeName.append("(" + nodeName[nodePT[i]] + "," + nodeName[nodePT[j]] + ")")
        # Rows that survive the join; does not depend on whether i < j.
        newNodeInd = [k for k in range(nc) if k != i and k != j]
        nodePT = [nodePT[k] for k in newNodeInd]
        nodePT.append(len(nodeName) - 1)

        # Distances from i and from j to the new node (ij).
        branchLen = (np.dot(colSum[[i, j]], np.array([[1, -1], [-1, 1]]))
                     / (2 * (nc - 2))
                     + distMat[i, j] / 2)
        edgeLengths.extend(branchLen)

        # Update Newick strings.
        newickStr = util.newickStringFormat(newick, nodeJoin, branchLen, precision)
        newick.append(newickStr)

        # Distance between the new node (ij) and every remaining node,
        # then the reduced distance matrix with (ij) as last row/column.
        dist = (np.sum(distMat[np.ix_([i, j], newNodeInd)], axis=0)
                - distMat[i, j]) / 2
        dist = np.array([dist])
        distMatTop = np.concatenate(
            (distMat[np.ix_(newNodeInd, newNodeInd)], dist.T), axis=1)
        distMatBottom = np.append(dist, 0)
        distMat = np.vstack((distMatTop, [distMatBottom]))

    # Join the last two clusters, giving each half of the remaining distance.
    nodeName.append("(" + nodeName[nodePT[0]] + "," + nodeName[nodePT[1]] + ")")
    newickStr = util.newickStringFormat(
        newick, nodePT, distMat[[0, 1], [1, 0]] / 2, precision) + ";"
    newick.append(newickStr)
    edgeLengths.append(distMat[0, 1])

    # Single-argument print calls produce the same output on Python 2 and 3.
    print("Tree Building\n" + str(nodeName))
    print("\nNewick Tree\n" + newick[-1])

    if dspTree:
        import rpy2.robjects.packages as rp
        tree = Phylo.read(StringIO(newick[-1]), "newick")
        Phylo.draw(tree)
        ape = rp.importr('ape')
        t = ape.read_tree(text = newick[-1])
        ape.plot_phylo(t, type = 'unrooted')

    # NOTE(review): the error estimate and the length report are taken to be
    # independent of dspTree - confirm against the original layout.
    util.estimateError(newick[-1], distMatInput, texaName)
    # The extra " " reproduces the separator the print statement inserted.
    print("Total length of the tree L = " + " " + str(sum(edgeLengths)))

    if saveFlag:
        with open("result.nw", 'w') as f:
            f.write(newick[-1])

    return newick[-1]
# Driver: run neighbor joining on the TRex test matrix.
# (The duplicated tail of nj() that preceded these statements was removed:
# at module level it referenced nj's locals and contained a bare `return`.)
# NOTE(review): these statements run on import as well as when the file is
# executed directly - consider an `if __name__ == "__main__":` guard.
testData = util.importDistMat('testData/TRex/distmat_6.dat')
names = testData[0]
distMat = testData[1]
test = nj(distMat, names)

###
# Alternative drivers, kept for reference:
#import test_data
#dataNum = 1;
#distMat = test_data.distMat[dataNum]
#if dataNum in test_data.names:
#    names = test_data.names[dataNum];
#    test = nj(distMat, names);
#else:
#    test = nj(distMat)

#distMat = util.randomDistMatGen(25);
#test = nj(distMat)