Example #1
0
def _defaultTaxaNames(count):
    """Return `count` default labels: A..Z, then AA, AB, ... (spreadsheet style)."""
    letters = string.ascii_uppercase
    base = len(letters)
    names = []
    for index in range(count):
        label = ""
        remaining = index
        while True:
            remaining, digit = divmod(remaining, base)
            label = letters[digit] + label
            if remaining == 0:
                break
            remaining -= 1  # bijective numeration: "AA" follows "Z"
        names.append(label)
    return names


def nj(distMat, texaName = None, precision = 5, dspTree = True, saveFlag = True):
    """
    Build an unrooted tree from a distance matrix by neighbor joining.

    Clusters are merged pairwise using the Studier & Keppler Q criterion
    until two clusters remain; those two are then joined by a final edge.

    Parameters
    ----------
    distMat : array-like
        Square matrix of pairwise distances; converted to a float array
        and validated by ``util.checkInput``.
    texaName : sequence of str, optional
        One label per taxon, in matrix order. When omitted (or empty),
        labels ``A, B, ..., Z, AA, AB, ...`` are generated.
    precision : int
        Forwarded to ``util.findMinInd`` and ``util.newickStringFormat``
        (presumably the number of decimals used there -- confirm in util).
    dspTree : bool
        When true, draw the tree with ``Phylo`` and with R's ``ape``
        package through ``rpy2`` (imported lazily, only in that case).
    saveFlag : bool
        When true, write the final Newick string to ``result.nw`` in the
        current working directory.

    Returns
    -------
    str
        Newick string of the final tree, terminated by ``;``.

    Raises
    ------
    ValueError
        If the matrix describes fewer than two taxa, or if fewer names
        than taxa are supplied.

    Notes
    -----
    Prints the list of cluster names, the final Newick string and the
    total tree length, and calls ``util.estimateError`` with the final
    tree, a copy of the input matrix and the taxon names.
    """
    # check input distance matrix format
    distMat = np.array(distMat, dtype=float)
    util.checkInput(distMat)

    # Initialization
    # Real copy (slicing an ndarray only yields a view) kept for the
    # error estimate at the end.
    distMatInput = distMat.copy()
    numTexa = distMat.shape[0]
    if numTexa < 2:
        raise ValueError("neighbor joining needs at least two taxa, got %d"
                         % numTexa)

    if texaName is not None and len(texaName) > 0:
        newick = list(texaName)
        if len(newick) < numTexa:
            raise ValueError("got %d taxon names for %d taxa"
                             % (len(newick), numTexa))
    else:
        newick = _defaultTaxaNames(numTexa)
        texaName = newick[:]
    nodeName = newick[:]            # cluster label of every node created so far
    nodePT = list(range(numTexa))   # row/column of distMat -> index into nodeName
    edgeLengths = []

    # combine clusters till the number of clusters (nc) is two
    for nc in range(numTexa, 2, -1):
        colSum = np.sum(distMat, axis=0)  # sums of columns
        # Studier & Keppler criterion; +inf on the diagonal keeps a node from
        # being paired with itself. The np.matrix term makes Q an np.matrix,
        # which is kept so util.findMinInd receives the same type as before.
        Q = (nc - 2) * distMat - np.tile(np.matrix(colSum).T, (1, nc)) \
            - np.tile(colSum, (nc, 1)) \
            + np.diag(np.repeat(np.inf, nc))

        # indices of the pair of clusters to join in this round
        i, j = util.findMinInd(Q, nodePT, numTexa, precision)
        nodeJoin = [nodePT[i], nodePT[j]]

        # Update node pointer and node name: drop i and j, append the new
        # cluster (ij) as the last entry.
        nodeName.append("(" + nodeName[nodePT[i]]
                        + "," + nodeName[nodePT[j]] + ")")
        newNodeInd = [k for k in range(nc) if k != i and k != j]
        nodePT = [nodePT[k] for k in newNodeInd]
        nodePT.append(len(nodeName) - 1)

        # compute distance from i and j to the new node (ij)
        branchLen = (np.dot(colSum[[i, j]], np.array([[1, -1], [-1, 1]]))
                     / (2 * (nc - 2))
                     + distMat[i, j] / 2)
        edgeLengths.extend(branchLen)

        # update newick string
        newickStr = util.newickStringFormat(newick, nodeJoin,
                                            branchLen, precision)
        newick.append(newickStr)

        # compute distance between the new node (ij) and remaining nodes
        # excluding i and j and construct new distance matrix.
        # NOTE: negative distances are not clamped to zero here.
        dist = (np.sum(distMat[np.ix_([i, j], newNodeInd)], axis=0)
                - distMat[i, j]) / 2
        dist = np.array([dist])
        distMatTop = np.concatenate((distMat[np.ix_(newNodeInd, newNodeInd)],
                                     dist.T), axis=1)
        distMatBottom = np.append(dist, 0)
        distMat = np.vstack((distMatTop, [distMatBottom]))  # update distance matrix

    # Join the last two clusters; each gets half of the remaining distance.
    nodeName.append("(" + nodeName[nodePT[0]]
                    + "," + nodeName[nodePT[1]] + ")")
    newickStr = util.newickStringFormat(newick, nodePT,
                                        distMat[[0, 1], [1, 0]] / 2,
                                        precision) + ";"
    newick.append(newickStr)
    edgeLengths.append(distMat[0, 1])

    # Single-argument print calls produce the same output on Python 2 and 3.
    print("Tree Building\n%s" % (nodeName,))
    print("\nNewick Tree\n%s" % (newick[-1],))
    if dspTree:
        import rpy2.robjects.packages as rp
        tree = Phylo.read(StringIO(newick[-1]), "newick")
        Phylo.draw(tree)
        ape = rp.importr('ape')
        t = ape.read_tree(text = newick[-1])
        ape.plot_phylo(t, type = 'unrooted')

    util.estimateError(newick[-1], distMatInput, texaName)
    print("Total length of the tree L =  %s" % (sum(edgeLengths),))
    if saveFlag:
        with open("result.nw", 'w') as f:
            f.write(newick[-1])

    return newick[-1]