Example #1
0
def updateEDMSTI(d, seq, tree, num, clusteringType):
    """Perform one agglomeration step: merge a pair of clusters into a new node.

    The pair to merge is chosen by the clustering-specific
    ``getMaxSimilarityFor*`` helper; the branch lengths and the distances from
    the merged node to the remaining clusters come from the matching
    ``getDeltaDistFor*`` / ``getDistUFor*`` helpers.

    Parameters:
        d: mapping from ``(label_a, label_b)`` tuples to distances.
        seq: list of current cluster labels. Mutated in place: the two merged
            labels are removed and the new node label is appended.
        tree: list of edge dicts. Mutated in place: two entries of the form
            ``{'1': parent, '2': child, 'dist': length}`` are appended.
        num: list parallel to ``seq`` (indexed with the same positions).
            Mutated in place: ``numF + numG + 1`` is appended. The entries of
            the merged clusters are NOT removed here; their positions are
            returned as ``iF`` / ``iG``.
        clusteringType: one of ``'NJ'``, ``'UPGMA'`` or ``'WPGMA'``.

    Returns:
        Tuple ``(newD, seq, num, tree, iF, iG)`` where ``newD`` is the distance
        mapping for the updated set of clusters and ``iF`` / ``iG`` are the
        positions the merged labels had in ``seq`` *before* they were removed.

    Raises:
        ValueError: if ``clusteringType`` is not a supported method. The check
            runs before any argument is modified.
    """
    # Fail early with a clear message instead of hitting an unbound
    # ``maxSimilarity`` further down.
    if clusteringType not in ('NJ', 'UPGMA', 'WPGMA'):
        raise ValueError(
            "unsupported clusteringType %r; expected 'NJ', 'UPGMA' or 'WPGMA'"
            % (clusteringType,))

    if clusteringType == 'NJ':
        maxSimilarity = NJUtils.getMaxSimilarityForNJ(d, seq)
    elif clusteringType == 'UPGMA':
        maxSimilarity = UPGMAUtils.getMaxSimilarityForUPGMA(d)
    else:  # 'WPGMA'
        maxSimilarity = WPGMAUtils.getMaxSimilarityForWPGMA(d)

    pairKey = maxSimilarity['key']

    f = pairKey[0]
    iF = seq.index(f)
    numF = num[iF]

    g = pairKey[1]
    if f == g:
        # Both labels are equal, so look for the second occurrence after iF.
        iG = iF + 1 + seq[iF + 1:].index(g)
    else:
        iG = seq.index(g)
    numG = num[iG]

    # Label of the merged node: "(f:<distance>:g)".
    u = '(' + f + ':' + str(d[pairKey]) + ':' + g + ')'

    seq.remove(f)
    seq.remove(g)

    if clusteringType == 'NJ':
        delta = NJUtils.getDeltaDistForNJ(d, f, g, u, seq)
    elif clusteringType == 'UPGMA':
        delta = UPGMAUtils.getDeltaDistForUPGMA(d, f, g, u)
    else:  # 'WPGMA'
        delta = WPGMAUtils.getDeltaDistForWPGMA(d, f, g, u)

    tree.append({'1': u, '2': f, 'dist': delta[u, f]})
    tree.append({'1': u, '2': g, 'dist': delta[u, g]})

    # Carry over the distances between the clusters that were not merged.
    # Positions (not values) are compared because labels may repeat in seq.
    newD = {}
    for i, first in enumerate(seq):
        for j, second in enumerate(seq):
            if i != j:
                newD[first, second] = d[first, second]

    if clusteringType == 'NJ':
        newD = NJUtils.getDistUForNJ(d, newD, f, g, u, seq)
    elif clusteringType == 'UPGMA':
        newD = UPGMAUtils.getDistUForUPGMA(d, newD, f, g, u, seq, numF, numG)
    else:  # 'WPGMA'
        newD = WPGMAUtils.getDistUForWPGMA(d, newD, f, g, u, seq)

    seq.append(u)
    num.append(numF + numG + 1)

    return newD, seq, num, tree, iF, iG
Example #2
0
def nj(distMat, texaName = None, precision = 5, dspTree = True, saveFlag = True):
    """
    Build an unrooted tree from a distance matrix by neighbor joining and
    return it as a Newick string.

    Clusters are joined pairwise until two remain; those last two are then
    connected directly, each receiving half of their remaining distance.

    Parameters:
        distMat   -- square distance matrix; converted to a float numpy array
                     and validated by util.checkInput.
        texaName  -- optional list of taxon names. When omitted (or empty) the
                     letters 'A', 'B', ... are used; note that
                     string.ascii_uppercase only supplies 26 names.
        precision -- forwarded to util.findMinInd and util.newickStringFormat.
        dspTree   -- when true, the tree is drawn with Phylo.draw and with
                     R's 'ape' package through rpy2.
        saveFlag  -- when true, the Newick string is written to "result.nw"
                     in the current working directory.

    Side effects: prints the cluster names, the Newick string and the total
    tree length, and calls util.estimateError with the result.

    NOTE: this function is Python 2 code (print statements, and range()
    results are concatenated as lists).
    """
    # Convert to a float array and check the input distance matrix format.
    distMat = np.array(distMat, dtype = float);
    util.checkInput(distMat); 
    
    # Initialization
    #njTree = JTree;
    # NOTE(review): slicing a numpy array with [:] yields a view, not a copy.
    # It still preserves the input here because distMat is only ever rebound
    # to new arrays below, never modified in place.
    distMatInput = distMat[:] # kept for the error estimate at the end
    numTexa = distMat.shape[0];
    if texaName:
        newick = texaName[:]; 
    else:
        # Default names: one uppercase letter per taxon (at most 26).
        newick = [ch for ch in string.ascii_uppercase[0:numTexa]]; 
        texaName = newick[:];
    nodeName = newick[:]; # nested "(a,b)" name of every node created so far
    nodePT = range(0, numTexa); # for each row of distMat, its index in nodeName/newick
    edgeLengths = [];
    
    # combine clusters till the number of clusters (nc) is two
    for nc in range(numTexa, 2, -1):
        colSum = np.sum(distMat, axis = 0); # sums of columns 
        # Q[a,b] = (nc-2)*d[a,b] - colSum[a] - colSum[b]; the diagonal is set
        # to +inf so a node is never paired with itself.
        Q = (nc - 2) * distMat - np.tile(np.matrix(colSum).T, (1, nc)) \
            - np.tile(colSum, (nc, 1)) \
            + np.diag(np.repeat(np.inf, nc)); #Studier & Keppler 

        # find the indices corresponding to the minimum value of Q
        # NOTE(review): the original comment said the upper triangle is used
        # "to make sure i > j", but the index list built below only works
        # when i < j -- presumably findMinInd returns i < j; confirm in util.
        # print "Q", (Q == Q.T).all()
        [i,j] = util.findMinInd(Q, nodePT, numTexa, precision);
        nodeJoin = [nodePT[i], nodePT[j]];
        
        # Update node pointer and node name: register the joined node, drop
        # rows i and j from the pointer list and append the new node last.
        nodeName.append("(" + nodeName[nodePT[i]] 
                            + "," + nodeName[nodePT[j]] + ")");        
        newNodeInd = range(0,i) + range(i+1,j) + range(j+1,nc);
        nodePT = [nodePT[dummy] for dummy in newNodeInd];
        nodePT.append(len(nodeName) - 1);
                
        
        # compute distance from i and j to the new node (ij):
        #   len_i = (colSum[i] - colSum[j]) / (2*(nc-2)) + d[i,j]/2
        #   len_j = (colSum[j] - colSum[i]) / (2*(nc-2)) + d[i,j]/2
        branchLen = (np.dot(colSum[[i,j]], np.array([[1,-1],[-1,1]]))/(2*(nc-2))
                    + distMat[i,j]/2);
        edgeLengths.extend(branchLen);
        
        # update newick string
        newickStr = util.newickStringFormat(newick, nodeJoin, 
                                               branchLen, precision);        
        newick.append(newickStr);
        
        # compute distance between the new node (ij) and remaining nodes
        # excluding i and j and construct new distance matrix:
        #   d[(ij),k] = (d[i,k] + d[j,k] - d[i,j]) / 2
        dist = (np.sum(distMat[np.ix_([i,j], newNodeInd)], axis = 0)  
                - distMat[i,j]) / 2;  
        dist = np.array([dist]);
#        if np.any(dist < 0):
#            dist = np.fmax(dist,0); #no negative distance
        # The new node becomes the last row/column of the reduced matrix,
        # matching its position at the end of nodePT.
        distMatTop = np.concatenate((distMat[np.ix_(newNodeInd, newNodeInd)],
                                           dist.T), axis = 1);
        distMatBottom = np.append(dist, 0);
        distMat = np.vstack((distMatTop,[distMatBottom])); # update distance matrix
        
    # Two clusters remain: join them, giving each half of their distance,
    # and terminate the Newick string with ';'.
    nodeName.append("(" + nodeName[nodePT[0]] 
                            + "," + nodeName[nodePT[1]] + ")"); 
    newickStr = util.newickStringFormat(newick, nodePT, 
                                           distMat[[0,1],[1,0]]/2, precision) + ";";
    newick.append(newickStr);
    edgeLengths.append(distMat[0,1]);
    print "Tree Building\n", nodeName
    print "\nNewick Tree\n", newick[-1]
    if dspTree:
        # rpy2 is imported here, so it is only required when plotting.
        import rpy2.robjects.packages as rp
        # Phylo is presumably Bio.Phylo (imported elsewhere) -- TODO confirm.
        tree = Phylo.read(StringIO(newick[-1]), "newick");     
        Phylo.draw(tree);
        ape = rp.importr('ape')
        t = ape.read_tree(text = newick[-1])
        ape.plot_phylo(t, type = 'unrooted')

    # Compare the finished tree against the distances that were passed in.
    util.estimateError(newick[-1], distMatInput, texaName);
    print "Total length of the tree L = ", sum(edgeLengths)
    if saveFlag:
        with open("result.nw", 'w') as f:
            f.write(newick[-1]);
        
    return newick[-1]
Example #3
0
        Phylo.draw(tree);
        ape = rp.importr('ape')
        t = ape.read_tree(text = newick[-1])
        ape.plot_phylo(t, type = 'unrooted')

    util.estimateError(newick[-1], distMatInput, texaName);
    print "Total length of the tree L = ", sum(edgeLengths)
    if saveFlag:
        with open("result.nw", 'w') as f:
            f.write(newick[-1]);
        
    return newick[-1]
        


# Driver: load the distmat_6 sample and run neighbor joining on it.
# importDistMat's result is indexed as [0] -> names, [1] -> distance matrix.
testData = util.importDistMat('testData/TRex/distmat_6.dat')
names, distMat = testData[0], testData[1]
test = nj(distMat, names)
###
#import test_data  
#dataNum = 1;    
#distMat = test_data.distMat[dataNum]
#if dataNum in test_data.names:
#    names = test_data.names[dataNum];
#    test = nj(distMat, names);
#else:
#    test = nj(distMat)

#distMat = util.randomDistMatGen(25);
#test = nj(distMat)