def _taxOrder(trees) : dtops = Counter(toNewick(tree, topologyOnly=1) for tree in trees) averagedPosteriorDistances = defaultdict(lambda : 0) for tx in dtops: tree = parseNewick(tx) distances = dict() taxaDistance(tree, tree.root, distances) for k in distances: averagedPosteriorDistances[k] += distances[k] * dtops[tx]/len(trees) taxa = sorted(trees[0].get_taxa()) ntax = len(taxa) for j,k in allPairs(taxa) : averagedPosteriorDistances[k,j] = averagedPosteriorDistances[j,k] def groupDistance(g1,g2,dm) : return mean([dm[i] for i in itertools.product(g1,g2)]) ## def groupDistance(g1,g2,dm) : ## return min([dm[i] for i in itertools.product(g1,g2)]) groups = [[x] for x in taxa] dm = averagedPosteriorDistances while len(groups) > 1: # find the two closest groups. dists = [(groupDistance(groups[j],groups[k],dm), (j,k)) for j,k in allPairs(range(len(groups)))] dists = sorted(dists) d,(ij,ik) = dists[0] # 123 abc 0,0 0,1 1,0 02 11 20 # 321 abc # 123 cba # 321 cba # abc 123 # abc 321 # cba 123 # cba 321 g1,g2 = groups[ij],groups[ik] def gid(g1,g2,dm) : d = [] for n in range(len(g1)+len(g2) - 1) : for i in range(-1,max(-len(g1)-1,-(len(g2)-n+1)), -1) : #i > -(len(g2)-n+1) d.append(dm[g1[i], g2[n-(i+1)]]) return d dis = gid(g1,g2,dm),gid(g1,list(reversed(g2)),dm),gid(g2, g1,dm), gid(list(reversed(g2)),g1,dm) dis = sorted(zip(dis,range(4))) o = dis[0][1] if o & 1 : g2 = list(reversed(g2)) if o > 1 : g1,g2 = g2,g1 groups[ij] = g1 + g2 del groups[ik] otaxa = groups[0] return otaxa
def getTaxaOrder(trees, refTree=None, reportTopologies=False, nSample=1, progress=False):
    """Search for a taxa order that maximizes ``getCR`` against averaged distances.

    The pairwise taxa distances of every distinct topology in ``trees`` are
    averaged, each topology weighted by the fraction of trees showing it.
    For each reference tree the search repeatedly applies the single
    child-swap whose resulting order scores highest under ``getCR``, until no
    swap improves the score. The best result over all reference trees wins.

    trees            -- a sequence of trees (``len()`` is used, and the
                        sequence is iterated again when ``reportTopologies``
                        is set).
    refTree          -- tree to optimize; when None, the ``nSample`` most
                        frequent topologies of ``trees`` are used instead.
                        NOTE(review): the swaps assign to ``node.succ`` of
                        nodes returned by getAllSingleFlipTaxaOrders, so a
                        supplied ``refTree`` is presumably modified in place.
    reportTopologies -- when true, also return a dict mapping each topology
                        (topology-only Newick string) to the list of indices
                        in ``trees`` having that topology.
    nSample          -- how many of the most frequent topologies to try.
                        NOTE: 0 selects *all* topologies, because the slice
                        ``[-0:]`` is the whole list.
    progress         -- when true, write progress messages to stderr.

    Returns ``(order, tree)``, or ``(order, tree, topologies)`` when
    ``reportTopologies`` is set.

    Raises ValueError when there is no reference tree to work with (empty
    ``trees`` and no ``refTree``).
    """
    def report(*items):
        # Progress fragment: items separated and followed by a single space,
        # no newline (same text the Python 2 "print >> sys.stderr, ...," form
        # produced, but also valid under Python 3).
        if progress:
            sys.stderr.write(" ".join(str(item) for item in items) + " ")

    def reportLine(*items):
        # Progress fragment terminated by a newline.
        if progress:
            sys.stderr.write(" ".join(str(item) for item in items) + "\n")

    report("getting topologies...")
    # Number of trees sharing each topology (keyed by topology-only Newick).
    dtops = Counter(toNewick(tree, topologyOnly=1) for tree in trees)

    # float() forces true division: under Python 2 an integral distance times
    # an integral count would otherwise be floor-divided by the tree count.
    nTrees = float(len(trees))

    averagedPosteriorDistances = defaultdict(lambda: 0)
    report("taxa distance matrix (%d tops)..." % len(dtops))
    for tx, count in dtops.items():
        tree = INexus.Tree(tx)
        distances = dict()
        taxaDistance(tree, tree.root, distances)
        for k in distances:
            averagedPosteriorDistances[k] += distances[k] * count / nTrees

    if refTree is None:
        # Ascending by frequency, so the tail holds the most frequent ones.
        sdtops = sorted(dtops, key=lambda top: dtops[top])
        refTrees = [INexus.Tree(top) for top in sdtops[-nSample:]]
    else:
        refTrees = [refTree]

    if not refTrees:
        # Previously surfaced as an UnboundLocalError at the return statement.
        raise ValueError("getTaxaOrder: no trees to derive a taxa order from")

    overallMaxCR = -1
    overallMaximizingOrder = None
    mtree = None
    for tree in refTrees:
        # The first entry is used as a plain order (the tree as it stands);
        # the remaining entries unpack as (order, node) pairs, one per
        # candidate swap.
        allOrders = getAllSingleFlipTaxaOrders(tree, tree.root)
        maximizingOrder = allOrders[0]
        treeMaxCR = getCR(maximizingOrder, averagedPosteriorDistances)
        report("optimizing... (cr", treeMaxCR, ")")

        nTries = 0
        while True:
            nTries += 1
            # Pick the best strictly-improving single swap of this pass.
            mnode = None
            for order, node in allOrders[1:]:
                cr = getCR(order, averagedPosteriorDistances)
                if cr > treeMaxCR:
                    treeMaxCR = cr
                    maximizingOrder = order
                    mnode = node
            report(treeMaxCR)

            if mnode is None:
                # No swap improves the score: local optimum reached.
                break

            # Apply the winning swap to the tree and enumerate again.
            mnode.succ = [mnode.succ[1], mnode.succ[0]]
            allOrders = getAllSingleFlipTaxaOrders(tree, tree.root)

        improved = treeMaxCR > overallMaxCR
        if improved:
            overallMaxCR = treeMaxCR
        if improved or mtree is None:
            # "mtree is None" keeps the first tree as a fallback when its
            # score does not exceed the initial -1, so the result is always
            # bound.
            overallMaximizingOrder = maximizingOrder
            mtree = tree

        reportLine("%d tries" % nTries)

    reportLine("done")

    if reportTopologies:
        topologyIndices = {top: [] for top in dtops}
        for k, tree in enumerate(trees):
            topologyIndices[toNewick(tree, topologyOnly=1)].append(k)
        return (overallMaximizingOrder, mtree, topologyIndices)

    return (overallMaximizingOrder, mtree)