Beispiel #1
0
def _taxOrder(trees) :
  """Return a linear ordering of the taxa built by greedy group merging.

  The pairwise taxa distances reported by ``taxaDistance`` are averaged over
  the distinct topologies found in ``trees``, each weighted by the fraction
  of trees showing that topology.  Every taxon then starts as its own group;
  the two groups with the smallest mean inter-group distance are merged
  repeatedly until a single group (the returned order) remains.

  trees -- sequence of trees; must support ``len()`` and indexing, and
           ``trees[0].get_taxa()`` supplies the taxa names.

  Returns the list of taxa in the chosen order.  Raises IndexError when
  ``trees`` is empty (from ``trees[0]``).
  """
  dtops = Counter(toNewick(tree, topologyOnly=1) for tree in trees)

  # Explicit float divisor: under Python 2, int * int / int floors, which
  # would silently truncate the averaged distances.
  ntrees = float(len(trees))

  averagedPosteriorDistances = defaultdict(lambda : 0)

  for tx in dtops:
    tree = parseNewick(tx)
    distances = dict()
    taxaDistance(tree, tree.root, distances)
    for k in distances:
      averagedPosteriorDistances[k] += distances[k] * dtops[tx] / ntrees

  taxa = sorted(trees[0].get_taxa())
  # Mirror every entry so lookups work with the pair in either order.
  for j,k in allPairs(taxa) :
    averagedPosteriorDistances[k,j] = averagedPosteriorDistances[j,k]

  def groupDistance(g1, g2, dm) :
    # Mean distance over all cross pairs of the two groups.
    return mean([dm[i] for i in itertools.product(g1,g2)])

  def gid(g1, g2, dm) :
    # Distances used to rank the arrangement "g1 followed by g2".
    # g1 is walked backwards from its last element (the one that would touch
    # g2); the element at offset a from g1's end is paired with g2[n + a].
    # The first entry is therefore dm[g1[-1], g2[0]], the pair at the join.
    # NOTE(review): the original scratch comment listed the pair order
    # "0,0 0,1 1,0 0,2 1,1 2,0", which this loop does not produce -- pairs
    # whose g2 index is smaller than the g1 offset are never visited.
    # Behaviour kept as is; confirm the intent.
    d = []
    for n in range(len(g1)+len(g2) - 1) :
      for i in range(-1,max(-len(g1)-1,-(len(g2)-n+1)), -1) :
        d.append(dm[g1[i], g2[n-(i+1)]])
    return d

  groups = [[x] for x in taxa]
  dm = averagedPosteriorDistances
  while len(groups) > 1:
    # Find the two closest groups; ties fall back to the smaller (j,k).
    _, (ij,ik) = min((groupDistance(groups[j],groups[k],dm), (j,k))
                     for j,k in allPairs(range(len(groups))))
    g1,g2 = groups[ij],groups[ik]
    rg2 = list(reversed(g2))

    # Four candidate arrangements, indexed 0..3:
    #   0: g1 + g2   1: g1 + reversed(g2)   2: g2 + g1   3: reversed(g2) + g1
    # The distance lists compare lexicographically, so the arrangement whose
    # join pair is closest wins; ties go to the lower index.
    candidates = (gid(g1,g2,dm), gid(g1,rg2,dm), gid(g2,g1,dm), gid(rg2,g1,dm))
    o = min(zip(candidates, range(4)))[1]
    if o & 1 :
      g2 = rg2
    if o > 1 :
      g1,g2 = g2,g1
    groups[ij] = g1 + g2
    del groups[ik]
  return groups[0]
Beispiel #2
0
def getTaxaOrder(trees, refTree = None, reportTopologies = False,
                 nSample = 1, progress = False) :
  """Search for a taxa order maximizing the score returned by ``getCR``.

  Pairwise taxa distances from ``taxaDistance`` are averaged over the
  distinct topologies in ``trees`` (weighted by topology frequency).  For
  each reference tree, single-node child flips are applied greedily for as
  long as a flip improves ``getCR(order, averagedDistances)``.

  trees            -- sequence of trees (must support ``len()``).
  refTree          -- tree to optimize; when None, the ``nSample`` most
                      frequent topologies in ``trees`` are used instead.
  reportTopologies -- when true, also return a dict mapping each topology
                      string to the indices of the trees having it.
  nSample          -- number of most frequent topologies to try when
                      ``refTree`` is None.  NOTE(review): 0 selects *all*
                      topologies because of the ``[-nSample:]`` slice.
  progress         -- when true, write progress messages to stderr.

  Returns ``(order, tree)`` or ``(order, tree, topologies)``.

  The nodes handed back by ``getAllSingleFlipTaxaOrders`` have their
  ``succ`` list swapped in place; presumably they belong to the reference
  tree, so a caller-supplied ``refTree`` ends up reordered -- confirm.

  Raises ValueError when there is no reference tree to work on (``trees``
  is empty and no ``refTree`` was given).
  """
  if progress: print >> sys.stderr, "getting topologies...",

  # Computed once and reused for the optional report below, so the report
  # does not depend on trees that were flipped in place in the meantime.
  tops = [toNewick(tree, topologyOnly=1) for tree in trees]
  dtops = Counter(tops)

  averagedPosteriorDistances = defaultdict(lambda : 0)

  if progress: print >> sys.stderr, ("taxa distance matrix (%d tops)..." % len(dtops)),

  # Explicit float divisor: under Python 2, int * int / int floors, which
  # would silently truncate the averaged distances.
  ntrees = float(len(trees))

  for tx in dtops:
    tree = INexus.Tree(tx)
    distances = dict()
    taxaDistance(tree, tree.root, distances)
    for k in distances:
      averagedPosteriorDistances[k] += distances[k] * dtops[tx] / ntrees

  if refTree is None:
    # Ascending by frequency, so the most frequent topologies are last.
    sdtops = sorted(dtops, key = lambda x : dtops[x])
    refTrees = [INexus.Tree(t) for t in sdtops[-nSample:]]
  else :
    refTrees = [refTree]

  if not refTrees :
    raise ValueError("no reference tree: 'trees' is empty and no refTree given")

  # None rather than a numeric floor, so the first tree is always accepted
  # whatever range getCR uses.
  overallMaxCR = None
  overallMaximizingOrder = None
  mtree = None

  for tree in refTrees :
    # As used here: item 0 is the current order, the remaining items are
    # (order, node) pairs, one per candidate flip.
    allOrders = getAllSingleFlipTaxaOrders(tree, tree.root)
    maximizingOrder = allOrders[0]
    treeMaxCR = getCR(maximizingOrder, averagedPosteriorDistances)

    if progress: print >> sys.stderr, "optimizing... (cr",treeMaxCR,")",
    nTries = 0

    while True:
      nTries += 1
      mnode = None

      # Pick the single flip giving the largest strict improvement.
      for order,node in allOrders[1:] :
        cr = getCR(order, averagedPosteriorDistances)

        if cr > treeMaxCR:
          treeMaxCR = cr
          maximizingOrder = order
          mnode = node

      if progress: print >> sys.stderr, treeMaxCR,

      if mnode is None:
        # No flip improves the score: local optimum reached.
        break

      # Apply the winning flip, then re-enumerate from the new arrangement.
      mnode.succ = [mnode.succ[1], mnode.succ[0]]
      allOrders = getAllSingleFlipTaxaOrders(tree, tree.root)

    if overallMaxCR is None or treeMaxCR > overallMaxCR :
      overallMaxCR = treeMaxCR
      overallMaximizingOrder = maximizingOrder
      mtree = tree

    if progress: print >> sys.stderr, ("%d tries" % nTries)

  if progress: print >> sys.stderr, "done"

  if reportTopologies :
    topologyIndices = {top: [] for top in dtops}
    for k,top in enumerate(tops) :
      topologyIndices[top].append(k)

    return (overallMaximizingOrder, mtree, topologyIndices)

  return (overallMaximizingOrder, mtree)