def mpat(tr, seqs):
    """Progressively align the tip sequences of ``tr``, guided by the tree.

    Nodes are visited in the order produced by ``getPostOrder``.  Every
    visited node gets a ``data.seq`` pair:

    * a node without successors gets ``(None, sequence)``, the sequence
      being looked up in ``seqs`` under the node's ``data.taxon``;
    * any other node gets ``(profile, None)``, built from the ``seq`` pairs
      of its two successors.

    Two raw sequences are aligned with ``calign.globalAlign`` and the result
    is turned into a profile with ``calign.createProfile``.  In every other
    case both sides are brought to profile form and combined with
    ``calign.prof2profAlign``; that result is passed through ``trimp``
    before being stored.

    tr   -- tree providing ``node(id)`` and ``root``.
    seqs -- anything ``dict()`` accepts, mapping taxon to sequence.

    Returns the first element of the ``seq`` pair of the last visited node,
    which is asserted to be the root.
    """
    seq_of_taxon = dict(seqs)

    for node in getPostOrder(tr):
        if not node.succ:
            node.data.seq = (None, seq_of_taxon[node.data.taxon])
            continue

        left, right = [tr.node(child).data.seq for child in node.succ]

        if left[1] and right[1]:
            # Both children still hold a raw sequence: plain pairwise alignment.
            pairwise = calign.globalAlign(left[1], right[1])
            node.data.seq = (calign.createProfile(pairwise), None)
            continue

        # At least one side is already a profile.  A raw sequence is promoted
        # by profiling the one-element tuple ``pair[1:]`` that holds it.
        left_profile = calign.createProfile(left[1:]) if left[1] else left[0]
        right_profile = calign.createProfile(right[1:]) if right[1] else right[0]
        merged = calign.prof2profAlign(left_profile, right_profile)
        node.data.seq = (trimp(merged), None)

    assert node.id == tr.root
    return node.data.seq[0]
def mpa(tr, seqs, scores = defaultMatchScores, trimEnd = None):
    """Progressively align the tip sequences of ``tr``, guided by the tree.

    Nodes are visited in the order produced by ``getPostOrder``.  Every
    visited node gets a ``data.seq`` pair:

    * a node without successors gets ``(None, sequence)``; the sequence is
      looked up in ``seqs`` under the node's ``data.taxon`` with any leading
      or trailing single quotes stripped;
    * any other node gets ``(profile, None)``, built from the ``seq`` pairs
      of its two successors.

    tr      -- tree providing ``node(id)`` and ``root``.
    seqs    -- anything ``dict()`` accepts, mapping taxon to sequence.
    scores  -- forwarded as ``scores=`` to ``calign.globalAlign`` and
               ``calign.prof2profAlign``.
    trimEnd -- when not None, every profile-profile alignment is passed
               through ``trimendsp(alignment, trimEnd)`` before being stored.

    Returns the first element of the ``seq`` pair of the last visited node,
    which is asserted to be the root.
    """
    seq_of_taxon = dict(seqs)

    for node in getPostOrder(tr):
        if not node.succ:
            node.data.seq = (None, seq_of_taxon[node.data.taxon.strip("'")])
            continue

        left, right = [tr.node(child).data.seq for child in node.succ]

        if left[1] and right[1]:
            # Both children still hold a raw sequence: plain pairwise alignment.
            pairwise = calign.globalAlign(left[1], right[1], scores = scores)
            node.data.seq = (calign.createProfile(pairwise), None)
            continue

        # At least one side is already a profile.  A raw sequence is promoted
        # by profiling the one-element tuple ``pair[1:]`` that holds it.
        left_profile = calign.createProfile(left[1:]) if left[1] else left[0]
        right_profile = calign.createProfile(right[1:]) if right[1] else right[0]
        merged = calign.prof2profAlign(left_profile, right_profile, scores = scores)
        if trimEnd is not None:
            merged = trimendsp(merged, trimEnd)
        node.data.seq = (merged, None)

    assert node.id == tr.root
    return node.data.seq[0]
def _populateTreeWithNodeToTipDistances(tree):
    """Store on every node a ``data.dtips`` table of [tip, distance] entries.

    Nodes are visited in the order produced by ``getPostOrder``:

    * a node without successors gets ``[[[node, 0]], [], []]``;
    * any other node gets one list per successor.  That list holds the
      entries of the successor's ``dtips[0]`` followed by those of its
      ``dtips[1]``, each with the successor's ``data.branchlength`` added to
      the distance.  Unless the node is the root, one more empty list is
      appended after the per-successor lists.

    Finally ``_populateTipDistancesFromParent`` is called with the tree, its
    root node and an empty list (presumably to fill the trailing slot with
    distances reached through the parent -- confirm against that helper).
    """
    for node in getPostOrder(tree):
        if not node.succ:
            node.data.dtips = [[[node, 0]], [], []]
            continue

        table = []
        for child_id in node.succ:
            child = tree.node(child_id)
            via_child = []
            for below in (child.data.dtips[0], child.data.dtips[1]):
                for entry in below:
                    via_child.append([entry[0], entry[1] + child.data.branchlength])
            table.append(via_child)

        if node.id != tree.root:
            table.append([])
        node.data.dtips = table

    root_node = tree.node(tree.root)
    _populateTipDistancesFromParent(tree, root_node, [])
def _UMTree_slsqp(tin, targetHeight = None, useDerives = True, doInit = False,
                  normed = True, niter=1000, r0x = None, verb = 0,
                  internals = False) :
    """Fit per-branch multipliers with SLSQP so that all tips line up at
    ``targetHeight``, and return a re-scaled copy of ``tin``.

    The function works on a deep copy of ``tin``.  It generates the source
    of two functions, ``fx(r)`` and ``fxp(r)``, and executes it into this
    module's globals (so each call replaces the previous ``fx``/``fxp``):

    * ``fx(r)`` returns ``(constraints, heights)``.  ``heights`` holds one
      value per node id in ``nmap`` (every non-root node).  ``constraints``
      holds, for each internal non-root node, the difference between the
      heights of its two successors, followed by ``ch1``/``ch2`` (height of
      the root's second/first successor minus ``targetHeight``) and ``crt``.
    * ``fxp(r)`` returns the matching rows of derivatives with respect
      to ``r``.

    Layout of ``r`` (``nr`` = 2 * (number of terminals - 1), length
    ``nr + 2``): ``r[i]`` multiplies the branch length of the node whose
    ``data.myindx`` is ``i`` in that node's height expression.  The root's
    successors use indices ``nr-2`` (first) and ``nr-1`` (second); their
    height expressions use ``r[nr]`` and ``r[nr+1]`` directly, and ``crt``
    requires ``r[nr-2]*r[nr] + r[nr-1]*r[nr+1]`` to equal the sum of the two
    root branch lengths.

    Parameters
    tin          -- input tree (not modified).
    targetHeight -- positive number; ``treeHeightEstimate`` of the copy
                    when None.
    useDerives   -- pass analytic derivatives (``fprime``,
                    ``fprime_eqcons``) to the optimizer.
    doInit       -- when no ``r0x`` is given, derive the start point from
                    ``calcNoderateMultipliers`` instead of a constant vector.
    normed       -- minimise ``_targetNormedVar`` rather than ``_targetVar``.
    niter        -- iteration limit handed to the optimizer.
    r0x          -- explicit start vector; overrides ``doInit``.
    verb         -- optimizer ``iprint`` level.
    internals    -- also return ``r`` and ``(fx, fxp, fm, fmp)``.

    Returns ``(t, fm(r))``, or ``(t, fm(r), r, (fx, fxp, fm, fmp))`` when
    ``internals`` is true.  On ``t`` every non-root node's data carries
    ``ph`` (fitted height), ``subsbranchlength`` (the previous branch
    length), ``attributes = {'clockrate': r[myindx]}`` and a new
    ``branchlength`` (height difference to the first successor, or the
    height itself for a terminal; clamped at 0).

    Raises RuntimeError carrying the optimizer's exit message when the exit
    mode is non-zero and none of the tolerance checks below accepts it.
    """
    t = copy.deepcopy(tin)

    nmap = []        # node ids, in the order fx returns their heights
    code = []        # body lines of the generated fx
    primecode = []   # body lines of the generated fxp
    codeNodes = []   # internal non-root nodes owning a "c<id>" constraint

    # Number of branches
    nr = 2*(len(t.get_terminals()) - 1)
    if targetHeight is None :
        targetHeight = treeHeightEstimate(t)
    assert float(targetHeight) == targetHeight and targetHeight > 0

    for n in getPostOrder(t) :
        if n.id != t.root :
            if n.prev != t.root :
                n.data.myindx = len(nmap)
                n.data.hexpr = "%.14f * r[%d]" % (n.data.branchlength, n.data.myindx)
                c = ["0"]*(nr+2)
                c[n.data.myindx] = "%.14f" % n.data.branchlength
                n.data.hpexpr = ",".join(c)
                nmap.append(n.id)
            else :
                # Successors of the root: their own term is prepended once
                # the root itself is visited (below).
                n.data.hexpr = ""
                n.data.hpexpr = ""

            if n.succ :
                n.data.hexpr += ' + h%d' % n.succ[0]
                n.data.hpexpr = cmbn(n.data.hpexpr, n.succ[0])

            if n.prev != t.root and n.data.hexpr :
                n.data.chexpr = "h%d = %s" % (n.id, n.data.hexpr)
                code.append(n.data.chexpr)
                primecode.append("hp%d = [%s]" % (n.id, n.data.hpexpr))
        else :
            # left,right rates, left right branch : len(nmap)-2 to len(nmap)+1
            assert nr - 2 == len(nmap)

            lft = t.node(n.succ[0])
            lft.data.myindx = nr-2
            lft.data.hexpr = "r[%d] " % (nr) + lft.data.hexpr
            lft.data.chexpr = "h%d = %s" % (lft.id,lft.data.hexpr)
            sss = ",".join(["0"]*(nr) + ["1","0"])
            if lft.data.hpexpr :
                lft.data.hpexpr = "x1+y1 for x1,y1 in zip([%s], %s)" % \
                                  (sss,lft.data.hpexpr)
            else :
                lft.data.hpexpr = sss

            rht = t.node(n.succ[1])
            rht.data.myindx = nr-1
            rht.data.hexpr = "r[%d] " % (nr+1) + rht.data.hexpr
            rht.data.chexpr = "h%d = %s" % (rht.id,rht.data.hexpr)
            code.append(rht.data.chexpr)
            code.append("ch1 = h%d - %.14f" % (rht.id, targetHeight))
            sss = ",".join(["0"]*(nr) + ["0","1"])
            if rht.data.hpexpr :
                rht.data.hpexpr = "x2+y2 for x2,y2 in zip([%s], %s)" % \
                                  (sss, rht.data.hpexpr)
            else :
                rht.data.hpexpr = sss

            nmap.extend(n.succ)

            code.append(lft.data.chexpr)
            primecode.append("cph1 = hp%d = [%s]" % (rht.id, rht.data.hpexpr))
            primecode.append("cph2 = hp%d = [%s]" % (lft.id, lft.data.hpexpr))
            code.append("ch2 = h%d - %.14f" % (lft.id, targetHeight))
            brr = rht.data.branchlength + lft.data.branchlength
            code.append("crt = (r[%d] * r[%d] + r[%d] * r[%d]) - %.15f" % (nr-2,nr,nr-1,nr+1,brr))
            # Join one list so that a two-tip tree (nr == 2, no leading
            # zeros) does not produce a row starting with a stray comma.
            sss = ",".join(["0"]*(nr-2) +
                           ["r[%d]" % k for k in (nr, nr+1, nr-2, nr-1)])
            primecode.append("cprt = [%s]" % sss)

        if n.succ and n.id != t.root:
            code.append("c%d = h%d - h%d" % ((n.id,) + tuple(n.succ)))
            primecode.append("cp%d = [x-y for x,y in zip(hp%d, hp%d)]" % ((n.id,) + tuple(n.succ)))
            codeNodes.append(n.id)

    # Assemble and define fx / fxp in module globals.
    # NOTE(review): this is the Python 2 exec statement; under Python 3 the
    # same text parses as an expression and would not define fx globally.
    cd = ["def fx(r) :"]
    cd.extend([" " + x for x in code])
    ccs = ["c%d" % k for k in codeNodes] + ['ch1','ch2','crt']
    hhs = ["h%d" % k for k in nmap]
    cd.append(" return [" + ",".join(ccs) + "], [" + ",".join(hhs) + "]")
    exec ( "\n".join(cd) ) in globals()

    cdp = ["def fxp(r) :"]
    cdp.extend([" " + x for x in primecode])
    ccs = ["cp%d" % k for k in codeNodes] + ['cph1','cph2','cprt']
    cdp.append(" return [" + ",".join(ccs) + "]")
    exec ( "\n".join(cdp) ) in globals()

    # optimization target
    if r0x is not None :
        r0 = r0x
    else :
        if doInit:
            calcNoderateMultipliers(t, targetHeight)
            cl,cr = t.node(t.root).succ
            btl,btr = (t.node(cl).data.timebranchlength ,
                       t.node(cr).data.timebranchlength)
            r0 = [t.node(c).data.timebranchlength/t.node(c).data.branchlength
                  for c in nmap] + \
                 [btl, btr]
            # The two root-successor slots (nr-1 and nr-2) are inverted.
            r0[-3] = 1/r0[-3]
            r0[-4] = 1/r0[-4]
        else :
            r0 = [1]*(nr) + [targetHeight*.1]*2

    assert nr == len(nmap)

    slsqp = scipy.optimize.slsqp.fmin_slsqp

    # Pick the objective before branching: it is needed with and without
    # analytic derivatives, and again for the return value.
    fm,fmp = (_targetNormedVar,_targetNormedVarDerivatives) if normed \
             else (_targetVar, _targetVarDerivatives)

    bounds = [(1e-10,10)]*nr + [(0,targetHeight)]*2
    if useDerives :
        # Original author's note on the derivatives: most are constant,
        # can improve.
        re = slsqp(fm, r0, fprime = lambda x : array(fmp(x)),
                   f_eqcons = lambda x : array(fx(x)[0]),
                   fprime_eqcons = lambda x : array(fxp(x)),
                   bounds = bounds,
                   iter = niter, iprint=verb, full_output=1)
    else :
        re = slsqp(fm, r0,
                   f_eqcons = lambda x : array(fx(x)[0]),
                   bounds = bounds,
                   iter = niter, iprint=verb, full_output=1)

    # With full_output the result is (x, f(x), iterations, exit mode,
    # exit message).
    r = re[0]
    if re[3] != 0 :
        # Mode 8 is tolerated when one of the last two parameters sits
        # (within 1e-9) on the targetHeight bound.
        atBound = (abs(targetHeight - r[-2]) < 1e-9 or
                   abs(targetHeight - r[-1]) < 1e-9)
        if not (re[3] == 8 and atBound) :
            # Otherwise modes 8 and 9 are accepted only if every equality
            # constraint is satisfied to 1e-8.
            ok = re[3] in [8,9] and all([abs(x) < 1e-8 for x in fx(r)[0]])
            if not ok :
                raise RuntimeError(re[4])

    hs = fx(r)[1]
    # nmap is in post-order, so a successor's ph is set before its parent
    # reads it.
    for k,i in enumerate(nmap) :
        n = t.node(i)
        n.data.ph = hs[k]
        n.data.subsbranchlength = n.data.branchlength
        n.data.attributes = {'clockrate' : r[n.data.myindx]}
        if n.succ :
            n.data.branchlength = n.data.ph - t.node(n.succ[0]).data.ph
        else :
            n.data.branchlength = n.data.ph
        n.data.branchlength = max(n.data.branchlength, 0)

    if internals :
        return t, fm(r), r, (fx, fxp, fm, fmp)
    return t, fm(r)