def proposeCompWithDirichlet(self, theProposal):
    """Propose a new composition vector with a Dirichlet proposal.

    The composition changed is
    ``self.propTree.model.parts[theProposal.pNum].comps[theProposal.mtNum]``.
    ``theProposal.tuning`` is the Dirichlet alpha (the multiplier applied to
    the vector the proposal is centred on).

    Side effects: sets ``self.logProposalRatio`` and ``self.logPriorRatio``,
    and replaces the composition's ``val`` with the proposed vector.
    """
    part = self.propTree.model.parts[theProposal.pNum]
    comp = part.comps[theProposal.mtNum]
    alpha = theProposal.tuning
    states = range(part.dim)

    # comp.val is a list of floats, not a numpy.ndarray.  func.dirichlet1
    # works on lists; it copies its input, modifies the copy and returns it.
    # Signature: dirichlet1(inSeq, alpha, theMin, theMax, normalizeTo1=True)
    current = comp.val
    proposed = func.dirichlet1(current, alpha,
                               var.PIVEC_MIN, 1 - var.PIVEC_MIN)

    # Reset first; the real value is assigned once both densities are known.
    self.logProposalRatio = 0.0

    def logDensity(centre, point):
        # Log density of `point` under Dirichlet(alpha * centre).
        total = 0.0
        for i in states:
            total += centre[i] * alpha
        dens = pf.gsl_sf_lngamma(total)
        for i in states:
            dens -= pf.gsl_sf_lngamma(centre[i] * alpha)
        for i in states:
            dens += ((centre[i] * alpha) - 1.) * math.log(point[i])
        return dens

    # Reverse move (proposed -> current) over forward move (current -> proposed).
    logReverse = logDensity(proposed, current)
    logForward = logDensity(current, proposed)
    self.logProposalRatio = logReverse - logForward

    comp.val = proposed

    # The prior here is a flat Dirichlet, ie Dirichlet(1, 1, 1, ..., 1).
    # If it were informative, the prior ratio would be affected.
    self.logPriorRatio = 0.0
def proposeMergeRMatrix(self, theProposal):
    """Reversible-jump proposal: merge two pooled rMatrices into one.

    Two rMatrices that are in the pool of model part ``theProposal.pNum``
    are chosen at random and combined.  ``theProposal.tuning`` is the shape
    parameter (``p0``) of the Gamma draws used to scale the two rate vectors.

    Side effects:
      * sets ``self.logPriorRatio``, ``self.logProposalRatio`` and
        ``self.logJacobian``;
      * the first chosen rMatrix is re-used as the merged one (its ``val``,
        ``rj_f`` and ``nNodes`` are overwritten);
      * every non-root node that used either rMatrix is pointed at the
        merged one, both in Python and via ``pf.p4_setRMatrixNum``;
      * the second chosen rMatrix leaves the pool and ``rjRMatrix_k`` is
        decremented.

    Raises:
      Glitch: if the pool size ``rjRMatrix_k`` is not more than 1.
    """
    gm = ['Chain.proposeMergeRMatrix()']
    pNum = theProposal.pNum
    mp = self.propTree.model.parts[pNum]
    assert mp.rjRMatrix
    # dim*dim - dim is always even; '//' keeps rDim an int for range() even
    # where '/' is true division.
    rDim = ((mp.dim * mp.dim) - mp.dim) // 2
    p0 = theProposal.tuning
    uniformAllocation = var.rjRMatrixUniformAllocationPrior

    pool = [c for c in mp.rMatrices if c.rj_isInPool]

    # Check that k is more than 1.  This should have been checked before,
    # but check again.
    if mp.rjRMatrix_k <= 1:
        gm.append("part %i, rjRMatrix_k = %i" % (pNum, mp.rjRMatrix_k))
        gm.append('len of pool = %i (should be the same as rjRMatrix_k)' %
                  len(pool))
        gm.append(
            "rjRMatrix_k, the pool size, should be more than 1 for a merge. This isn't."
        )
        raise Glitch(gm)

    # Choose two rMatrices (to make into one).  They must be in the pool.
    assert len(pool) == mp.rjRMatrix_k
    rm1, rm2 = random.sample(pool, 2)

    # Non-root nodes currently using rm1 (beta1) and rm2 (beta2).
    beta1 = []
    beta2 = []
    for n in self.propTree.iterNodesNoRoot():
        theRMatrixNum = n.br.parts[pNum].rMatrixNum
        if theRMatrixNum == rm1.num:
            beta1.append(n)
        elif theRMatrixNum == rm2.num:
            beta2.append(n)
    beta0 = beta1 + beta2
    b1 = float(len(beta1))
    b2 = float(len(beta2))
    b0 = float(b1 + b2)
    assert len(beta0) == b0
    bPrime0 = b0 + 2.
    bPrime1 = b1 + 1.
    bPrime2 = b2 + 1.

    f1 = rm1.rj_f
    f2 = rm2.rj_f
    f0 = f1 + f2

    # Obtain the rMatrix proposal: scale each vector by a Gamma(p0, 1) draw
    # and take a weighted geometric mean, element by element.
    s1 = random.gammavariate(p0, 1.)
    s2 = random.gammavariate(p0, 1.)
    m1 = [v * s1 for v in rm1.val]
    m2 = [v * s2 for v in rm2.val]
    m0 = [
        math.exp(((bPrime1 / bPrime0) * math.log(m1[j])) +
                 ((bPrime2 / bPrime0) * math.log(m2[j])))
        for j in range(rDim)
    ]
    s0 = sum(m0)
    newVal0 = [m0k / s0 for m0k in m0]

    # ---- Log prior ratio ----
    # We could have a prior on the pool size, reflected in t1.  If all pool
    # sizes are equally probable, then t1 = 0.
    t1 = 0.
    if uniformAllocation:
        # rMatrices sit on branches, so only non-root nodes are counted.
        # This matches the allocation loop above and the count used by
        # proposeSplitRMatrix, which this move must exactly reverse.
        b = sum(1 for _ in self.propTree.iterNodesNoRoot())
        t2 = b * (math.log(mp.rjRMatrix_k) - math.log(mp.rjRMatrix_k - 1))
    else:
        t2 = (b0 * math.log(f0)) - (b1 * math.log(f1)) - (b2 * math.log(f2))
    # t3 is for the prior on rMatrices.  With the Dirichlet prior alpha
    # values all 1, t3 is log Gamma rDim.
    t3 = -pf.gsl_sf_lngamma(rDim)
    # t4 is for the f values.
    if uniformAllocation:
        t4 = 0.0
    else:
        # If it's a uniform Dirichlet, then t4 = -log(k - 1), where k is
        # from before the merge.
        t4 = -math.log(mp.rjRMatrix_k - 1)
    self.logPriorRatio = t1 + t2 + t3 + t4

    # ---- Log proposal ratio ----
    if mp.rjRMatrix_k == mp.nRMatrices:  # nRMatrices is k_max
        t1 = math.log(0.5)
    else:
        t1 = 0.
    if uniformAllocation:
        t2 = -(b0 * math.log(2.))
    else:
        t2 = (b1 * math.log(f1)) + (b2 * math.log(f2)) - (b0 * math.log(f0))

    # uu is the normal deviate the corresponding split would have needed
    # in order to produce m1 from m0.
    uu = [(bPrime1 / bPrime0) * math.sqrt(m0[j]) *
          (math.log(m1[j]) - math.log(m0[j])) for j in range(rDim)]
    sum_uu2 = sum([u * u for u in uu])
    lastTerm = pf.gsl_sf_lngamma(p0) - (0.5 * sum_uu2) - \
        ((rDim / 2.) * math.log(2 * math.pi))
    t3 = (s1 + s2 - s0) - (
        (p0 - 1.) * (math.log(s1) + math.log(s2) - math.log(s0))) + lastTerm
    self.logProposalRatio = t1 + t2 + t3

    # ---- The Jacobian ----
    lastTerm = 0.5 * sum([math.log(v) for v in newVal0])
    t1 = ((((3. * rDim) - 2.) / 2.) * math.log(s0)) - (
        (rDim - 1.) * (math.log(s1) + math.log(s2))) + lastTerm
    t2 = (2. * rDim * math.log(bPrime0)) - (
        rDim * (math.log(bPrime1) + math.log(bPrime2)))
    t3 = sum([uu[j] / (math.sqrt(s0 * newVal0[j])) for j in range(rDim)])
    t3 = ((bPrime0 * (bPrime2 - bPrime1)) / (bPrime1 * bPrime2)) * t3
    if uniformAllocation:
        self.logJacobian = -(t1 + t2 + t3)
    else:
        self.logJacobian = -(t1 + t2 + t3 + math.log(f0))

    # ---- Apply the merge ----
    # Merge rm1 and rm2 => rm0, where rm0 is actually rm1, re-used, by
    # giving the "0" values to rm1.
    rm1.rj_f = f0
    for rNum in range(rDim):
        rm1.val[rNum] = newVal0[rNum]
    for n in beta0:
        n.br.parts[pNum].rMatrixNum = rm1.num
        pf.p4_setRMatrixNum(n.cNode, pNum, rm1.num)
    rm1.nNodes = b0
    mp.rjRMatrix_k -= 1
    rm2.rj_isInPool = False
    rm2.nNodes = 0
def proposeSplitRMatrix(self, theProposal):
    """Reversible-jump proposal: split one pooled rMatrix into two.

    An rMatrix in the pool of model part ``theProposal.pNum`` is chosen at
    random, its non-root nodes are divided between two groups, and two new
    rate vectors are generated from the old one.  ``theProposal.tuning`` is
    the shape parameter (``p0``) of the Gamma draw that scales the old
    vector.

    Side effects:
      * sets ``self.logPriorRatio``, ``self.logProposalRatio`` and
        ``self.logJacobian``;
      * the chosen rMatrix is re-used for the first half (``val``, ``rj_f``,
        ``nNodes`` overwritten); an rMatrix that was not in the pool is
        filled in for the second half and put in the pool;
      * nodes are pointed at their rMatrix, both in Python and via
        ``pf.p4_setRMatrixNum``;
      * ``rjRMatrix_k`` is incremented.

    If ``math.exp`` overflows 100 times while generating the new vectors,
    ``theProposal.doAbort`` is set to True and the method returns without
    changing the model.
    """
    pNum = theProposal.pNum
    mp = self.propTree.model.parts[pNum]
    assert mp.rjRMatrix
    # dim*dim - dim is always even; '//' keeps rDim an int for range() even
    # where '/' is true division.
    rDim = ((mp.dim * mp.dim) - mp.dim) // 2
    uniformAllocation = var.rjRMatrixUniformAllocationPrior

    # Check that k is less than k_max, which is nRMatrices.  This should
    # have been checked before, but check again.
    assert mp.rjRMatrix_k < mp.nRMatrices

    # Select an existing rMatrix from the pool.
    pool = [c for c in mp.rMatrices if c.rj_isInPool]
    assert mp.rjRMatrix_k == len(pool)
    notInPool = [c for c in mp.rMatrices if not c.rj_isInPool]
    assert notInPool  # or else we can't split
    assert mp.nRMatrices == len(pool) + len(notInPool)
    rm0 = random.choice(pool)

    # The nodes currently associated with rm0.
    beta0 = [
        n for n in self.propTree.iterNodesNoRoot()
        if n.br.parts[pNum].rMatrixNum == rm0.num
    ]
    b0 = float(len(beta0))

    # Divvy up the contents of beta0 into (new) beta1 and beta2, based on
    # probability u.
    if uniformAllocation:
        u = 0.5
    else:
        u = random.random()
    beta1 = []
    beta2 = []
    for it in beta0:
        if random.random() < u:
            beta1.append(it)
        else:
            beta2.append(it)
    b1 = float(len(beta1))
    b2 = float(len(beta2))
    bPrime0 = b0 + 2.
    bPrime1 = b1 + 1.
    bPrime2 = b2 + 1.

    # Calculation of f1 and f2 depends on u.
    f0 = rm0.rj_f
    f1 = u * f0
    f2 = (1.0 - u) * f0

    uu = [random.normalvariate(0., 1.) for i in range(rDim)]
    p0 = theProposal.tuning
    s0 = random.gammavariate(p0, 1.)
    m0 = [s0 * it for it in rm0.val]

    # math.exp() can raise OverflowError ("math range error") here.  When
    # it does, draw fresh normal deviates and try again, giving up and
    # aborting the proposal after 100 failures.
    safety = 0
    while True:
        try:
            m1 = [
                m0[j] * math.exp(
                    (bPrime0 * uu[j]) / (bPrime1 * math.sqrt(m0[j])))
                for j in range(rDim)
            ]
            m2 = [
                m0[j] * math.exp(
                    (-bPrime0 * uu[j]) / (bPrime2 * math.sqrt(m0[j])))
                for j in range(rDim)
            ]
            break
        except OverflowError:
            print("Overflow error in splitRMatrix() (%2i)" % safety)
            safety += 1
            if safety >= 100:
                theProposal.doAbort = True
                print("Too many overflows in splitRMatrix.  Aborting!")
                return
            uu = [random.normalvariate(0., 1.) for i in range(rDim)]

    # s1 and s2 are the scale factors of m1 and m2.  They are needed
    # unchanged by the proposal ratio and the Jacobian below.
    s1 = sum(m1)
    s2 = sum(m2)
    newVal1 = [it / s1 for it in m1]
    newVal2 = [it / s2 for it in m2]

    def liftAboveRateMin(vals):
        # Reflect any value below var.RATE_MIN back above it, and
        # renormalise if anything changed.  A value v < RATE_MIN becomes
        # 2*RATE_MIN - v, which is never below RATE_MIN, so one pass is
        # enough.  A separate local holds the renormalising sum so that
        # s1 and s2 above are not overwritten.
        changed = False
        for vNum in range(len(vals)):
            if vals[vNum] < var.RATE_MIN:
                vals[vNum] = (var.RATE_MIN - vals[vNum]) + var.RATE_MIN
                changed = True
        if changed:
            total = sum(vals)
            vals = [it / total for it in vals]
        return vals

    # Peter adds the following to get the vals more than var.RATE_MIN.
    newVal1 = liftAboveRateMin(newVal1)
    newVal2 = liftAboveRateMin(newVal2)

    # ---- Log prior ratio ----
    # We could have a prior on the pool size, reflected in t1.  If all pool
    # sizes are equally probable, then t1 = 0.
    t1 = 0.
    if uniformAllocation:
        b = len([n for n in self.propTree.iterNodesNoRoot()])
        t2 = b * (math.log(mp.rjRMatrix_k) - math.log(mp.rjRMatrix_k + 1))
    else:
        t2 = (b1 * math.log(f1)) + (b2 * math.log(f2)) - (b0 * math.log(f0))
    # t3 is for the prior on rMatrices.  With the Dirichlet prior alpha
    # values all 1, t3 is log Gamma rDim.
    t3 = pf.gsl_sf_lngamma(rDim)
    # t4 is for the f values.
    if uniformAllocation:
        t4 = 0.0
    else:
        # If it's a uniform Dirichlet, then t4 = log k, where k is from
        # before the split.
        t4 = math.log(mp.rjRMatrix_k)
    self.logPriorRatio = t1 + t2 + t3 + t4

    # ---- Log proposal ratio ----
    if mp.rjRMatrix_k == 1:
        t1 = math.log(0.5)
    else:
        t1 = 0.
    if uniformAllocation:
        t2 = b0 * math.log(2.)
    else:
        t2 = (b0 * math.log(f0)) - (b1 * math.log(f1)) - (
            b2 * math.log(f2))
    sum_uu2 = sum([v * v for v in uu])
    lastTerm = -pf.gsl_sf_lngamma(p0) + (0.5 * sum_uu2) + \
        ((rDim / 2.) * math.log(2 * math.pi))
    t3 = (s0 - s1 - s2) + (
        (p0 - 1.) * (math.log(s1) + math.log(s2) - math.log(s0))) + lastTerm
    self.logProposalRatio = t1 + t2 + t3

    # ---- The Jacobian ----
    # Uses rm0.val, so this must come before rm0's values are overwritten.
    lastTerm = 0.5 * sum([math.log(v) for v in rm0.val])
    t1 = ((((3. * rDim) - 2.) / 2.) * math.log(s0)) - (
        (rDim - 1.) * (math.log(s1) + math.log(s2))) + lastTerm
    t2 = (2. * rDim * math.log(bPrime0)) - (
        rDim * (math.log(bPrime1) + math.log(bPrime2)))
    t3 = sum([uu[j] / (math.sqrt(s0 * rm0.val[j])) for j in range(rDim)])
    t3 = ((bPrime0 * (bPrime2 - bPrime1)) / (bPrime1 * bPrime2)) * t3
    if uniformAllocation:
        self.logJacobian = t1 + t2 + t3
    else:
        self.logJacobian = t1 + t2 + t3 + math.log(f0)

    # ---- Apply the split ----
    # We will now make rm1 and rm2.  The rm1 will be made from rm0, and rm2
    # will be popped from the notInPool list.  We have newVal1 and newVal2
    # which will be their vals, and we assign them to nodes in beta1 and
    # beta2, and give them rj_f values of f1 and f2.
    rm1 = rm0
    for rNum in range(rDim):
        rm1.val[rNum] = newVal1[rNum]
    rm1.rj_f = f1
    for n in beta1:
        # The Python-side number is already rm1.num; it is set anyway.
        n.br.parts[pNum].rMatrixNum = rm1.num
        pf.p4_setRMatrixNum(n.cNode, pNum, rm1.num)
    rm1.nNodes = b1

    rm2 = notInPool.pop()
    for rNum in range(rDim):
        rm2.val[rNum] = newVal2[rNum]
    rm2.rj_f = f2
    for n in beta2:
        n.br.parts[pNum].rMatrixNum = rm2.num
        pf.p4_setRMatrixNum(n.cNode, pNum, rm2.num)
    rm2.nNodes = b2
    rm2.rj_isInPool = True

    mp.rjRMatrix_k += 1
def proposeRMatrixWithSlider(self, theProposal):
    """Propose new values for one rMatrix of the proposal tree.

    The rMatrix changed is
    ``self.propTree.model.parts[theProposal.pNum].rMatrices[theProposal.mtNum]``.

    * If its ``spec`` is ``'2p'`` the move is a Dirichlet proposal on the
      two-element vector ``(val/(val+1), 1/(val+1))`` with
      ``theProposal.tuning`` as the Dirichlet multiplier, and
      ``self.logProposalRatio`` is set accordingly.
    * Otherwise two rates are picked at random and a sliding window of
      width ``theProposal.tuning`` (temporarily narrowed if it would not
      fit) moves their relative share, with reflection at the bounds
      implied by ``var.RATE_MIN``; all rates are then renormalised to sum
      to 1 and ``self.logProposalRatio`` is set to 0.

    ``self.logPriorRatio`` is set to 0 in both cases.

    Raises:
      Glitch: if more than 20 reflections are needed in the slider move.
    """
    # Message list for Glitch; needed on the too-many-reflections path.
    gm = ['Chain.proposeRMatrixWithSlider()']

    assert var.rMatrixNormalizeTo1
    mtCur = self.curTree.model.parts[theProposal.pNum].rMatrices[
        theProposal.mtNum]
    mtProp = self.propTree.model.parts[theProposal.pNum].rMatrices[
        theProposal.mtNum]

    if mtProp.spec == '2p':
        # For 2p, its actually a Dirichlet, not a slider.  All this is
        # stolen from MrBayes, where the default tuning is 50.  In
        # MrBayes, the "alphaDir" is a 2-item list of Dirichlet
        # parameters (not the multiplier) but they are both by default
        # 1, which makes the prior ratio 1.0 and the logPriorRatio
        # zero.
        tuning = theProposal.tuning
        old = [0.0, 0.0]
        old[0] = mtCur.val / (mtCur.val + 1.0)
        old[1] = 1.0 - old[0]
        new = func.dirichlet1(old, tuning, var.KAPPA_MIN, var.KAPPA_MAX,
                              normalizeTo1=True)
        mtProp.val[0] = new[0] / new[1]

        def logDensity(centre, point):
            # Log density of `point` under Dirichlet(tuning * centre).
            theSum = 0.0
            for i in range(2):
                theSum += centre[i] * tuning
            dens = pf.gsl_sf_lngamma(theSum)
            for i in range(2):
                dens -= pf.gsl_sf_lngamma(centre[i] * tuning)
            for i in range(2):
                dens += ((centre[i] * tuning) - 1.0) * math.log(point[i])
            return dens

        # Reverse move (new -> old) over forward move (old -> new).
        self.logProposalRatio = logDensity(new, old) - logDensity(old, new)

    else:  # specified, ones, eg gtr
        mt = mtProp
        # mt.val is a numpy array
        assert isinstance(mt.val, numpy.ndarray)
        nRates = len(mt.val)  # eg 6 for dna gtr, not 5
        idxA, idxB = random.sample(range(nRates), 2)
        currentAplusB = mt.val[idxA] + mt.val[idxB]
        thisMin = var.RATE_MIN / currentAplusB
        thisMax = 1. - thisMin
        minToMaxDiff = thisMax - thisMin

        # It is possible that both A and B values are very close to
        # var.RATE_MIN, in which case thisMin and thisMax will both be
        # close to 0.5, and so the tuning will be too much, requiring too
        # many reflections.  In that case, just change the tuning
        # temporarily.
        thisTuning = theProposal.tuning
        if thisTuning > minToMaxDiff:
            thisTuning = minToMaxDiff

        x = mt.val[idxA] / currentAplusB
        y = x + (thisTuning * (random.random() - 0.5))

        # Reflect y back into [thisMin, thisMax].
        safety = -1
        while True:
            safety += 1
            if safety > 20:
                gm.append(
                    "Did more than 20 reflections -- something is wrong.")
                raise Glitch(gm)
            if y < thisMin:
                y = thisMin + (thisMin - y)
            elif y > thisMax:
                y = thisMax - (y - thisMax)
            else:
                break

        # The pair keeps its combined total; only the split between them
        # changes.
        mt.val[idxA] = y * currentAplusB
        mt.val[idxB] = currentAplusB - mt.val[idxA]

        # Renormalise all the rates, in place, to sum to 1.
        mySum = 0.0
        for stNum in range(nRates):
            mySum += mt.val[stNum]
        for stNum in range(nRates):
            mt.val[stNum] /= mySum

        self.logProposalRatio = 0.0

    self.logPriorRatio = 0.0