Example 1
def proposeCompWithDirichlet(self, theProposal):
    gm = ['Chain.proposeCompWithDirichlet()']

    mt = self.propTree.model.parts[theProposal.pNum].comps[theProposal.mtNum]
    dim = self.propTree.model.parts[theProposal.pNum].dim

    # mt.val is a list of floats, not a numpy.ndarray
    #print type(mt.val), type(mt.val[0])

    # The tuning is the Dirichlet alpha.
    #print theProposal.tuning

    # This method uses func.dirichlet1, which is for lists not numpy
    # arrays.  A copy of inSeq is made, and the copy is modified and
    # returned.
    #dirichlet1(inSeq, alpha, theMin, theMax, normalizeTo1=True)
    newVal = func.dirichlet1(mt.val, theProposal.tuning, var.PIVEC_MIN,
                             1 - var.PIVEC_MIN)

    self.logProposalRatio = 0.0

    rangeDim = range(dim)
    mySum = 0.0
    for stNum in rangeDim:
        mySum += newVal[stNum] * theProposal.tuning
    x = pf.gsl_sf_lngamma(mySum)
    for stNum in rangeDim:
        x -= pf.gsl_sf_lngamma(newVal[stNum] * theProposal.tuning)
    for stNum in rangeDim:
        x += ((newVal[stNum] * theProposal.tuning) - 1.) * math.log(
            mt.val[stNum])

    mySum = 0.0
    for stNum in rangeDim:
        mySum += mt.val[stNum] * theProposal.tuning
    y = pf.gsl_sf_lngamma(mySum)
    for stNum in rangeDim:
        y -= pf.gsl_sf_lngamma(mt.val[stNum] * theProposal.tuning)
    for stNum in rangeDim:
        y += ((mt.val[stNum] * theProposal.tuning) - 1.) * math.log(
            newVal[stNum])
    self.logProposalRatio = x - y
    mt.val = newVal

    # The prior here is a flat Dirichlet, ie Dirichlet(1, 1, 1, ..., 1),
    # so the prior ratio is 1 and the log prior ratio is zero.  An
    # informative prior would change that.
    self.logPriorRatio = 0.0
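
The Hastings ratio computed above is a ratio of two Dirichlet densities: the reverse density, Dirichlet(tuning * newVal) evaluated at the current value, over the forward density, Dirichlet(tuning * mt.val) evaluated at newVal.  The following is a minimal, self-contained sketch of that same calculation, not the library code: the function name is invented, math.lgamma stands in for pf.gsl_sf_lngamma, a plain gamma-draw-and-normalize stands in for func.dirichlet1, and a simple redraw stands in for the min/max handling.

import math
import random

def dirichletProposalLogHastings(cur, tuning, lo=1.e-18, hi=1.0):
    # Draw newVal ~ Dirichlet(tuning * cur) by normalizing gamma draws,
    # redrawing if any element falls outside (lo, hi).
    while True:
        g = [random.gammavariate(tuning * v, 1.0) for v in cur]
        s = sum(g)
        newVal = [x / s for x in g]
        if min(newVal) > lo and max(newVal) < hi:
            break

    def logDirichletPdf(x, alphas):
        # Log density of a Dirichlet(alphas) evaluated at the point x.
        return (math.lgamma(sum(alphas))
                - sum(math.lgamma(a) for a in alphas)
                + sum((a - 1.) * math.log(xi) for a, xi in zip(alphas, x)))

    logForward = logDirichletPdf(newVal, [tuning * v for v in cur])
    logReverse = logDirichletPdf(cur, [tuning * v for v in newVal])
    return newVal, logReverse - logForward

With a large tuning the proposal concentrates near the current value and the log Hastings ratio stays close to zero; with a small tuning the moves are bolder and the ratio matters more.
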
Example 2
def proposeMergeRMatrix(self, theProposal):
    gm = ['Chain.proposeMergeRMatrix()']

    mp = self.propTree.model.parts[theProposal.pNum]
    assert mp.rjRMatrix
    rDim = ((mp.dim * mp.dim) - mp.dim) / 2
    p0 = theProposal.tuning

    # Check that k is more than 1.  This should have been checked before, but check again.
    #print "rjRMatrix_k is currently %i, with %i rMatrices" % (mp.rjRMatrix_k, mp.nRMatrices)
    if mp.rjRMatrix_k <= 1:
        gm.append("part %i, rjRMatrix_k = %i" %
                  (theProposal.pNum, mp.rjRMatrix_k))
        pool = [c for c in mp.rMatrices if c.rj_isInPool]
        gm.append('len of pool = %i (should be the same as rjRMatrix_k)' %
                  len(pool))
        gm.append(
            "rjRMatrix_k, the pool size, should be more than 1 for a merge.  This isn't."
        )
        raise Glitch, gm

    # Choose two rMatrices (to make into one).  They must be in the pool.
    pool = [c for c in mp.rMatrices if c.rj_isInPool]
    assert len(pool) == mp.rjRMatrix_k
    rm1, rm2 = random.sample(pool, 2)
    #print "proposing to merge rMatrices %i and %i" % (rm1.num, rm2.num)

    beta1 = []
    beta2 = []
    for n in self.propTree.iterNodesNoRoot():
        theRMatrixNum = n.br.parts[theProposal.pNum].rMatrixNum
        if theRMatrixNum == rm1.num:
            beta1.append(n)
        elif theRMatrixNum == rm2.num:
            beta2.append(n)
    beta0 = beta1 + beta2
    b1 = float(len(beta1))
    b2 = float(len(beta2))
    b0 = float(b1 + b2)
    assert len(beta0) == b0
    bPrime0 = b0 + 2.
    bPrime1 = b1 + 1.
    bPrime2 = b2 + 1.
    f1 = rm1.rj_f
    f2 = rm2.rj_f
    f0 = f1 + f2

    # Obtain rMatrix proposal
    s1 = random.gammavariate(p0, 1.)
    s2 = random.gammavariate(p0, 1.)
    m1 = [v * s1 for v in rm1.val]
    m2 = [v * s2 for v in rm2.val]
    #print "m1 = ", m1
    #print "m2 = ", m2
    #print b0, b1, b2, bPrime0, bPrime1, bPrime2
    m0 = [
        math.exp(((bPrime1 / bPrime0) * math.log(m1[j])) +
                 ((bPrime2 / bPrime0) * math.log(m2[j]))) for j in range(rDim)
    ]

    #print "m0 = ", m0
    s0 = sum(m0)

    newVal0 = [m0k / s0 for m0k in m0]

    # Log prior ratio
    # We could have a prior on the pool size, reflected in t1.  If all pool sizes are equally probable, then t1 = 0
    t1 = 0.

    if var.rjRMatrixUniformAllocationPrior:
        b = len([n for n in self.propTree.iterNodes()])
        t2 = b * (math.log(mp.rjRMatrix_k) - math.log(mp.rjRMatrix_k - 1))
    else:
        t2 = (b0 * math.log(f0)) - (b1 * math.log(f1)) - (b2 * math.log(f2))

    # t3 is for the prior on rMatrices.  With the Dirichlet prior alpha values all 1, t3 is minus log Gamma(rDim), since the merge removes one rMatrix
    t3 = -pf.gsl_sf_lngamma(rDim)

    # t4 is for the f values.
    if var.rjRMatrixUniformAllocationPrior:
        t4 = 0.0
    else:
        # If it's a uniform Dirichlet, then t4 = - log (k - 1), where k is from before the merge
        t4 = -math.log(mp.rjRMatrix_k - 1)

    self.logPriorRatio = t1 + t2 + t3 + t4

    # Log proposal ratio
    if mp.rjRMatrix_k == mp.nRMatrices:  # nRMatrices is k_max
        t1 = math.log(0.5)
    else:
        t1 = 0.

    if var.rjRMatrixUniformAllocationPrior:
        t2 = -(b0 * math.log(2.))
    else:
        t2 = (b1 * math.log(f1)) + (b2 * math.log(f2)) - (b0 * math.log(f0))

    # for t3, below, do some pre-calculations
    uu = [(bPrime1 / bPrime0) * math.sqrt(m0[j]) *
          (math.log(m1[j]) - math.log(m0[j])) for j in range(rDim)]
    sum_uu2 = sum([u * u for u in uu])
    lastTerm = pf.gsl_sf_lngamma(p0) - (0.5 * sum_uu2) - \
                 ((rDim/2.) * math.log(2 * math.pi))
    t3 = (s1 + s2 - s0) - (
        (p0 - 1.) * (math.log(s1) + math.log(s2) - math.log(s0))) + lastTerm

    #logSterm = ((1. - p0) * (math.log(s1) + math.log(s2) - math.log(s0)))
    #print "s1=%.1f s2=%.1f s0=%.1f    sum_uu2=%.1f  logGamma(p0)=%.1f, logSterm=%.1f" % (
    #    s1, s2, s0, sum_uu2, pf.gsl_sf_lngamma(p0), logSterm)
    self.logProposalRatio = t1 + t2 + t3

    #self.logProposalRatio = 20.

    # The Jacobian
    lastTerm = 0.5 * sum([math.log(v) for v in newVal0])  # new 26 sept
    t1 = ((((3. * rDim) - 2.) / 2.) * math.log(s0)) - (
        (rDim - 1.) * (math.log(s1) + math.log(s2))) + lastTerm
    t2 = (2. * rDim *
          math.log(bPrime0)) - (rDim * (math.log(bPrime1) + math.log(bPrime2)))
    t3 = sum([uu[j] / (math.sqrt(s0 * newVal0[j])) for j in range(rDim)])
    t3 = ((bPrime0 * (bPrime2 - bPrime1)) / (bPrime1 * bPrime2)) * t3
    if var.rjRMatrixUniformAllocationPrior:
        self.logJacobian = -(t1 + t2 + t3)
    else:
        self.logJacobian = -(t1 + t2 + t3 + math.log(f0))

    # Merge rm1 and rm2 => rm0.  Here rm0 is rm1 re-used: rm1 is given
    # the new "0" values, and rm2 is dropped from the pool.
    rm1.rj_f = f0
    for rNum in range(rDim):
        rm1.val[rNum] = newVal0[rNum]
    for n in beta0:
        n.br.parts[theProposal.pNum].rMatrixNum = rm1.num
        pf.p4_setRMatrixNum(n.cNode, theProposal.pNum, rm1.num)
    rm1.nNodes = b0
    mp.rjRMatrix_k -= 1
    rm2.rj_isInPool = False
    rm2.nNodes = 0
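
Stripped of the bookkeeping, the deterministic core of the merge is: scale each of the two normalized rate vectors by an independent Gamma(p0, 1) draw, take their element-wise weighted geometric mean with weights bPrime1/bPrime0 and bPrime2/bPrime0, and renormalize.  The sketch below shows just that step; the function name and the rng argument are invented for illustration.

import math
import random

def mergeRateVectors(val1, val2, b1, b2, p0, rng=random):
    # val1, val2: normalized rate vectors; b1, b2: numbers of branches
    # currently using each; p0: the gamma tuning.
    bPrime0 = float(b1 + b2) + 2.
    bPrime1 = float(b1) + 1.
    bPrime2 = float(b2) + 1.
    # Scale each vector by an independent Gamma(p0, 1) draw ...
    s1 = rng.gammavariate(p0, 1.)
    s2 = rng.gammavariate(p0, 1.)
    m1 = [v * s1 for v in val1]
    m2 = [v * s2 for v in val2]
    # ... take the weighted geometric mean, element by element ...
    m0 = [math.exp((bPrime1 / bPrime0) * math.log(a) +
                   (bPrime2 / bPrime0) * math.log(b))
          for a, b in zip(m1, m2)]
    # ... and renormalize so the merged rates sum to 1.
    s0 = sum(m0)
    return [x / s0 for x in m0]
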
Example 3
def proposeSplitRMatrix(self, theProposal):
    gm = ['Chain.proposeSplitRMatrix()']
    # var.rjRMatrixUniformAllocationPrior is True by default.
    # theProposal.tuning (300.) becomes p0 below.

    mp = self.propTree.model.parts[theProposal.pNum]
    assert mp.rjRMatrix
    rDim = ((mp.dim * mp.dim) - mp.dim) / 2

    # Check that k is less than k_max, which is nRMatrices.  This should have been checked before, but check again.
    #print gm[0], "rjRMatrix_k is currently %i, with %i rMatrices" % (mp.rjRMatrix_k, mp.nRMatrices)
    assert mp.rjRMatrix_k < mp.nRMatrices

    # Select an existing rMatrix from the pool
    pool = [c for c in mp.rMatrices if c.rj_isInPool]
    assert mp.rjRMatrix_k == len(pool)
    notInPool = [c for c in mp.rMatrices if not c.rj_isInPool]
    assert notInPool  # or else we can't split
    assert mp.nRMatrices == len(pool) + len(notInPool)
    rm0 = random.choice(pool)

    # The nodes currently associated with rm0
    beta0 = [
        n for n in self.propTree.iterNodesNoRoot()
        if n.br.parts[theProposal.pNum].rMatrixNum == rm0.num
    ]
    b0 = float(len(beta0))
    #print gm[0], "rMatrix %i is chosen, f=%f, currently on nodes" % (rm0.num, rm0.rj_f), [n.nodeNum for n in beta0]

    # Divvy up the contents of beta0 into (new) beta1 and beta2, based on probability u
    if var.rjRMatrixUniformAllocationPrior:
        u = 0.5
    else:
        u = random.random()

    beta1 = []
    beta2 = []
    for it in beta0:
        r = random.random()
        if r < u:
            beta1.append(it)
        else:
            beta2.append(it)
    b1 = float(len(beta1))
    b2 = float(len(beta2))
    bPrime0 = b0 + 2.
    bPrime1 = b1 + 1.
    bPrime2 = b2 + 1.

    # Calculation of f1 and f2 depends on u
    f0 = rm0.rj_f
    f1 = u * f0
    f2 = (1.0 - u) * f0

    uu = [random.normalvariate(0., 1.) for i in range(rDim)]
    p0 = theProposal.tuning
    s0 = random.gammavariate(p0, 1.)
    m0 = [s0 * it for it in rm0.val]
    #print m0

    # I get a math range error here -- needs debugging.
    #m1 = [m0[j] * math.exp((bPrime0 * uu[j])/(bPrime1 * math.sqrt(m0[j])))
    #      for j in range(rDim)]
    #m2 = [m0[j] * math.exp((-bPrime0 * uu[j])/(bPrime2 * math.sqrt(m0[j])))
    #      for j in range(rDim)]
    safety = 0
    while 1:
        try:
            m1 = [
                m0[j] * math.exp(
                    (bPrime0 * uu[j]) / (bPrime1 * math.sqrt(m0[j])))
                for j in range(rDim)
            ]
            m2 = [
                m0[j] * math.exp(
                    (-bPrime0 * uu[j]) / (bPrime2 * math.sqrt(m0[j])))
                for j in range(rDim)
            ]
            break
        except OverflowError:
            print "Overflow error in splitRMatrix() (%2i)" % safety
            safety += 1
            if safety >= 100:
                theProposal.doAbort = True
                print "Too many overflows in splitComp.  Aborting!"
                return
            uu = [random.normalvariate(0., 1.) for i in range(rDim)]

    if 0:
        # Long form of the above for debugging --
        m1 = []
        for j in range(rDim):
            top = (bPrime0 * uu[j])
            bottom = (bPrime1 * math.sqrt(m0[j]))
            quot = top / bottom
            try:
                myexp = math.exp(quot)
            except OverflowError:
                gm.append("Got overflow error for m1 exp(%f) at j=%i" %
                          (quot, j))
                gm.append("bPrime0 = %f" % bPrime0)
                gm.append("uu[j] = %f" % uu[j])
                gm.append("bPrime1 = %f" % bPrime1)
                gm.append("m0[j] = %f, sqrt=%f" % (m0[j], math.sqrt(m0[j])))
                gm.append("m0 is %s" % m0)
                gm.append("top = %f" % top)
                gm.append("bottom = %f" % bottom)
                raise Glitch, gm
            m1.append(m0[j] * myexp)

        m2 = []
        for j in range(rDim):
            top = (-bPrime0 * uu[j])
            bottom = (bPrime2 * math.sqrt(m0[j]))
            quot = top / bottom
            try:
                myexp = math.exp(quot)
            except OverflowError:
                gm.append("Got overflow error for m2 exp(%f) at j=%i" %
                          (quot, j))
                gm.append("-bPrime0 = %f" % -bPrime0)
                gm.append("uu[j] = %f" % uu[j])
                gm.append("bPrime2 = %f" % bPrime2)
                gm.append("m0[j] = %f, sqrt=%f" % (m0[j], math.sqrt(m0[j])))
                gm.append("m0 is %s" % m0)
                gm.append("top = %f" % top)
                gm.append("bottom = %f" % bottom)
                raise Glitch, gm
            m2.append(m0[j] * myexp)

    s1 = sum(m1)
    s2 = sum(m2)
    newVal1 = [it / s1 for it in m1]
    newVal2 = [it / s2 for it in m2]

    #print newVal1
    #print newVal2

    if 1:
        # Peter adds the following few lines to keep the vals above var.RATE_MIN
        isChanged = False
        for vNum in range(len(newVal1)):
            isGood = False
            while not isGood:
                #print "gen %i" % self.mcmc.gen
                if newVal1[vNum] < var.RATE_MIN:
                    newVal1[vNum] = (var.RATE_MIN -
                                     newVal1[vNum]) + var.RATE_MIN
                    isChanged = True
                else:
                    isGood = True
        if isChanged:
            s1 = sum(newVal1)
            newVal1 = [it / s1 for it in newVal1]

        isChanged = False
        for vNum in range(len(newVal2)):
            isGood = False
            while not isGood:
                #print "y gen %i" % self.mcmc.gen
                if newVal2[vNum] < var.RATE_MIN:
                    newVal2[vNum] = (var.RATE_MIN -
                                     newVal2[vNum]) + var.RATE_MIN
                    isChanged = True
                else:
                    isGood = True
        if isChanged:
            s2 = sum(newVal2)
            newVal2 = [it / s2 for it in newVal2]

    #print newVal1
    #print newVal2

    # Log prior ratio
    # We could have a prior on the pool size, reflected in t1.  If all pool sizes are equally probable, then t1 = 0
    t1 = 0.

    if var.rjRMatrixUniformAllocationPrior:
        b = len([n for n in self.propTree.iterNodesNoRoot()])
        t2 = b * (math.log(mp.rjRMatrix_k) - math.log(mp.rjRMatrix_k + 1))
    else:
        t2 = (b1 * math.log(f1)) + (b2 * math.log(f2)) - (b0 * math.log(f0))

    # t3 is for the prior on rMatrices.  With the Dirichlet prior alpha values all 1, t3 is log Gamma rDim
    t3 = pf.gsl_sf_lngamma(rDim)

    # t4 is for the f values.
    if var.rjRMatrixUniformAllocationPrior:
        t4 = 0.0
    else:
        # If it's a uniform Dirichlet, then t4 = log k, where k is from before the split
        t4 = math.log(mp.rjRMatrix_k)

    self.logPriorRatio = t1 + t2 + t3 + t4

    # Log proposal ratio
    if mp.rjRMatrix_k == 1:
        t1 = math.log(0.5)
    else:
        t1 = 0.
    if var.rjRMatrixUniformAllocationPrior:
        t2 = b0 * math.log(2.)
    else:
        t2 = (b0 * math.log(f0)) - (b1 * math.log(f1)) - (
            b2 * math.log(f2))  # this was changed 26 sept

    # for t3, below, do some pre-calculations
    sum_uu2 = sum([u * u for u in uu])
    lastTerm = -pf.gsl_sf_lngamma(p0) + (0.5 * sum_uu2) + \
                 ((rDim/2.) * math.log(2 * math.pi))
    t3 = (s0 - s1 - s2) + (
        (p0 - 1.) * (math.log(s1) + math.log(s2) - math.log(s0))) + lastTerm
    self.logProposalRatio = t1 + t2 + t3
    #print t1,t2,t3,s0,s1,s2

    #self.logProposalRatio = 0.

    # The Jacobian
    lastTerm = 0.5 * sum([math.log(v) for v in rm0.val])  # added 26 sept
    t1 = ((((3. * rDim) - 2.) / 2.) * math.log(s0)) - (
        (rDim - 1.) * (math.log(s1) + math.log(s2))) + lastTerm
    t2 = (2. * rDim *
          math.log(bPrime0)) - (rDim * (math.log(bPrime1) + math.log(bPrime2)))
    t3 = sum([uu[j] / (math.sqrt(s0 * rm0.val[j])) for j in range(rDim)])
    t3 = ((bPrime0 * (bPrime2 - bPrime1)) / (bPrime1 * bPrime2)) * t3

    if var.rjRMatrixUniformAllocationPrior:
        self.logJacobian = t1 + t2 + t3
    else:
        self.logJacobian = t1 + t2 + t3 + math.log(f0)

    # We now make rm1 and rm2.  rm1 is made from rm0, and rm2 is popped
    # from the notInPool list.  newVal1 and newVal2 become their vals,
    # the nodes in beta1 and beta2 are assigned to them, and their rj_f
    # values are set to f1 and f2.
    rm1 = rm0
    for rNum in range(rDim):
        rm1.val[rNum] = newVal1[rNum]
    rm1.rj_f = f1
    for n in beta1:
        n.br.parts[theProposal.pNum].rMatrixNum = rm1.num  # not needed; it's already that
        pf.p4_setRMatrixNum(n.cNode, theProposal.pNum, rm1.num)
    rm1.nNodes = b1

    rm2 = notInPool.pop()
    for rNum in range(rDim):
        rm2.val[rNum] = newVal2[rNum]
    rm2.rj_f = f2
    for n in beta2:
        n.br.parts[theProposal.pNum].rMatrixNum = rm2.num
        pf.p4_setRMatrixNum(n.cNode, theProposal.pNum, rm2.num)
    rm2.nNodes = b2
    rm2.rj_isInPool = True

    self.propTree.model.parts[theProposal.pNum].rjRMatrix_k += 1
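
The split transform above and the merge's weighted geometric mean are exact inverses, because bPrime1 + bPrime2 == bPrime0, so the uu terms cancel when the two halves are recombined.  The sketch below checks that numerically; it is a standalone illustration with an invented helper name and arguments, and it only exercises the m0 -> (m1, m2) -> m0 round trip, not the allocation or the acceptance ratio.

import math
import random

def splitMergeRoundTrip(val0, b0, p0, seed=0):
    # val0: a normalized rate vector; b0: number of branches using it;
    # p0: the gamma tuning.  Returns the largest element-wise error after
    # splitting and re-merging, which should be at the level of
    # floating-point rounding error.
    rng = random.Random(seed)
    rDim = len(val0)
    b1 = b0 // 2
    b2 = b0 - b1
    bPrime0 = float(b0) + 2.
    bPrime1 = float(b1) + 1.
    bPrime2 = float(b2) + 1.
    uu = [rng.normalvariate(0., 1.) for j in range(rDim)]
    s0 = rng.gammavariate(p0, 1.)
    m0 = [s0 * v for v in val0]
    # The split transform, as in proposeSplitRMatrix()
    m1 = [m0[j] * math.exp((bPrime0 * uu[j]) / (bPrime1 * math.sqrt(m0[j])))
          for j in range(rDim)]
    m2 = [m0[j] * math.exp((-bPrime0 * uu[j]) / (bPrime2 * math.sqrt(m0[j])))
          for j in range(rDim)]
    # The merge's weighted geometric mean recovers m0 exactly, because
    # bPrime1 + bPrime2 == bPrime0.
    back = [math.exp((bPrime1 / bPrime0) * math.log(m1[j]) +
                     (bPrime2 / bPrime0) * math.log(m2[j]))
            for j in range(rDim)]
    return max(abs(a - b) for a, b in zip(back, m0))
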
Example 4
def proposeRMatrixWithSlider(self, theProposal):
    gm = ['Chain.proposeRMatrixWithSlider()']

    #print "rMatrix proposal. the tuning is %s" % theProposal.tuning

    assert var.rMatrixNormalizeTo1
    mtCur = self.curTree.model.parts[theProposal.pNum].rMatrices[
        theProposal.mtNum]
    mtProp = self.propTree.model.parts[theProposal.pNum].rMatrices[
        theProposal.mtNum]
    if mtProp.spec == '2p':
        # For 2p, it's actually a Dirichlet, not a slider.  All this is
        # stolen from MrBayes, where the default tuning is 50.  In
        # MrBayes, the "alphaDir" is a 2-item list of Dirichlet
        # parameters (not the multiplier) but they are both by default
        # 1, which makes the prior ratio 1.0 and the logPriorRatio
        # zero.

        old = [0.0, 0.0]
        old[0] = mtCur.val / (mtCur.val + 1.0)
        old[1] = 1.0 - old[0]
        new = func.dirichlet1(old,
                              theProposal.tuning,
                              var.KAPPA_MIN,
                              var.KAPPA_MAX,
                              normalizeTo1=True)
        mtProp.val[0] = new[0] / new[1]

        theSum = 0.0
        for i in range(2):
            theSum += new[i] * theProposal.tuning
        x = pf.gsl_sf_lngamma(theSum)
        for i in range(2):
            x -= pf.gsl_sf_lngamma(new[i] * theProposal.tuning)
        for i in range(2):
            x += ((new[i] * theProposal.tuning) - 1.0) * math.log(old[i])
        theSum = 0.0
        for i in range(2):
            theSum += old[i] * theProposal.tuning
        y = pf.gsl_sf_lngamma(theSum)
        for i in range(2):
            y -= pf.gsl_sf_lngamma(old[i] * theProposal.tuning)
        for i in range(2):
            y += ((old[i] * theProposal.tuning) - 1.0) * math.log(new[i])
        self.logProposalRatio = x - y

    else:  # specified, ones, eg gtr
        mt = self.propTree.model.parts[theProposal.pNum].rMatrices[
            theProposal.mtNum]

        # mt.val is a numpy array
        assert type(mt.val) == numpy.ndarray

        nRates = len(mt.val)  # eg 6 for dna gtr, not 5
        indxs = random.sample(range(nRates), 2)
        currentAplusB = mt.val[indxs[0]] + mt.val[indxs[1]]
        thisMin = var.RATE_MIN / currentAplusB
        thisMax = 1. - thisMin

        minToMaxDiff = thisMax - thisMin
        thisTuning = theProposal.tuning

        # It is possible that both A
        # and B values are very close to var.RATE_MIN, in which case
        # thisMin and thisMax will both be close to 0.5, and so the tuning
        # will be too much, requiring too many reflections.  In that case,
        # just change the tuning temporarily.
        if thisTuning > minToMaxDiff:
            thisTuning = minToMaxDiff
            #print "temporarily changing the tuning for rMatrix proposal, to", thisTuning

        x = mt.val[indxs[0]] / currentAplusB
        y = x + (thisTuning * (random.random() - 0.5))

        # reflect
        safety = -1
        while 1:
            safety += 1
            if safety > 20:
                gm.append(
                    "Did more than 20 reflections -- something is wrong.")
                raise Glitch, gm
            if y < thisMin:
                y = thisMin + (thisMin - y)
            elif y > thisMax:
                y = thisMax - (y - thisMax)
            else:
                break
        #if safety > 1:
        #    print "rMatrix reflections: ", safety
        mt.val[indxs[0]] = y * currentAplusB
        mt.val[indxs[1]] = currentAplusB - mt.val[indxs[0]]

        mySum = 0.0
        for stNum in range(nRates):
            mySum += mt.val[stNum]
        for stNum in range(nRates):
            mt.val[stNum] /= mySum

        self.logProposalRatio = 0.0

    self.logPriorRatio = 0.0
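
For the GTR branch, the proposal is a symmetric sliding-window move on the relative size of two randomly chosen rates, with reflection at the bounds, followed by renormalization of the whole vector, so the log proposal ratio is zero.  Here is a minimal self-contained sketch of that step on a plain Python list; rateMin stands in for var.RATE_MIN, and the function name and rng argument are invented.

import random

def slideRatePairWithReflection(rates, tuning, rateMin=1.e-6, rng=random):
    rates = list(rates)
    i, j = rng.sample(range(len(rates)), 2)
    aPlusB = rates[i] + rates[j]
    thisMin = rateMin / aPlusB
    thisMax = 1. - thisMin
    # Shrink the tuning if the window is narrower than the step size,
    # as the method above does.
    thisTuning = min(tuning, thisMax - thisMin)
    x = rates[i] / aPlusB
    y = x + thisTuning * (rng.random() - 0.5)
    # Reflect back into [thisMin, thisMax]; the move is symmetric, so
    # the log proposal ratio is zero.
    while y < thisMin or y > thisMax:
        if y < thisMin:
            y = thisMin + (thisMin - y)
        else:
            y = thisMax - (y - thisMax)
    rates[i] = y * aPlusB
    rates[j] = aPlusB - rates[i]
    total = sum(rates)
    return [v / total for v in rates]
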