def likelihoodAB(cascade, mu=None, B=None, omega=None, omegao=None, model='modelB'):
    """Compute per-scale log-likelihoods of the multiscale read counts.

    For each scale j, `lhoodA` holds the log-likelihood under the
    "unbound" component and `lhoodB` under the "bound" component:
      modelA -- binomial(0.5) vs binomial(B),
      modelB -- binomial(0.5) vs symmetric beta-binomial with
                pseudo-count mu,
      modelC -- symmetric beta-binomial (concentration omega) vs
                beta-binomial with mean B; a third term `lhoodC` uses
                a symmetric beta-binomial with concentration omegao.

    Returns (lhoodA, lhoodB) or, for modelC, (lhoodA, lhoodB, lhoodC),
    each a Cascade over the same scales as `cascade`.
    """

    lhoodA = Cascade(cascade.L)
    lhoodB = Cascade(cascade.L)
    if model=='modelC':
        lhoodC = Cascade(cascade.L)

    for j in xrange(cascade.J):
        if model=='modelA':
            # binomial(0.5) vs binomial(B) log-likelihoods
            lhoodA.value[j] = cascade.total[j]*nplog(0.5)
            lhoodB.value[j] = cascade.value[j]*nplog(B.value[j]) + (cascade.total[j]-cascade.value[j])*nplog(1-B.value[j])

        elif model=='modelB':
            # binomial(0.5) vs beta-binomial(mu, mu) log-likelihoods
            lhoodA.value[j] = cascade.total[j]*nplog(0.5)
            lhoodB.value[j] = gammaln(cascade.value[j]+mu.estim[j]) + gammaln(cascade.total[j]-cascade.value[j]+mu.estim[j]) \
                    - gammaln(cascade.total[j]+2*mu.estim[j]) + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j])

        elif model=='modelC':
            # symmetric beta-binomial (omega) vs beta-binomial (B, omega);
            # lhoodC is a second symmetric beta-binomial with omegao
            lhoodA.value[j] = gammaln(cascade.value[j]+0.5*omega.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*omega.value[j]) \
                    - gammaln(cascade.total[j]+omega.value[j]) + gammaln(omega.value[j]) - 2*gammaln(0.5*omega.value[j])
            lhoodB.value[j] = gammaln(cascade.value[j]+B.value[j]*omega.value[j]) \
                    + gammaln(cascade.total[j]-cascade.value[j]+(1-B.value[j])*omega.value[j]) \
                    - gammaln(cascade.total[j]+omega.value[j]) + gammaln(omega.value[j]) - gammaln(B.value[j]*omega.value[j]) \
                    - gammaln((1-B.value[j])*omega.value[j])
            lhoodC.value[j] = gammaln(cascade.value[j]+0.5*omegao.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*omegao.value[j]) \
                    - gammaln(cascade.total[j]+omegao.value[j]) + gammaln(omegao.value[j]) - 2*gammaln(0.5*omegao.value[j])

    if model=='modelC':
        return lhoodA, lhoodB, lhoodC
    else:
        return lhoodA, lhoodB
# Beispiel #2 (scrape-artifact separator; commented out so the file parses)
def bayes_optimal_estimator(cascade, pi, mu):
    """Posterior-mean estimate of the cleavage rate at each scale.

    Mixes the unbound mean (0.5) and the beta-binomial posterior mean,
    weighted by the posterior probability of each component (logistic
    of the log-odds `ratio`).
    """
    estimate = Cascade(cascade.L)
    for scale in range(pi.J):
        m = mu.estim[scale]
        successes = cascade.value[scale].sum(0)
        trials = cascade.total[scale].sum(0)
        # log-odds in favour of the unbound (binomial 0.5) component
        ratio = nplog(1 - pi.estim[scale]) - nplog(pi.estim[scale])
        ratio = ratio + gammaln(successes + m)
        ratio = ratio + gammaln(trials - successes + m)
        ratio = ratio - gammaln(trials + 2 * m)
        ratio = ratio + gammaln(2 * m) - 2 * gammaln(m)
        ratio = ratio - trials * nplog(0.5)
        posterior_mean = (successes + m) / (trials + m)
        estimate.value[scale] = 0.5 * logistic(ratio) + posterior_mean * logistic(-ratio)

    return estimate
    def update_Estep(self, cascade, eta, pi, mu=None, B=None, omega=None, omegao=None):
        """E-step update of the per-scale binding posteriors.

        Computes log posterior odds from the prior pi and the
        eta-weighted average log-likelihood difference between the
        unbound (A) and bound (B) components, then maps through the
        logistic function.
        """
        if self.model == 'modelA':
            likelihoods = likelihoodAB(cascade, B=B, model=self.model)
        elif self.model == 'modelB':
            likelihoods = likelihoodAB(cascade, mu=mu, model=self.model)
        elif self.model == 'modelC':
            likelihoods = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=self.model)
        lhoodA, lhoodB = likelihoods[0], likelihoods[1]

        weights = eta.estim[:, 1:]
        total_weight = outsum(weights)
        for scale in xrange(self.J):
            diff = lhoodA.value[scale] - lhoodB.value[scale]
            log_posterior_odds = nplog(pi.estim[scale]) - nplog(1 - pi.estim[scale]) \
                + outsum(weights * diff) / total_weight
            self.value[scale] = newlogistic(-log_posterior_odds)
    def update_Estep(self, cascade, scores, alpha, beta, tau, pi, gamma, mu=None, B=None, omega=None, omegao=None):
        """E-step update of the per-site bound/unbound posterior (eta).

        Combines the logistic prior on `scores`, the footprint
        log-odds (marginalized over the gamma posterior), and the
        negative-binomial read-count terms, then softmax-normalizes
        each row.  Raises ValueError if the result contains NaN/Inf.
        """
        footprint_logodds = np.zeros((self.N,1),dtype=float)
        if gamma.model=='modelA':
            lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
        elif gamma.model=='modelB':
            lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
        elif gamma.model=='modelC':
            lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=gamma.model)

        for j in xrange(pi.J):
            # likelihood-ratio plus entropy terms of the gamma posterior
            footprint_logodds += insum((1-gamma.value[j])*(lhoodB.value[j]-lhoodA.value[j]) \
                    + gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                    + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

        self.estim[:,1:] = beta.estim[0] + beta.estim[1]*scores + footprint_logodds \
            + gammaln(self.total+alpha.estim[1]) - gammaln(self.total+alpha.estim[0]) \
            + gammaln(alpha.estim[0]) - gammaln(alpha.estim[1]) \
            + alpha.estim[1]*nplog(tau.estim[1]) - alpha.estim[0]*nplog(tau.estim[0]) \
            + self.total*(nplog(1-tau.estim[1])-nplog(1-tau.estim[0]))
        self.estim[:,0] = 0.
        # cap infinities, then softmax-normalize each row
        self.estim[self.estim==np.inf] = MAX
        self.estim = np.exp(self.estim-np.max(self.estim,1).reshape(self.N,1))
        self.estim = self.estim/insum(self.estim,[1])

        if np.isnan(self.estim).any():
            print "Nan in Eta"
            raise ValueError

        if np.isinf(self.estim).any():
            print "Inf in Eta"
            raise ValueError
def likelihood(cascade, scores, eta, gamma, pi, alpha, beta, tau, mu=None, B=None, omega=None, omegao=None):
    """Evidence lower bound (up to constants) of the full model.

    Sums over sites: the bound-state term P_1 (expected footprint
    log-likelihood plus negative-binomial read-count terms), the
    unbound-state term P_0, the logistic prior on `scores`, and the
    entropy of the eta posterior.  Raises ValueError on NaN/Inf.
    """

    # logistic-prior logit for each site
    apriori = beta.estim[0] + beta.estim[1]*scores

    if gamma.model=='modelA':
        lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
    elif gamma.model=='modelB':
        lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
    elif gamma.model=='modelC':
        lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=gamma.model)

    # expected footprint log-likelihood under the gamma posterior,
    # including gamma's prior and entropy terms
    footprint = np.zeros((cascade.N,1),dtype=float)
    for j in xrange(pi.J):
        footprint += insum(gamma.value[j]*lhoodA.value[j] + (1-gamma.value[j])*lhoodB.value[j] \
                + gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

    P_1 = footprint + gammaln(eta.total+alpha.estim[1]) - gammaln(alpha.estim[1]) \
        + alpha.estim[1]*nplog(tau.estim[1]) + eta.total*nplog(1-tau.estim[1])
    P_1[P_1==np.inf] = MAX
    P_1[P_1==-np.inf] = -MAX

    # null (unbound) footprint likelihood; modelC uses the lhoodC term
    null = np.zeros((cascade.N,1),dtype=float)
    for j in xrange(cascade.J):
        if gamma.model=='modelC':
            null = null + insum(lhoodC.value[j],[1])
        else:
            null = null + insum(lhoodA.value[j],[1])
    P_0 = null + gammaln(eta.total+alpha.estim[0]) - gammaln(alpha.estim[0]) \
        + alpha.estim[0]*nplog(tau.estim[0]) + eta.total*nplog(1-tau.estim[0])
    P_0[P_0==np.inf] = MAX
    P_0[P_0==-np.inf] = -MAX

    # combine states weighted by eta, plus prior and eta entropy
    L = P_0*eta.estim[:,:1] + insum(P_1*eta.estim[:,1:],[1]) + apriori*(1-eta.estim[:,:1]) \
        - nplog(1+np.exp(apriori)) - insum(eta.estim*nplog(eta.estim),[1])

    L = L.sum()

    if np.isnan(L):
        print "Nan in LogLike"
        raise ValueError

    if np.isinf(L):
        print "Inf in LogLike"
        raise ValueError

    return L
 def F(x):
     """Negative expected log-likelihood of the logistic prior
     coefficients x = (intercept, slope)."""
     logit = x[0] + x[1] * scores
     loglike = logit * insum(eta.estim[:, 1:], 1) - nplog(1 + np.exp(logit))
     total = -1. * loglike.sum()
     # signal invalid points to the optimizer
     return np.inf if np.isnan(total) or np.isinf(total) else total
 def F(x):
     """Objective: negative expected log-likelihood for the logistic
     coefficients x = (intercept, slope)."""
     arg = x[0] + x[1] * scores
     value = (arg * insum(eta.estim[:, 1:], 1) - nplog(1 + np.exp(arg))).sum()
     result = -1. * value
     if np.isnan(result) or np.isinf(result):
         # invalid point: tell the optimizer to back off
         return np.inf
     return result
# Beispiel #8 (scrape-artifact separator; commented out so the file parses)
def loglikelihood(cascade, gamma, pi, B=None, mu=None, tau=None):
    """Expected complete-data log-likelihood of the footprint model.

    For each scale, mixes the unbound (lhoodA) and bound (lhoodB)
    log-likelihoods by the gamma posterior and adds gamma's prior and
    entropy terms scaled by the number of sites.
    """

    L = 0.
    for j in xrange(cascade.J):
        if gamma.model=='modelA':
            # binomial(0.5) vs binomial(B)
            lhoodA = cascade.total[j]*nplog(0.5)
            lhoodB = cascade.value[j]*nplog(B.value[j]) + (cascade.total[j]-cascade.value[j])*nplog(1-B.value[j])

        elif gamma.model=='modelB':
            # binomial(0.5) vs beta-binomial(mu, mu)
            lhoodA = cascade.total[j]*nplog(0.5)
            lhoodB = gammaln(cascade.value[j]+mu.estim[j]) + gammaln(cascade.total[j]-cascade.value[j]+mu.estim[j]) \
                    - gammaln(cascade.total[j]+2*mu.estim[j]) + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j])

        elif gamma.model=='modelC':
            # symmetric beta-binomial (tau) vs beta-binomial (B, tau)
            lhoodA = gammaln(cascade.value[j]+0.5*tau.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*tau.value[j]) \
                    - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - 2*gammaln(0.5*tau.value[j])
            lhoodB = gammaln(cascade.value[j]+B.value[j]*tau.value[j]) \
                    + gammaln(cascade.total[j]-cascade.value[j]+(1-B.value[j])*tau.value[j]) \
                    - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - gammaln(B.value[j]*tau.value[j]) \
                    - gammaln((1-B.value[j])*tau.value[j])

        L += np.sum(gamma.value[j]*lhoodA.sum(0) + (1-gamma.value[j])*lhoodB.sum(0) \
            + cascade.N*(gamma.value[j]*nplog(pi.estim[j]) + (1-gamma.value[j])*nplog(1-pi.estim[j]) \
            - gamma.value[j]*nplog(gamma.value[j]) - (1-gamma.value[j])*nplog(1-gamma.value[j])))

    return L
def bayes_optimal_estimator(cascade, pi, mu):
    """Posterior-mean estimate of the cleavage rate at each scale.

    Mixes the unbound mean (0.5) and the beta-binomial posterior mean,
    weighted by the posterior probability of each component (logistic
    of the log-odds `ratio`).
    """
    R = Cascade(cascade.L)
    for j in range(pi.J):
        # log-odds in favour of the unbound (binomial 0.5) component
        ratio = (
            nplog(1 - pi.estim[j])
            - nplog(pi.estim[j])
            + gammaln(cascade.value[j].sum(0) + mu.estim[j])
            + gammaln(cascade.total[j].sum(0) - cascade.value[j].sum(0) + mu.estim[j])
            - gammaln(cascade.total[j].sum(0) + 2 * mu.estim[j])
            + gammaln(2 * mu.estim[j])
            - 2 * gammaln(mu.estim[j])
            - cascade.total[j].sum(0) * nplog(0.5)
        )
        R.value[j] = 0.5 * logistic(ratio) + (cascade.value[j].sum(0) + mu.estim[j]) / (
            cascade.total[j].sum(0) + mu.estim[j]
        ) * logistic(-ratio)

    return R
    def __init__(self, cascade, totalreads, scores, gamma=None, beta=None, \
        pi=None, mu=None, B=None, omega=None, omegao=None, alpha=None, tau=None):
        """Initialize per-site occupancy posteriors (eta).

        Without alpha (no read-count parameters yet), sites are
        hard-assigned by total read count: the lower half to the
        unbound state, the upper half to the bound state.  With alpha,
        the full log posterior odds are computed from the footprint
        likelihoods, the logistic prior on `scores`, and the
        negative-binomial read-count model, and reported in base 10.
        """
        self.N = cascade.N
        self.total = totalreads.reshape(self.N, 1)

        self.estim = np.zeros((self.N, 2), dtype=float)
        if alpha is None:
            # cold start: split sites at the median read count
            # (// keeps integer indexing correct on both py2 and py3)
            indices = np.argsort(self.total.ravel())[:self.N // 2]
            self.estim[indices, 1:] = -MAX
            indices = np.argsort(self.total.ravel())[self.N // 2:]
            self.estim[indices, 1:] = MAX
        else:
            footprint_logodds = np.zeros((self.N, 1), dtype=float)
            if gamma.model == 'modelA':
                lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
            elif gamma.model == 'modelB':
                lhoodA, lhoodB = likelihoodAB(cascade,
                                              mu=mu,
                                              model=gamma.model)
            elif gamma.model == 'modelC':
                lhoodA, lhoodB, lhoodC = likelihoodAB(cascade,
                                                      B=B,
                                                      omega=omega,
                                                      omegao=omegao,
                                                      model=gamma.model)

            for j in xrange(pi.J):
                # BUGFIX: `model` was an undefined name here; the model
                # selector lives on gamma (cf. the branches above).
                if gamma.model == 'modelC':
                    footprint_logodds += insum(
                        gamma.value[j] * lhoodA.value[j] - lhoodC.value[j] +
                        (1 - gamma.value[j]) * lhoodB.value[j], [1])
                else:
                    footprint_logodds += insum(
                        (1 - gamma.value[j]) *
                        (lhoodB.value[j] - lhoodA.value[j]), [1])
                # entropy/prior terms of the gamma posterior
                footprint_logodds += insum(gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                    + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

            self.estim[:,1:] = beta.estim[0] + beta.estim[1]*scores + footprint_logodds \
                + gammaln(self.total+alpha.estim[1]) - gammaln(self.total+alpha.estim[0]) \
                + gammaln(alpha.estim[0]) - gammaln(alpha.estim[1]) \
                + alpha.estim[1]*nplog(tau.estim[1]) - alpha.estim[0]*nplog(tau.estim[0]) \
                + self.total*(nplog(1-tau.estim[1])-nplog(1-tau.estim[0]))

        if alpha is None:
            # cap infinities, then softmax-normalize each row
            self.estim[self.estim == np.inf] = MAX
            self.estim = np.exp(self.estim -
                                np.max(self.estim, 1).reshape(self.N, 1))
            self.estim = self.estim / insum(self.estim, [1])
        else:
            # report log posterior odds in base 10
            self.estim[:, 1:] = self.estim[:, 1:] / np.log(10)
    def update(self, cascade, pi, mu=None, B=None, tau=None):
        """Update the per-scale binding posteriors (gamma).

        For each scale, compares the unbound (lhoodA) and bound
        (lhoodB) log-likelihoods averaged over sites, adds the prior
        log-odds from pi, and maps through the logistic function.
        """
        for j in xrange(self.J):
            if self.model == "modelA":
                # binomial(0.5) vs binomial(B)
                lhoodA = cascade.total[j] * nplog(0.5)
                lhoodB = cascade.value[j] * nplog(B.value[j]) + (cascade.total[j] - cascade.value[j]) * nplog(
                    1 - B.value[j]
                )

            elif self.model == "modelB":
                # binomial(0.5) vs beta-binomial(mu, mu)
                lhoodA = cascade.total[j] * nplog(0.5)
                lhoodB = (
                    gammaln(cascade.value[j] + mu.estim[j])
                    + gammaln(cascade.total[j] - cascade.value[j] + mu.estim[j])
                    - gammaln(cascade.total[j] + 2 * mu.estim[j])
                    + gammaln(2 * mu.estim[j])
                    - 2 * gammaln(mu.estim[j])
                )

            elif self.model == "modelC":
                # symmetric beta-binomial (tau) vs beta-binomial (B, tau)
                lhoodA = (
                    gammaln(cascade.value[j] + 0.5 * tau.value[j])
                    + gammaln(cascade.total[j] - cascade.value[j] + 0.5 * tau.value[j])
                    - gammaln(cascade.total[j] + tau.value[j])
                    + gammaln(tau.value[j])
                    - 2 * gammaln(0.5 * tau.value[j])
                )
                lhoodB = (
                    gammaln(cascade.value[j] + B.value[j] * tau.value[j])
                    + gammaln(cascade.total[j] - cascade.value[j] + (1 - B.value[j]) * tau.value[j])
                    - gammaln(cascade.total[j] + tau.value[j])
                    + gammaln(tau.value[j])
                    - gammaln(B.value[j] * tau.value[j])
                    - gammaln((1 - B.value[j]) * tau.value[j])
                )

            # site-averaged likelihood difference plus prior log-odds
            log_posterior_odds = (
                nplog(pi.estim[j]) - nplog(1 - pi.estim[j]) + 1.0 / cascade.N * (lhoodA.sum(0) - lhoodB.sum(0))
            )
            self.value[j] = logistic(-log_posterior_odds)
# Beispiel #12 (scrape-artifact separator; commented out so the file parses)
    def update(self, cascade, pi, mu=None, B=None, tau=None):
        """Update the per-scale binding posteriors (gamma).

        Compares unbound (lhoodA) and bound (lhoodB) log-likelihoods
        averaged over sites, adds the prior log-odds from pi, and maps
        through the logistic function.
        """
        for j in xrange(self.J):
            if self.model=='modelA':
                # binomial(0.5) vs binomial(B)
                lhoodA = cascade.total[j]*nplog(0.5)
                lhoodB = cascade.value[j]*nplog(B.value[j]) + (cascade.total[j]-cascade.value[j])*nplog(1-B.value[j])

            elif self.model=='modelB':
                # binomial(0.5) vs beta-binomial(mu, mu)
                lhoodA = cascade.total[j]*nplog(0.5)
                lhoodB = gammaln(cascade.value[j]+mu.estim[j]) + gammaln(cascade.total[j]-cascade.value[j]+mu.estim[j]) \
                        - gammaln(cascade.total[j]+2*mu.estim[j]) + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j])

            elif self.model=='modelC':
                # symmetric beta-binomial (tau) vs beta-binomial (B, tau)
                lhoodA = gammaln(cascade.value[j]+0.5*tau.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*tau.value[j]) \
                        - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - 2*gammaln(0.5*tau.value[j])
                lhoodB = gammaln(cascade.value[j]+B.value[j]*tau.value[j]) \
                        + gammaln(cascade.total[j]-cascade.value[j]+(1-B.value[j])*tau.value[j]) \
                        - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - gammaln(B.value[j]*tau.value[j]) \
                        - gammaln((1-B.value[j])*tau.value[j])

            # site-averaged likelihood difference plus prior log-odds
            log_posterior_odds = nplog(pi.estim[j]) - nplog(1-pi.estim[j]) + 1./cascade.N*(lhoodA.sum(0) - lhoodB.sum(0))
            self.value[j] = logistic(-log_posterior_odds)
    def update_Estep(self,
                     cascade,
                     scores,
                     alpha,
                     beta,
                     tau,
                     pi,
                     gamma,
                     mu=None,
                     B=None,
                     omega=None,
                     omegao=None):
        """E-step update of the per-site bound/unbound posterior (eta).

        Combines the logistic prior on `scores`, footprint log-odds
        (marginalized over the gamma posterior), and negative-binomial
        read-count terms, then softmax-normalizes each row.  Raises
        ValueError if the result contains NaN/Inf.
        """
        footprint_logodds = np.zeros((self.N, 1), dtype=float)
        if gamma.model == 'modelA':
            lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
        elif gamma.model == 'modelB':
            lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
        elif gamma.model == 'modelC':
            lhoodA, lhoodB, lhoodC = likelihoodAB(cascade,
                                                  B=B,
                                                  omega=omega,
                                                  omegao=omegao,
                                                  model=gamma.model)

        for j in xrange(pi.J):
            # likelihood-ratio plus entropy terms of the gamma posterior
            footprint_logodds += insum((1-gamma.value[j])*(lhoodB.value[j]-lhoodA.value[j]) \
                    + gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                    + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

        self.estim[:,1:] = beta.estim[0] + beta.estim[1]*scores + footprint_logodds \
            + gammaln(self.total+alpha.estim[1]) - gammaln(self.total+alpha.estim[0]) \
            + gammaln(alpha.estim[0]) - gammaln(alpha.estim[1]) \
            + alpha.estim[1]*nplog(tau.estim[1]) - alpha.estim[0]*nplog(tau.estim[0]) \
            + self.total*(nplog(1-tau.estim[1])-nplog(1-tau.estim[0]))
        self.estim[:, 0] = 0.
        # cap infinities, then softmax-normalize each row
        self.estim[self.estim == np.inf] = MAX
        self.estim = np.exp(self.estim -
                            np.max(self.estim, 1).reshape(self.N, 1))
        self.estim = self.estim / insum(self.estim, [1])

        if np.isnan(self.estim).any():
            print "Nan in Eta"
            raise ValueError

        if np.isinf(self.estim).any():
            print "Inf in Eta"
            raise ValueError
def logposteriorodds_poissonbinomial(reads, gamma, pi, parameters):
    """Accumulate per-site log posterior odds of binding from raw reads.

    Builds a Cascade from `reads` and, for each scale, compares the
    unbound (lhoodA) and bound (lhoodB) log-likelihoods.  `parameters`
    is unpacked according to gamma.model: B (modelA), mu (modelB), or
    (B, tau) (modelC).  Returns an array of length N (one value per
    site/row of `reads`).
    """
    N, L = reads.shape
    cascade = Cascade(L)
    cascade.setreads(reads)
    logodds = np.zeros((N,), dtype=float)

    if gamma.model == "modelA":
        B = parameters
    elif gamma.model == "modelB":
        mu = parameters
    elif gamma.model == "modelC":
        B, tau = parameters

    for j in xrange(pi.J):
        if gamma.model == "modelA":
            # binomial(0.5) vs binomial(B)
            lhoodA = cascade.total[j] * nplog(0.5)
            lhoodB = cascade.value[j] * nplog(B.value[j]) + (cascade.total[j] - cascade.value[j]) * nplog(
                1 - B.value[j]
            )

        elif gamma.model == "modelB":
            # binomial(0.5) vs beta-binomial(mu, mu)
            lhoodA = cascade.total[j] * nplog(0.5)
            lhoodB = (
                gammaln(cascade.value[j] + mu.estim[j])
                + gammaln(cascade.total[j] - cascade.value[j] + mu.estim[j])
                - gammaln(cascade.total[j] + 2 * mu.estim[j])
                + gammaln(2 * mu.estim[j])
                - 2 * gammaln(mu.estim[j])
            )

        elif gamma.model == "modelC":
            # symmetric beta-binomial (tau) vs beta-binomial (B, tau)
            lhoodA = (
                gammaln(cascade.value[j] + 0.5 * tau.value[j])
                + gammaln(cascade.total[j] - cascade.value[j] + 0.5 * tau.value[j])
                - gammaln(cascade.total[j] + tau.value[j])
                + gammaln(tau.value[j])
                - 2 * gammaln(0.5 * tau.value[j])
            )
            lhoodB = (
                gammaln(cascade.value[j] + B.value[j] * tau.value[j])
                + gammaln(cascade.total[j] - cascade.value[j] + (1 - B.value[j]) * tau.value[j])
                - gammaln(cascade.total[j] + tau.value[j])
                + gammaln(tau.value[j])
                - gammaln(B.value[j] * tau.value[j])
                - gammaln((1 - B.value[j]) * tau.value[j])
            )

        # NOTE(review): this accumulation mixes nplog(pi) with the
        # logistic of the per-position log-ratio; confirm against the
        # model derivation that this is the intended marginal log-odds.
        logratio = nplog(1 - pi.estim[j]) + lhoodB - nplog(pi.estim[j]) - lhoodA
        logodds += np.sum(nplog(pi.estim[j]) - nplog(logistic(logratio)), 1)

    return logodds
def likelihoodAB(cascade,
                 mu=None,
                 B=None,
                 omega=None,
                 omegao=None,
                 model='modelB'):
    """Compute per-scale log-likelihoods of the multiscale read counts.

    For each scale j, `lhoodA` is the log-likelihood under the
    "unbound" component and `lhoodB` under the "bound" component
    (binomial or beta-binomial depending on `model`); model C adds a
    third symmetric beta-binomial term `lhoodC` with concentration
    omegao.  Returns (lhoodA, lhoodB) or, for modelC,
    (lhoodA, lhoodB, lhoodC), each a Cascade.
    """
    lhoodA = Cascade(cascade.L)
    lhoodB = Cascade(cascade.L)
    if model == 'modelC':
        lhoodC = Cascade(cascade.L)

    for j in xrange(cascade.J):
        if model == 'modelA':
            # binomial(0.5) vs binomial(B)
            lhoodA.value[j] = cascade.total[j] * nplog(0.5)
            lhoodB.value[j] = cascade.value[j] * nplog(B.value[j]) + (
                cascade.total[j] - cascade.value[j]) * nplog(1 - B.value[j])

        elif model == 'modelB':
            # binomial(0.5) vs beta-binomial(mu, mu)
            lhoodA.value[j] = cascade.total[j] * nplog(0.5)
            lhoodB.value[j] = gammaln(cascade.value[j]+mu.estim[j]) + gammaln(cascade.total[j]-cascade.value[j]+mu.estim[j]) \
                    - gammaln(cascade.total[j]+2*mu.estim[j]) + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j])

        elif model == 'modelC':
            # symmetric beta-binomial (omega) vs beta-binomial (B, omega);
            # lhoodC is a second symmetric beta-binomial with omegao
            lhoodA.value[j] = gammaln(cascade.value[j]+0.5*omega.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*omega.value[j]) \
                    - gammaln(cascade.total[j]+omega.value[j]) + gammaln(omega.value[j]) - 2*gammaln(0.5*omega.value[j])
            lhoodB.value[j] = gammaln(cascade.value[j]+B.value[j]*omega.value[j]) \
                    + gammaln(cascade.total[j]-cascade.value[j]+(1-B.value[j])*omega.value[j]) \
                    - gammaln(cascade.total[j]+omega.value[j]) + gammaln(omega.value[j]) - gammaln(B.value[j]*omega.value[j]) \
                    - gammaln((1-B.value[j])*omega.value[j])
            lhoodC.value[j] = gammaln(cascade.value[j]+0.5*omegao.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*omegao.value[j]) \
                    - gammaln(cascade.total[j]+omegao.value[j]) + gammaln(omegao.value[j]) - 2*gammaln(0.5*omegao.value[j])

    if model == 'modelC':
        return lhoodA, lhoodB, lhoodC
    else:
        return lhoodA, lhoodB
    def update_Estep(self,
                     cascade,
                     eta,
                     pi,
                     mu=None,
                     B=None,
                     omega=None,
                     omegao=None):
        """E-step update of the per-scale binding posteriors (gamma).

        Uses the eta-weighted average difference of the unbound (A)
        and bound (B) log-likelihoods plus the prior log-odds from pi,
        mapped through the logistic function.
        """
        if self.model == 'modelA':
            lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=self.model)
        elif self.model == 'modelB':
            lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=self.model)
        elif self.model == 'modelC':
            lhoodA, lhoodB, lhoodC = likelihoodAB(cascade,
                                                  B=B,
                                                  omega=omega,
                                                  omegao=omegao,
                                                  model=self.model)

        for j in xrange(self.J):
            log_posterior_odds = nplog(pi.estim[j]) - nplog(1-pi.estim[j]) \
                + outsum(eta.estim[:,1:]*(lhoodA.value[j]-lhoodB.value[j]))/outsum(eta.estim[:,1:])
            self.value[j] = newlogistic(-log_posterior_odds)
def loglikelihood(cascade, gamma, pi, B=None, mu=None, tau=None):
    """Expected complete-data log-likelihood of the footprint model.

    For each scale, mixes the unbound (lhoodA) and bound (lhoodB)
    log-likelihoods by the gamma posterior and adds gamma's prior and
    entropy terms scaled by the number of sites.
    """
    L = 0.0
    for j in xrange(cascade.J):
        if gamma.model == "modelA":
            # binomial(0.5) vs binomial(B)
            lhoodA = cascade.total[j] * nplog(0.5)
            lhoodB = cascade.value[j] * nplog(B.value[j]) + (cascade.total[j] - cascade.value[j]) * nplog(
                1 - B.value[j]
            )

        elif gamma.model == "modelB":
            # binomial(0.5) vs beta-binomial(mu, mu)
            lhoodA = cascade.total[j] * nplog(0.5)
            lhoodB = (
                gammaln(cascade.value[j] + mu.estim[j])
                + gammaln(cascade.total[j] - cascade.value[j] + mu.estim[j])
                - gammaln(cascade.total[j] + 2 * mu.estim[j])
                + gammaln(2 * mu.estim[j])
                - 2 * gammaln(mu.estim[j])
            )

        elif gamma.model == "modelC":
            # symmetric beta-binomial (tau) vs beta-binomial (B, tau)
            lhoodA = (
                gammaln(cascade.value[j] + 0.5 * tau.value[j])
                + gammaln(cascade.total[j] - cascade.value[j] + 0.5 * tau.value[j])
                - gammaln(cascade.total[j] + tau.value[j])
                + gammaln(tau.value[j])
                - 2 * gammaln(0.5 * tau.value[j])
            )
            lhoodB = (
                gammaln(cascade.value[j] + B.value[j] * tau.value[j])
                + gammaln(cascade.total[j] - cascade.value[j] + (1 - B.value[j]) * tau.value[j])
                - gammaln(cascade.total[j] + tau.value[j])
                + gammaln(tau.value[j])
                - gammaln(B.value[j] * tau.value[j])
                - gammaln((1 - B.value[j]) * tau.value[j])
            )

        L += np.sum(
            gamma.value[j] * lhoodA.sum(0)
            + (1 - gamma.value[j]) * lhoodB.sum(0)
            + cascade.N
            * (
                gamma.value[j] * nplog(pi.estim[j])
                + (1 - gamma.value[j]) * nplog(1 - pi.estim[j])
                - gamma.value[j] * nplog(gamma.value[j])
                - (1 - gamma.value[j]) * nplog(1 - gamma.value[j])
            )
        )

    return L
    def __init__(self, cascade, totalreads, scores, gamma=None, beta=None, \
        pi=None, mu=None, B=None, omega=None, omegao=None, alpha=None, tau=None):
        """Initialize per-site occupancy posteriors (eta).

        Without alpha, sites are hard-assigned by total read count
        (lower half unbound, upper half bound).  With alpha, the full
        log posterior odds are computed and reported in base 10.
        """
        self.N = cascade.N
        self.total = totalreads.reshape(self.N,1)

        self.estim = np.zeros((self.N, 2),dtype=float)
        if alpha is None:
            # cold start: split sites at the median read count
            # (// keeps integer indexing correct on both py2 and py3)
            indices = np.argsort(self.total.ravel())[:self.N//2]
            self.estim[indices,1:] = -MAX
            indices = np.argsort(self.total.ravel())[self.N//2:]
            self.estim[indices,1:] = MAX
        else:
            footprint_logodds = np.zeros((self.N,1),dtype=float)
            if gamma.model=='modelA':
                lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
            elif gamma.model=='modelB':
                lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
            elif gamma.model=='modelC':
                lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=gamma.model)

            for j in xrange(pi.J):
                # BUGFIX: `model` was an undefined name here; the model
                # selector lives on gamma (cf. the branches above).
                if gamma.model=='modelC':
                    footprint_logodds += insum(gamma.value[j]*lhoodA.value[j]-lhoodC.value[j]+(1-gamma.value[j])*lhoodB.value[j],[1])
                else:
                    footprint_logodds += insum((1-gamma.value[j])*(lhoodB.value[j]-lhoodA.value[j]),[1])
                # entropy/prior terms of the gamma posterior
                footprint_logodds += insum(gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                    + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

            self.estim[:,1:] = beta.estim[0] + beta.estim[1]*scores + footprint_logodds \
                + gammaln(self.total+alpha.estim[1]) - gammaln(self.total+alpha.estim[0]) \
                + gammaln(alpha.estim[0]) - gammaln(alpha.estim[1]) \
                + alpha.estim[1]*nplog(tau.estim[1]) - alpha.estim[0]*nplog(tau.estim[0]) \
                + self.total*(nplog(1-tau.estim[1])-nplog(1-tau.estim[0]))

        if alpha is None:
            # cap infinities, then softmax-normalize each row
            self.estim[self.estim==np.inf] = MAX
            self.estim = np.exp(self.estim-np.max(self.estim,1).reshape(self.N,1))
            self.estim = self.estim/insum(self.estim,[1])
        else:
            # report log posterior odds in base 10
            self.estim[:,1:] = self.estim[:,1:]/np.log(10)
    def update_Mstep(self, eta, tau):
        """M-step update of alpha (negative-binomial shape parameters).

        Maximizes the expected log-likelihood of the read-count model
        with bounded L-BFGS-B and an analytic gradient.  Raises
        ValueError if the solution contains NaN/Inf.
        """
        # collapse eta columns 1.. into a single "bound" weight
        etaestim = np.zeros((eta.estim.shape[0], 2), dtype=float)
        etaestim[:, 0] = eta.estim[:, 0]
        etaestim[:, 1] = eta.estim[:, 1:].sum(1)

        # coefficient of the term linear in alpha
        C = nplog(tau.estim) * outsum(etaestim)

        def F(x):
            """Negative expected log-likelihood (objective to minimize)."""
            func = outsum(gammaln(eta.total+x)*etaestim) \
                - gammaln(x)*outsum(etaestim) + C*x
            f = -1. * func.sum()
            if np.isnan(f) or np.isinf(f):
                return np.inf
            else:
                return f

        def Fprime(x):
            """Gradient of F with respect to x."""
            df = outsum(digamma(eta.total+x)*etaestim) \
                - digamma(x)*outsum(etaestim) + C
            Df = -1. * df.ravel()
            if np.isnan(Df).any() or np.isinf(Df).any():
                return np.array([np.inf, np.inf])
            else:
                return Df

        # shape parameters must stay non-negative
        bounds = [(0, None), (0, None)]
        xo = self.estim.copy()
        solution = opt.fmin_l_bfgs_b(F,
                                     xo,
                                     fprime=Fprime,
                                     bounds=bounds,
                                     disp=0)
        self.estim = solution[0]

        if np.isnan(self.estim).any():
            print "Nan in Alpha"
            raise ValueError

        if np.isinf(self.estim).any():
            print "Inf in Alpha"
            raise ValueError
def bayes_optimal_estimator(cascade, eta, pi, B=None, mu=None, model='modelA'):
    """Compute the posterior mean (M1) and second moment (M2) of the
    cleavage rate, conditional on the most likely set of states for
    gamma.

    `states` marks bound sites: a hard threshold of the eta posterior
    when an Eta instance is passed, otherwise `eta` is used as given.
    Raw arrays may be passed for `pi` and `mu`; they are wrapped in
    Pi/Mu instances.  modelC is not implemented.
    """

    M1 = Cascade(cascade.L)
    M2 = Cascade(cascade.L)
    if isinstance(eta, Eta):
        # hard-assign sites by thresholding the posterior
        states = eta.estim[:, 1:] > 0.5
    else:
        states = eta[:, 1:]

    # allow a raw array in place of a Pi instance
    if not isinstance(pi, Pi):
        pitmp = Pi(cascade.J)
        pitmp.estim = pi
        pi = pitmp

    if model == 'modelA':
        for j in range(pi.J):
            # log-odds in favour of the unbound (binomial 0.5) component
            ratio = nplog(1-pi.estim[j]) - nplog(pi.estim[j]) + (cascade.value[j]*states).sum(0)*nplog(B.value[j]) \
                + ((cascade.total[j]-cascade.value[j])*states).sum(0)*nplog(1-B.value[j]) \
                - (cascade.total[j]*states).sum(0)*nplog(0.5)
            M1.value[j] = 0.5 * newlogistic(ratio) + B.value[j] * newlogistic(
                -ratio)
            M2.value[j] = 0.25 * newlogistic(
                ratio) + B.value[j]**2 * newlogistic(-ratio)
    elif model == 'modelB':
        # allow a raw array in place of a Mu instance
        if not isinstance(mu, Mu):
            mutmp = Mu(cascade.J)
            mutmp.estim = mu
            mu = mutmp

        for j in range(pi.J):
            ratio = nplog(1-pi.estim[j]) - nplog(pi.estim[j]) + gammaln((cascade.value[j]*states).sum(0)+mu.estim[j]) \
                + gammaln((cascade.total[j]*states).sum(0)-(cascade.value[j]*states).sum(0)+mu.estim[j]) \
                - gammaln((cascade.total[j]*states).sum(0)+2*mu.estim[j]) \
                + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j]) - (cascade.total[j]*states).sum(0)*nplog(0.5)
            M1.value[j] = 0.5*newlogistic(ratio) \
                + ((cascade.value[j]*states).sum(0)+mu.estim[j])/((cascade.total[j]*states).sum(0)+mu.estim[j])*newlogistic(-ratio)
            M2.value[j] = 0.25*newlogistic(ratio) \
                + ((cascade.value[j]*states).sum(0)+mu.estim[j]+1)/((cascade.total[j]*states).sum(0)+mu.estim[j]+1) \
                * ((cascade.value[j]*states).sum(0)+mu.estim[j])/((cascade.total[j]*states).sum(0)+mu.estim[j])*newlogistic(-ratio)
    elif model == 'modelC':
        raise NotImplementedError

    return M1, M2
    def update_Mstep(self, eta, tau):
        """M-step update of alpha (negative-binomial shape parameters).

        Maximizes the expected log-likelihood of the read-count model
        with bounded L-BFGS-B and an analytic gradient.  Raises
        ValueError if the solution contains NaN/Inf.
        """
        # collapse eta columns 1.. into a single "bound" weight
        etaestim = np.zeros((eta.estim.shape[0],2),dtype=float)
        etaestim[:,0] = eta.estim[:,0]
        etaestim[:,1] = eta.estim[:,1:].sum(1)

        # coefficient of the term linear in alpha
        C = nplog(tau.estim)*outsum(etaestim)

        def F(x):
            """Negative expected log-likelihood (objective to minimize)."""
            func = outsum(gammaln(eta.total+x)*etaestim) \
                - gammaln(x)*outsum(etaestim) + C*x
            f = -1.*func.sum()
            if np.isnan(f) or np.isinf(f):
                return np.inf
            else:
                return f

        def Fprime(x):
            """Gradient of F with respect to x."""
            df = outsum(digamma(eta.total+x)*etaestim) \
                - digamma(x)*outsum(etaestim) + C
            Df = -1.*df.ravel()
            if np.isnan(Df).any() or np.isinf(Df).any():
                return np.array([np.inf, np.inf])
            else:
                return Df

        # shape parameters must stay non-negative
        bounds = [(0, None), (0, None)]
        xo = self.estim.copy()
        solution = opt.fmin_l_bfgs_b(F, xo, fprime=Fprime, bounds=bounds, disp=0)
        self.estim = solution[0]

        if np.isnan(self.estim).any():
            print "Nan in Alpha"
            raise ValueError

        if np.isinf(self.estim).any():
            print "Inf in Alpha"
            raise ValueError
def bayes_optimal_estimator(cascade, eta, pi, B=None, mu=None, model='modelA'):
    """
    computes the posterior mean conditional on the most likely
    set of states for gamma.

    Arguments
        cascade : Cascade
            multiscale decomposition of read counts; `value[j]` and
            `total[j]` hold per-scale counts (project type).
        eta : Eta or array
            posterior over binding states. An Eta instance is
            thresholded at 0.5 to pick the most likely states; a raw
            array's columns 1: are used as-is.
        pi : Pi or array
            per-scale prior probabilities; a raw array is wrapped
            into a Pi instance.
        B : Cascade, optional
            success probabilities, used by modelA.
        mu : Mu or array, optional
            beta-binomial concentration, used by modelB.
        model : str
            one of 'modelA', 'modelB'; 'modelC' is not implemented.

    Returns
        (M1, M2) : Cascade, Cascade
            first and second posterior moments at each scale.
    """

    M1 = Cascade(cascade.L)
    M2 = Cascade(cascade.L)
    # hard-assign states from the posterior when given an Eta instance
    if isinstance(eta, Eta):
        states = eta.estim[:,1:]>0.5
    else:
        states = eta[:,1:]

    # allow a plain array in place of a Pi instance
    if not isinstance(pi, Pi):
        pitmp = Pi(cascade.J)
        pitmp.estim = pi
        pi = pitmp

    if model=='modelA':
        for j in range(pi.J):
            # log-odds of the background (symmetric binomial 0.5) vs the
            # footprint (binomial B) component, given the selected states
            ratio = nplog(1-pi.estim[j]) - nplog(pi.estim[j]) + (cascade.value[j]*states).sum(0)*nplog(B.value[j]) \
                + ((cascade.total[j]-cascade.value[j])*states).sum(0)*nplog(1-B.value[j]) \
                - (cascade.total[j]*states).sum(0)*nplog(0.5)
            # moments mix the two components weighted by logistic(+/-ratio)
            M1.value[j] = 0.5*newlogistic(ratio) + B.value[j]*newlogistic(-ratio)
            M2.value[j] = 0.25*newlogistic(ratio) + B.value[j]**2*newlogistic(-ratio)
    elif model=='modelB':
        # allow a plain array in place of a Mu instance
        if not isinstance(mu, Mu):
            mutmp = Mu(cascade.J)
            mutmp.estim = mu
            mu = mutmp

        for j in range(pi.J):
            # log-odds of background vs beta-binomial footprint with a
            # symmetric Beta(mu, mu) prior — presumably; TODO confirm
            ratio = nplog(1-pi.estim[j]) - nplog(pi.estim[j]) + gammaln((cascade.value[j]*states).sum(0)+mu.estim[j]) \
                + gammaln((cascade.total[j]*states).sum(0)-(cascade.value[j]*states).sum(0)+mu.estim[j]) \
                - gammaln((cascade.total[j]*states).sum(0)+2*mu.estim[j]) \
                + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j]) - (cascade.total[j]*states).sum(0)*nplog(0.5)
            # NOTE(review): the moment denominators use total+mu, while the
            # normalizer in `ratio` above uses total+2*mu for a Beta(mu,mu)
            # prior — verify whether these denominators are intentional
            M1.value[j] = 0.5*newlogistic(ratio) \
                + ((cascade.value[j]*states).sum(0)+mu.estim[j])/((cascade.total[j]*states).sum(0)+mu.estim[j])*newlogistic(-ratio)
            M2.value[j] = 0.25*newlogistic(ratio) \
                + ((cascade.value[j]*states).sum(0)+mu.estim[j]+1)/((cascade.total[j]*states).sum(0)+mu.estim[j]+1) \
                * ((cascade.value[j]*states).sum(0)+mu.estim[j])/((cascade.total[j]*states).sum(0)+mu.estim[j])*newlogistic(-ratio)
    elif model=='modelC':
        raise NotImplementedError

    return M1, M2
Beispiel #23
0
def logposteriorodds_poissonbinomial(reads, gamma, pi, parameters):
    """Log posterior odds of the footprint model vs background, per read row.

    Arguments
        reads : array, shape (N, L)
            read counts at N sites over L positions.
        gamma : Gamma
            model selector; `gamma.model` is one of 'modelA'/'modelB'/'modelC'.
        pi : Pi
            per-scale prior probabilities.
        parameters
            interpretation depends on `gamma.model`:
            modelA -> B, modelB -> mu, modelC -> (B, tau).

    Returns
        logodds : array, shape (N,)
    """

    N,L = reads.shape
    cascade = Cascade(L)
    cascade.setreads(reads)
    # one accumulated log-odds value per read row
    logodds = np.zeros((N,),dtype=float)

    # unpack the model-specific parameters
    if gamma.model=='modelA':
        B = parameters
    elif gamma.model=='modelB':
        mu = parameters
    elif gamma.model=='modelC':
        B, tau = parameters

    for j in xrange(pi.J):
        if gamma.model=='modelA':
            # background: symmetric binomial(0.5); footprint: binomial(B)
            lhoodA = cascade.total[j]*nplog(0.5)
            lhoodB = cascade.value[j]*nplog(B.value[j]) + (cascade.total[j]-cascade.value[j])*nplog(1-B.value[j])

        elif gamma.model=='modelB':
            # background: symmetric binomial(0.5);
            # footprint: beta-binomial with symmetric Beta(mu, mu) prior
            lhoodA = cascade.total[j]*nplog(0.5)
            lhoodB = gammaln(cascade.value[j]+mu.estim[j]) + gammaln(cascade.total[j]-cascade.value[j]+mu.estim[j]) \
                    - gammaln(cascade.total[j]+2*mu.estim[j]) + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j])

        elif gamma.model=='modelC':
            # both components are beta-binomial: background symmetric with
            # concentration tau, footprint with mean B and concentration tau
            lhoodA = gammaln(cascade.value[j]+0.5*tau.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*tau.value[j]) \
                    - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - 2*gammaln(0.5*tau.value[j])
            lhoodB = gammaln(cascade.value[j]+B.value[j]*tau.value[j]) \
                    + gammaln(cascade.total[j]-cascade.value[j]+(1-B.value[j])*tau.value[j]) \
                    - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - gammaln(B.value[j]*tau.value[j]) \
                    - gammaln((1-B.value[j])*tau.value[j])

        # NOTE(review): the accumulation below adds log(pi) minus the log of
        # logistic(logratio) rather than a plain log-sum of the two
        # components — verify this matches the intended odds derivation
        logratio = nplog(1-pi.estim[j]) + lhoodB - nplog(pi.estim[j]) - lhoodA
        logodds += np.sum(nplog(pi.estim[j]) - nplog(logistic(logratio)),1)
        
    return logodds
Beispiel #24
0
def logposteriorodds_multinomial(reads, footprint, null):
    """Per-row log posterior odds of the footprint multinomial vs the null.

    Both terms are read-count-weighted log probabilities summed over
    positions (via `insum` along axis 1); the difference is returned as
    a flat array with one value per row of `reads`.
    """

    footprint_term = insum(reads * nplog(footprint.ravel()), [1])
    null_term = insum(reads * nplog(null), [1])
    logodds = footprint_term - null_term

    return logodds.ravel()
# NOTE(review): exact duplicate of the identically-named function defined
# immediately above; this later definition shadows the earlier one at import
# time. Consider removing one of the two.
def logposteriorodds_multinomial(reads, footprint, null):
    """Per-row log posterior odds of the footprint multinomial vs the null.

    Returns a flat array with one log-odds value per row of `reads`.
    """

    logodds = insum(reads * nplog(footprint.ravel()), [1]) - insum(reads * nplog(null), [1])

    return logodds.ravel()
def likelihood(cascade,
               scores,
               eta,
               gamma,
               pi,
               alpha,
               beta,
               tau,
               mu=None,
               B=None,
               omega=None,
               omegao=None):
    """Evidence lower bound of the full model, summed over sites.

    Combines per-scale footprint likelihoods (from `likelihoodAB`),
    negative-binomial-style read-abundance terms (alpha, tau), and a
    logistic prior on `scores` (beta), weighted by the posteriors
    `eta` and `gamma`. Entropy terms for both posteriors are included.

    Arguments
        cascade : Cascade -- multiscale decomposition of read counts
        scores : array -- per-site covariates for the prior log-odds
        eta : Eta -- posterior over unbound/bound states
        gamma : Gamma -- posterior over per-scale footprint indicators
        pi : Pi -- prior on footprint indicators
        alpha, tau -- abundance-model parameters (two-state)
        beta : Beta -- logistic-prior coefficients
        mu, B, omega, omegao -- model-specific parameters forwarded to
            `likelihoodAB` depending on `gamma.model`

    Returns
        float -- the summed bound; raises ValueError on NaN or Inf.
    """

    # prior log-odds of binding from site covariates
    apriori = beta.estim[0] + beta.estim[1] * scores

    # per-scale component likelihoods under the selected model
    if gamma.model == 'modelA':
        lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
    elif gamma.model == 'modelB':
        lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
    elif gamma.model == 'modelC':
        lhoodA, lhoodB, lhoodC = likelihoodAB(cascade,
                                              B=B,
                                              omega=omega,
                                              omegao=omegao,
                                              model=gamma.model)

    # expected footprint log-likelihood plus the entropy of gamma
    footprint = np.zeros((cascade.N, 1), dtype=float)
    for j in xrange(pi.J):
        footprint += insum(gamma.value[j]*lhoodA.value[j] + (1-gamma.value[j])*lhoodB.value[j] \
                + gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

    # bound-state term: footprint plus the abundance model for state 1;
    # clamp infinities to keep downstream sums finite
    P_1 = footprint + gammaln(eta.total+alpha.estim[1]) - gammaln(alpha.estim[1]) \
        + alpha.estim[1]*nplog(tau.estim[1]) + eta.total*nplog(1-tau.estim[1])
    P_1[P_1 == np.inf] = MAX
    P_1[P_1 == -np.inf] = -MAX

    # unbound-state term: null footprint likelihood (lhoodC for modelC,
    # lhoodA otherwise) plus the abundance model for state 0
    null = np.zeros((cascade.N, 1), dtype=float)
    for j in xrange(cascade.J):
        if gamma.model == 'modelC':
            null = null + insum(lhoodC.value[j], [1])
        else:
            null = null + insum(lhoodA.value[j], [1])
    P_0 = null + gammaln(eta.total+alpha.estim[0]) - gammaln(alpha.estim[0]) \
        + alpha.estim[0]*nplog(tau.estim[0]) + eta.total*nplog(1-tau.estim[0])
    P_0[P_0 == np.inf] = MAX
    P_0[P_0 == -np.inf] = -MAX

    # mix states by eta, add the logistic prior on `scores`
    # and the entropy of eta
    L = P_0*eta.estim[:,:1] + insum(P_1*eta.estim[:,1:],[1]) + apriori*(1-eta.estim[:,:1]) \
        - nplog(1+np.exp(apriori)) - insum(eta.estim*nplog(eta.estim),[1])

    L = L.sum()

    if np.isnan(L):
        print "Nan in LogLike"
        raise ValueError

    if np.isinf(L):
        print "Inf in LogLike"
        raise ValueError

    return L