def likelihoodAB(cascade, mu=None, B=None, omega=None, omegao=None, model='modelB'):
    """Compute per-scale log-likelihood terms for the two (or three) mixture
    components of the footprint model.

    Parameters
    ----------
    cascade : Cascade
        Multiscale decomposition of the read data (`value[j]` / `total[j]`).
    mu, B, omega, omegao
        Model parameters; which ones are required depends on `model`.
    model : str
        One of 'modelA', 'modelB', 'modelC'.

    Returns
    -------
    (lhoodA, lhoodB) for modelA/modelB, or (lhoodA, lhoodB, lhoodC) for
    modelC, each a Cascade holding per-scale log-likelihood arrays.
    """
    lhoodA = Cascade(cascade.L)
    lhoodB = Cascade(cascade.L)
    if model=='modelC':
        lhoodC = Cascade(cascade.L)

    for j in xrange(cascade.J):
        if model=='modelA':
            # A: symmetric binomial (p = 0.5); B: binomial with bias B.value[j]
            lhoodA.value[j] = cascade.total[j]*nplog(0.5)
            lhoodB.value[j] = cascade.value[j]*nplog(B.value[j]) + (cascade.total[j]-cascade.value[j])*nplog(1-B.value[j])
        elif model=='modelB':
            # A: symmetric binomial; B: beta-binomial log-marginal with
            # symmetric pseudocounts mu.estim[j]
            lhoodA.value[j] = cascade.total[j]*nplog(0.5)
            lhoodB.value[j] = gammaln(cascade.value[j]+mu.estim[j]) + gammaln(cascade.total[j]-cascade.value[j]+mu.estim[j]) \
                - gammaln(cascade.total[j]+2*mu.estim[j]) + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j])
        elif model=='modelC':
            # A: symmetric beta-binomial with concentration omega;
            # B: beta-binomial with mean B and concentration omega;
            # C: symmetric beta-binomial with concentration omegao.
            lhoodA.value[j] = gammaln(cascade.value[j]+0.5*omega.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*omega.value[j]) \
                - gammaln(cascade.total[j]+omega.value[j]) + gammaln(omega.value[j]) - 2*gammaln(0.5*omega.value[j])
            lhoodB.value[j] = gammaln(cascade.value[j]+B.value[j]*omega.value[j]) \
                + gammaln(cascade.total[j]-cascade.value[j]+(1-B.value[j])*omega.value[j]) \
                - gammaln(cascade.total[j]+omega.value[j]) + gammaln(omega.value[j]) - gammaln(B.value[j]*omega.value[j]) \
                - gammaln((1-B.value[j])*omega.value[j])
            lhoodC.value[j] = gammaln(cascade.value[j]+0.5*omegao.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*omegao.value[j]) \
                - gammaln(cascade.total[j]+omegao.value[j]) + gammaln(omegao.value[j]) - 2*gammaln(0.5*omegao.value[j])

    if model=='modelC':
        return lhoodA, lhoodB, lhoodC
    else:
        return lhoodA, lhoodB
def bayes_optimal_estimator(cascade, pi, mu):
    """Return a Cascade of posterior-mean estimates at each scale.

    At every scale j the estimate mixes the symmetric value 0.5 with the
    beta-binomial posterior mean, weighted by the posterior probability of
    each component (computed from the prior `pi` and the data likelihoods).
    """
    estimate = Cascade(cascade.L)
    for j in range(pi.J):
        # aggregate counts across sites at this scale
        successes = cascade.value[j].sum(0)
        totals = cascade.total[j].sum(0)
        # log-odds in favor of the symmetric (p = 0.5) component
        log_odds = nplog(1 - pi.estim[j]) - nplog(pi.estim[j]) \
            + gammaln(successes + mu.estim[j]) \
            + gammaln(totals - successes + mu.estim[j]) \
            - gammaln(totals + 2 * mu.estim[j]) \
            + gammaln(2 * mu.estim[j]) - 2 * gammaln(mu.estim[j]) \
            - totals * nplog(0.5)
        # posterior-weighted mixture of the two component means
        estimate.value[j] = 0.5 * logistic(log_odds) \
            + (successes + mu.estim[j]) / (totals + mu.estim[j]) * logistic(-log_odds)
    return estimate
def update_Estep(self, cascade, eta, pi, mu=None, B=None, omega=None, omegao=None):
    """E-step update of the per-scale posterior `self.value` (gamma).

    Combines the prior log-odds from `pi` with the eta-weighted average
    log-likelihood difference between components A and B.
    """
    if self.model == 'modelA':
        lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=self.model)
    elif self.model == 'modelB':
        lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=self.model)
    elif self.model == 'modelC':
        # lhoodC is not used in this update
        lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega,
                                              omegao=omegao, model=self.model)

    bound_weights = eta.estim[:, 1:]
    for j in xrange(self.J):
        delta = lhoodA.value[j] - lhoodB.value[j]
        log_odds = nplog(pi.estim[j]) - nplog(1 - pi.estim[j]) \
            + outsum(bound_weights * delta) / outsum(bound_weights)
        self.value[j] = newlogistic(-log_odds)
def update_Estep(self, cascade, scores, alpha, beta, tau, pi, gamma, mu=None, B=None, omega=None, omegao=None):
    """E-step update of the per-site binding posterior `self.estim` (eta).

    Column 0 is the unbound state (log-odds fixed at 0); columns 1: hold the
    bound-state log posterior odds before normalization. The update combines
    the prior (beta, scores), the footprint evidence (gamma, pi, likelihoods)
    and the read-depth evidence (alpha, tau), then normalizes via softmax.
    Raises ValueError if the result contains NaN or Inf.
    """
    footprint_logodds = np.zeros((self.N,1),dtype=float)
    if gamma.model=='modelA':
        lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
    elif gamma.model=='modelB':
        lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
    elif gamma.model=='modelC':
        lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=gamma.model)

    # footprint evidence: expected log-likelihood ratio plus the entropy
    # terms of the gamma posterior against its prior pi
    for j in xrange(pi.J):
        footprint_logodds += insum((1-gamma.value[j])*(lhoodB.value[j]-lhoodA.value[j]) \
            + gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
            + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

    # prior + footprint + negative-binomial read-depth log-odds
    self.estim[:,1:] = beta.estim[0] + beta.estim[1]*scores + footprint_logodds \
        + gammaln(self.total+alpha.estim[1]) - gammaln(self.total+alpha.estim[0]) \
        + gammaln(alpha.estim[0]) - gammaln(alpha.estim[1]) \
        + alpha.estim[1]*nplog(tau.estim[1]) - alpha.estim[0]*nplog(tau.estim[0]) \
        + self.total*(nplog(1-tau.estim[1])-nplog(1-tau.estim[0]))

    # reference state; softmax with max-subtraction for numerical stability
    self.estim[:,0] = 0.
    self.estim[self.estim==np.inf] = MAX
    self.estim = np.exp(self.estim-np.max(self.estim,1).reshape(self.N,1))
    self.estim = self.estim/insum(self.estim,[1])

    if np.isnan(self.estim).any():
        print "Nan in Eta"
        raise ValueError
    if np.isinf(self.estim).any():
        print "Inf in Eta"
        raise ValueError
def likelihood(cascade, scores, eta, gamma, pi, alpha, beta, tau, mu=None, B=None, omega=None, omegao=None):
    """Evaluate the (variational) log-likelihood of the full model.

    Sums, over sites, the bound-state evidence P_1 (footprint + read depth),
    the unbound-state evidence P_0 (null footprint + read depth), the logistic
    prior term, and the entropy of the eta posterior.
    Raises ValueError if the total is NaN or Inf.
    """
    apriori = beta.estim[0] + beta.estim[1]*scores

    if gamma.model=='modelA':
        lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
    elif gamma.model=='modelB':
        lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
    elif gamma.model=='modelC':
        lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=gamma.model)

    # expected footprint log-likelihood under the gamma posterior, with
    # entropy terms against the prior pi
    footprint = np.zeros((cascade.N,1),dtype=float)
    for j in xrange(pi.J):
        footprint += insum(gamma.value[j]*lhoodA.value[j] + (1-gamma.value[j])*lhoodB.value[j] \
            + gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
            + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

    # bound-state evidence (footprint + negative-binomial read depth)
    P_1 = footprint + gammaln(eta.total+alpha.estim[1]) - gammaln(alpha.estim[1]) \
        + alpha.estim[1]*nplog(tau.estim[1]) + eta.total*nplog(1-tau.estim[1])
    P_1[P_1==np.inf] = MAX
    P_1[P_1==-np.inf] = -MAX

    # null footprint: component C under modelC, component A otherwise
    null = np.zeros((cascade.N,1),dtype=float)
    for j in xrange(cascade.J):
        if gamma.model=='modelC':
            null = null + insum(lhoodC.value[j],[1])
        else:
            null = null + insum(lhoodA.value[j],[1])

    # unbound-state evidence
    P_0 = null + gammaln(eta.total+alpha.estim[0]) - gammaln(alpha.estim[0]) \
        + alpha.estim[0]*nplog(tau.estim[0]) + eta.total*nplog(1-tau.estim[0])
    P_0[P_0==np.inf] = MAX
    P_0[P_0==-np.inf] = -MAX

    # assemble the bound: expectations under eta, logistic prior, eta entropy
    L = P_0*eta.estim[:,:1] + insum(P_1*eta.estim[:,1:],[1]) + apriori*(1-eta.estim[:,:1]) \
        - nplog(1+np.exp(apriori)) - insum(eta.estim*nplog(eta.estim),[1])
    L = L.sum()

    if np.isnan(L):
        print "Nan in LogLike"
        raise ValueError
    if np.isinf(L):
        print "Inf in LogLike"
        raise ValueError
    return L
def F(x):
    """Negative objective for the logistic-prior coefficients.

    x[0] is the intercept, x[1] the score coefficient; returns np.inf when
    the value is not finite so the optimizer backs off.
    """
    logit = x[0] + x[1] * scores
    objective = (logit * insum(eta.estim[:, 1:], 1) - nplog(1 + np.exp(logit))).sum()
    neg_objective = -1. * objective
    if np.isnan(neg_objective) or np.isinf(neg_objective):
        return np.inf
    return neg_objective
def F(x):
    """Negative log-likelihood of the logistic prior at coefficients x.

    Guards the optimizer by mapping non-finite values to np.inf.
    """
    linear_term = x[0] + x[1] * scores
    per_site = linear_term * insum(eta.estim[:, 1:], 1)
    per_site = per_site - nplog(1 + np.exp(linear_term))
    value = -1. * per_site.sum()
    return np.inf if (np.isnan(value) or np.isinf(value)) else value
def loglikelihood(cascade, gamma, pi, B=None, mu=None, tau=None):
    """Evaluate the footprint-model log-likelihood bound over all scales.

    For each scale j, computes the component log-likelihoods (A/B, depending
    on `gamma.model`) and accumulates their gamma-weighted sum plus the
    prior/entropy terms for gamma against pi. Returns the scalar total.
    """
    L = 0.
    for j in xrange(cascade.J):
        if gamma.model=='modelA':
            # A: symmetric binomial; B: binomial with bias B
            lhoodA = cascade.total[j]*nplog(0.5)
            lhoodB = cascade.value[j]*nplog(B.value[j]) + (cascade.total[j]-cascade.value[j])*nplog(1-B.value[j])
        elif gamma.model=='modelB':
            # A: symmetric binomial; B: symmetric beta-binomial (mu)
            lhoodA = cascade.total[j]*nplog(0.5)
            lhoodB = gammaln(cascade.value[j]+mu.estim[j]) + gammaln(cascade.total[j]-cascade.value[j]+mu.estim[j]) \
                - gammaln(cascade.total[j]+2*mu.estim[j]) + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j])
        elif gamma.model=='modelC':
            # A/B: beta-binomial terms with concentration tau (and mean B for B)
            lhoodA = gammaln(cascade.value[j]+0.5*tau.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*tau.value[j]) \
                - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - 2*gammaln(0.5*tau.value[j])
            lhoodB = gammaln(cascade.value[j]+B.value[j]*tau.value[j]) \
                + gammaln(cascade.total[j]-cascade.value[j]+(1-B.value[j])*tau.value[j]) \
                - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - gammaln(B.value[j]*tau.value[j]) \
                - gammaln((1-B.value[j])*tau.value[j])
        # gamma-weighted data terms plus prior/entropy terms (scaled by N)
        L += np.sum(gamma.value[j]*lhoodA.sum(0) + (1-gamma.value[j])*lhoodB.sum(0) \
            + cascade.N*(gamma.value[j]*nplog(pi.estim[j]) + (1-gamma.value[j])*nplog(1-pi.estim[j]) \
            - gamma.value[j]*nplog(gamma.value[j]) - (1-gamma.value[j])*nplog(1-gamma.value[j])))
    return L
def bayes_optimal_estimator(cascade, pi, mu):
    """Posterior-mean estimator over the cascade scales.

    Each scale's estimate is a posterior-weighted blend of 0.5 (symmetric
    component) and the beta-binomial posterior mean (asymmetric component).
    """
    result = Cascade(cascade.L)
    for j in range(pi.J):
        counts = cascade.value[j].sum(0)
        trials = cascade.total[j].sum(0)
        prior_mu = mu.estim[j]
        # log-odds favoring the symmetric component at scale j
        ratio = nplog(1 - pi.estim[j]) - nplog(pi.estim[j])
        ratio = ratio + gammaln(counts + prior_mu)
        ratio = ratio + gammaln(trials - counts + prior_mu)
        ratio = ratio - gammaln(trials + 2 * prior_mu)
        ratio = ratio + gammaln(2 * prior_mu) - 2 * gammaln(prior_mu)
        ratio = ratio - trials * nplog(0.5)
        posterior_mean = (counts + prior_mu) / (trials + prior_mu)
        result.value[j] = 0.5 * logistic(ratio) + posterior_mean * logistic(-ratio)
    return result
def __init__(self, cascade, totalreads, scores, gamma=None, beta=None, \
        pi=None, mu=None, B=None, omega=None, omegao=None, alpha=None, tau=None):
    """Initialize the per-site binding estimates (eta).

    When `alpha` is None (cold start), sites are split by total read depth:
    the shallower half gets log-odds -MAX, the deeper half +MAX. Otherwise
    the bound-state log-odds are computed from the fitted model (beta prior,
    footprint evidence via gamma/pi, read-depth evidence via alpha/tau) and
    rescaled to log10 units.
    """
    self.N = cascade.N
    self.total = totalreads.reshape(self.N, 1)
    self.estim = np.zeros((self.N, 2), dtype=float)
    if alpha is None:
        # cold start: rank sites by depth and assign extreme log-odds
        indices = np.argsort(self.total.ravel())[:self.N / 2]
        self.estim[indices, 1:] = -MAX
        indices = np.argsort(self.total.ravel())[self.N / 2:]
        self.estim[indices, 1:] = MAX
    else:
        footprint_logodds = np.zeros((self.N, 1), dtype=float)
        if gamma.model == 'modelA':
            lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
        elif gamma.model == 'modelB':
            lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
        elif gamma.model == 'modelC':
            lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=gamma.model)

        for j in xrange(pi.J):
            # BUG FIX: this branch previously tested the undefined name
            # `model` (NameError at runtime); the model identifier lives
            # on `gamma`.
            if gamma.model == 'modelC':
                footprint_logodds += insum(
                    gamma.value[j]*lhoodA.value[j] - lhoodC.value[j]
                    + (1-gamma.value[j])*lhoodB.value[j], [1])
            else:
                footprint_logodds += insum(
                    (1-gamma.value[j])*(lhoodB.value[j]-lhoodA.value[j]), [1])
            # prior/entropy terms for gamma against pi
            footprint_logodds += insum(gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

        # prior + footprint + negative-binomial read-depth log-odds
        self.estim[:,1:] = beta.estim[0] + beta.estim[1]*scores + footprint_logodds \
            + gammaln(self.total+alpha.estim[1]) - gammaln(self.total+alpha.estim[0]) \
            + gammaln(alpha.estim[0]) - gammaln(alpha.estim[1]) \
            + alpha.estim[1]*nplog(tau.estim[1]) - alpha.estim[0]*nplog(tau.estim[0]) \
            + self.total*(nplog(1-tau.estim[1])-nplog(1-tau.estim[0]))

    if alpha is None:
        # normalize the cold-start odds into probabilities (stable softmax)
        self.estim[self.estim == np.inf] = MAX
        self.estim = np.exp(self.estim - np.max(self.estim, 1).reshape(self.N, 1))
        self.estim = self.estim / insum(self.estim, [1])
    else:
        # convert natural-log odds to log10 odds
        self.estim[:, 1:] = self.estim[:, 1:] / np.log(10)
def update(self, cascade, pi, mu=None, B=None, tau=None):
    """M-step-style update of the per-scale posterior `self.value` (gamma).

    For each scale j, computes component log-likelihoods A/B under
    `self.model` and sets gamma to the logistic of the (per-site averaged)
    log posterior odds against the prior pi.
    """
    for j in xrange(self.J):
        if self.model == "modelA":
            # A: symmetric binomial; B: binomial with bias B
            lhoodA = cascade.total[j] * nplog(0.5)
            lhoodB = cascade.value[j] * nplog(B.value[j]) + (cascade.total[j] - cascade.value[j]) * nplog(
                1 - B.value[j]
            )
        elif self.model == "modelB":
            # A: symmetric binomial; B: symmetric beta-binomial (mu)
            lhoodA = cascade.total[j] * nplog(0.5)
            lhoodB = (
                gammaln(cascade.value[j] + mu.estim[j])
                + gammaln(cascade.total[j] - cascade.value[j] + mu.estim[j])
                - gammaln(cascade.total[j] + 2 * mu.estim[j])
                + gammaln(2 * mu.estim[j])
                - 2 * gammaln(mu.estim[j])
            )
        elif self.model == "modelC":
            # A/B: beta-binomial terms with concentration tau (mean B for B)
            lhoodA = (
                gammaln(cascade.value[j] + 0.5 * tau.value[j])
                + gammaln(cascade.total[j] - cascade.value[j] + 0.5 * tau.value[j])
                - gammaln(cascade.total[j] + tau.value[j])
                + gammaln(tau.value[j])
                - 2 * gammaln(0.5 * tau.value[j])
            )
            lhoodB = (
                gammaln(cascade.value[j] + B.value[j] * tau.value[j])
                + gammaln(cascade.total[j] - cascade.value[j] + (1 - B.value[j]) * tau.value[j])
                - gammaln(cascade.total[j] + tau.value[j])
                + gammaln(tau.value[j])
                - gammaln(B.value[j] * tau.value[j])
                - gammaln((1 - B.value[j]) * tau.value[j])
            )
        # average log-likelihood ratio across the N sites, plus prior odds
        log_posterior_odds = (
            nplog(pi.estim[j]) - nplog(1 - pi.estim[j]) + 1.0 / cascade.N * (lhoodA.sum(0) - lhoodB.sum(0))
        )
        self.value[j] = logistic(-log_posterior_odds)
def update(self, cascade, pi, mu=None, B=None, tau=None):
    """Update the per-scale posterior `self.value` (gamma).

    Component log-likelihoods A/B are chosen by `self.model`; gamma at each
    scale is the logistic of the prior log-odds plus the per-site averaged
    log-likelihood difference.
    """
    for j in xrange(self.J):
        if self.model=='modelA':
            # A: symmetric binomial; B: binomial with bias B
            lhoodA = cascade.total[j]*nplog(0.5)
            lhoodB = cascade.value[j]*nplog(B.value[j]) + (cascade.total[j]-cascade.value[j])*nplog(1-B.value[j])
        elif self.model=='modelB':
            # A: symmetric binomial; B: symmetric beta-binomial (mu)
            lhoodA = cascade.total[j]*nplog(0.5)
            lhoodB = gammaln(cascade.value[j]+mu.estim[j]) + gammaln(cascade.total[j]-cascade.value[j]+mu.estim[j]) \
                - gammaln(cascade.total[j]+2*mu.estim[j]) + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j])
        elif self.model=='modelC':
            # A/B: beta-binomial terms with concentration tau (mean B for B)
            lhoodA = gammaln(cascade.value[j]+0.5*tau.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*tau.value[j]) \
                - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - 2*gammaln(0.5*tau.value[j])
            lhoodB = gammaln(cascade.value[j]+B.value[j]*tau.value[j]) \
                + gammaln(cascade.total[j]-cascade.value[j]+(1-B.value[j])*tau.value[j]) \
                - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - gammaln(B.value[j]*tau.value[j]) \
                - gammaln((1-B.value[j])*tau.value[j])
        # prior log-odds plus average log-likelihood ratio over N sites
        log_posterior_odds = nplog(pi.estim[j]) - nplog(1-pi.estim[j]) + 1./cascade.N*(lhoodA.sum(0) - lhoodB.sum(0))
        self.value[j] = logistic(-log_posterior_odds)
def update_Estep(self, cascade, scores, alpha, beta, tau, pi, gamma, mu=None, B=None, omega=None, omegao=None):
    """E-step update of the per-site binding posterior `self.estim` (eta).

    Builds bound-state log-odds from the logistic prior (beta, scores), the
    footprint evidence (gamma, pi, component likelihoods) and the read-depth
    evidence (alpha, tau), then normalizes rows with a stable softmax.
    Raises ValueError on NaN/Inf in the result.
    """
    footprint_logodds = np.zeros((self.N, 1), dtype=float)
    if gamma.model == 'modelA':
        lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
    elif gamma.model == 'modelB':
        lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
    elif gamma.model == 'modelC':
        lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=gamma.model)

    # footprint evidence plus entropy terms of gamma against its prior pi
    for j in xrange(pi.J):
        footprint_logodds += insum((1-gamma.value[j])*(lhoodB.value[j]-lhoodA.value[j]) \
            + gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
            + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

    # prior + footprint + negative-binomial read-depth log-odds
    self.estim[:,1:] = beta.estim[0] + beta.estim[1]*scores + footprint_logodds \
        + gammaln(self.total+alpha.estim[1]) - gammaln(self.total+alpha.estim[0]) \
        + gammaln(alpha.estim[0]) - gammaln(alpha.estim[1]) \
        + alpha.estim[1]*nplog(tau.estim[1]) - alpha.estim[0]*nplog(tau.estim[0]) \
        + self.total*(nplog(1-tau.estim[1])-nplog(1-tau.estim[0]))

    # reference state; stable softmax normalization
    self.estim[:, 0] = 0.
    self.estim[self.estim == np.inf] = MAX
    self.estim = np.exp(self.estim - np.max(self.estim, 1).reshape(self.N, 1))
    self.estim = self.estim / insum(self.estim, [1])

    if np.isnan(self.estim).any():
        print "Nan in Eta"
        raise ValueError
    if np.isinf(self.estim).any():
        print "Inf in Eta"
        raise ValueError
def logposteriorodds_poissonbinomial(reads, gamma, pi, parameters):
    """Per-site log posterior odds under the multiscale binomial model.

    `parameters` is unpacked according to `gamma.model`: B (modelA),
    mu (modelB), or (B, tau) (modelC). Reads are first decomposed into a
    Cascade; odds are accumulated across scales.
    """
    N, L = reads.shape
    cascade = Cascade(L)
    cascade.setreads(reads)
    logodds = np.zeros((N,), dtype=float)
    # unpack model-specific parameters
    if gamma.model == "modelA":
        B = parameters
    elif gamma.model == "modelB":
        mu = parameters
    elif gamma.model == "modelC":
        B, tau = parameters
    for j in xrange(pi.J):
        if gamma.model == "modelA":
            # A: symmetric binomial; B: binomial with bias B
            lhoodA = cascade.total[j] * nplog(0.5)
            lhoodB = cascade.value[j] * nplog(B.value[j]) + (cascade.total[j] - cascade.value[j]) * nplog(
                1 - B.value[j]
            )
        elif gamma.model == "modelB":
            # A: symmetric binomial; B: symmetric beta-binomial (mu)
            lhoodA = cascade.total[j] * nplog(0.5)
            lhoodB = (
                gammaln(cascade.value[j] + mu.estim[j])
                + gammaln(cascade.total[j] - cascade.value[j] + mu.estim[j])
                - gammaln(cascade.total[j] + 2 * mu.estim[j])
                + gammaln(2 * mu.estim[j])
                - 2 * gammaln(mu.estim[j])
            )
        elif gamma.model == "modelC":
            # A/B: beta-binomial terms with concentration tau (mean B for B)
            lhoodA = (
                gammaln(cascade.value[j] + 0.5 * tau.value[j])
                + gammaln(cascade.total[j] - cascade.value[j] + 0.5 * tau.value[j])
                - gammaln(cascade.total[j] + tau.value[j])
                + gammaln(tau.value[j])
                - 2 * gammaln(0.5 * tau.value[j])
            )
            lhoodB = (
                gammaln(cascade.value[j] + B.value[j] * tau.value[j])
                + gammaln(cascade.total[j] - cascade.value[j] + (1 - B.value[j]) * tau.value[j])
                - gammaln(cascade.total[j] + tau.value[j])
                + gammaln(tau.value[j])
                - gammaln(B.value[j] * tau.value[j])
                - gammaln((1 - B.value[j]) * tau.value[j])
            )
        # NOTE(review): the accumulation below adds nplog(pi) rather than the
        # usual log-sum of mixture components — looks asymmetric; verify
        # against the intended posterior-odds derivation.
        logratio = nplog(1 - pi.estim[j]) + lhoodB - nplog(pi.estim[j]) - lhoodA
        logodds += np.sum(nplog(pi.estim[j]) - nplog(logistic(logratio)), 1)
    return logodds
def likelihoodAB(cascade, mu=None, B=None, omega=None, omegao=None, model='modelB'):
    """Per-scale log-likelihood terms for the model's mixture components.

    Returns (lhoodA, lhoodB) for modelA/modelB, or (lhoodA, lhoodB, lhoodC)
    for modelC; each is a Cascade of per-scale log-likelihood arrays.
    """
    lhoodA = Cascade(cascade.L)
    lhoodB = Cascade(cascade.L)
    if model == 'modelC':
        lhoodC = Cascade(cascade.L)
    for j in xrange(cascade.J):
        if model == 'modelA':
            # A: symmetric binomial (p = 0.5); B: binomial with bias B
            lhoodA.value[j] = cascade.total[j] * nplog(0.5)
            lhoodB.value[j] = cascade.value[j] * nplog(B.value[j]) + (
                cascade.total[j] - cascade.value[j]) * nplog(1 - B.value[j])
        elif model == 'modelB':
            # A: symmetric binomial; B: symmetric beta-binomial (mu)
            lhoodA.value[j] = cascade.total[j] * nplog(0.5)
            lhoodB.value[j] = gammaln(cascade.value[j]+mu.estim[j]) + gammaln(cascade.total[j]-cascade.value[j]+mu.estim[j]) \
                - gammaln(cascade.total[j]+2*mu.estim[j]) + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j])
        elif model == 'modelC':
            # A/B: beta-binomial with concentration omega (mean B for B);
            # C: symmetric beta-binomial with concentration omegao.
            lhoodA.value[j] = gammaln(cascade.value[j]+0.5*omega.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*omega.value[j]) \
                - gammaln(cascade.total[j]+omega.value[j]) + gammaln(omega.value[j]) - 2*gammaln(0.5*omega.value[j])
            lhoodB.value[j] = gammaln(cascade.value[j]+B.value[j]*omega.value[j]) \
                + gammaln(cascade.total[j]-cascade.value[j]+(1-B.value[j])*omega.value[j]) \
                - gammaln(cascade.total[j]+omega.value[j]) + gammaln(omega.value[j]) - gammaln(B.value[j]*omega.value[j]) \
                - gammaln((1-B.value[j])*omega.value[j])
            lhoodC.value[j] = gammaln(cascade.value[j]+0.5*omegao.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*omegao.value[j]) \
                - gammaln(cascade.total[j]+omegao.value[j]) + gammaln(omegao.value[j]) - 2*gammaln(0.5*omegao.value[j])
    if model == 'modelC':
        return lhoodA, lhoodB, lhoodC
    else:
        return lhoodA, lhoodB
def update_Estep(self, cascade, eta, pi, mu=None, B=None, omega=None, omegao=None):
    """E-step update of `self.value` (gamma) at every scale.

    The posterior at scale j is the logistic of the prior log-odds (from pi)
    plus the eta-weighted mean difference of the A and B log-likelihoods.
    The extra modelC component (lhoodC) is not used by this update.
    """
    if self.model == 'modelA':
        lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=self.model)
    elif self.model == 'modelB':
        lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=self.model)
    elif self.model == 'modelC':
        lhoodA, lhoodB, lhoodC = likelihoodAB(
            cascade, B=B, omega=omega, omegao=omegao, model=self.model)

    posterior_weights = eta.estim[:, 1:]
    for scale in xrange(self.J):
        likelihood_gap = lhoodA.value[scale] - lhoodB.value[scale]
        weighted_mean = outsum(posterior_weights * likelihood_gap) / outsum(posterior_weights)
        prior_odds = nplog(pi.estim[scale]) - nplog(1 - pi.estim[scale])
        self.value[scale] = newlogistic(-(prior_odds + weighted_mean))
def loglikelihood(cascade, gamma, pi, B=None, mu=None, tau=None):
    """Total footprint-model log-likelihood bound, summed over scales.

    Accumulates the gamma-weighted component log-likelihoods (A/B per
    `gamma.model`) plus the N-scaled prior and entropy terms of gamma
    against pi. Returns the scalar total.
    """
    L = 0.0
    for j in xrange(cascade.J):
        if gamma.model == "modelA":
            # A: symmetric binomial; B: binomial with bias B
            lhoodA = cascade.total[j] * nplog(0.5)
            lhoodB = cascade.value[j] * nplog(B.value[j]) + (cascade.total[j] - cascade.value[j]) * nplog(
                1 - B.value[j]
            )
        elif gamma.model == "modelB":
            # A: symmetric binomial; B: symmetric beta-binomial (mu)
            lhoodA = cascade.total[j] * nplog(0.5)
            lhoodB = (
                gammaln(cascade.value[j] + mu.estim[j])
                + gammaln(cascade.total[j] - cascade.value[j] + mu.estim[j])
                - gammaln(cascade.total[j] + 2 * mu.estim[j])
                + gammaln(2 * mu.estim[j])
                - 2 * gammaln(mu.estim[j])
            )
        elif gamma.model == "modelC":
            # A/B: beta-binomial terms with concentration tau (mean B for B)
            lhoodA = (
                gammaln(cascade.value[j] + 0.5 * tau.value[j])
                + gammaln(cascade.total[j] - cascade.value[j] + 0.5 * tau.value[j])
                - gammaln(cascade.total[j] + tau.value[j])
                + gammaln(tau.value[j])
                - 2 * gammaln(0.5 * tau.value[j])
            )
            lhoodB = (
                gammaln(cascade.value[j] + B.value[j] * tau.value[j])
                + gammaln(cascade.total[j] - cascade.value[j] + (1 - B.value[j]) * tau.value[j])
                - gammaln(cascade.total[j] + tau.value[j])
                + gammaln(tau.value[j])
                - gammaln(B.value[j] * tau.value[j])
                - gammaln((1 - B.value[j]) * tau.value[j])
            )
        # gamma-weighted data terms plus prior/entropy terms (scaled by N)
        L += np.sum(
            gamma.value[j] * lhoodA.sum(0)
            + (1 - gamma.value[j]) * lhoodB.sum(0)
            + cascade.N
            * (
                gamma.value[j] * nplog(pi.estim[j])
                + (1 - gamma.value[j]) * nplog(1 - pi.estim[j])
                - gamma.value[j] * nplog(gamma.value[j])
                - (1 - gamma.value[j]) * nplog(1 - gamma.value[j])
            )
        )
    return L
def __init__(self, cascade, totalreads, scores, gamma=None, beta=None, \
        pi=None, mu=None, B=None, omega=None, omegao=None, alpha=None, tau=None):
    """Initialize the per-site binding estimates (eta).

    With `alpha` None (cold start), the shallower half of sites (by total
    reads) is given log-odds -MAX and the deeper half +MAX, then softmaxed.
    Otherwise the bound-state log-odds are computed from the fitted model
    and converted to log10 units.
    """
    self.N = cascade.N
    self.total = totalreads.reshape(self.N,1)
    self.estim = np.zeros((self.N, 2),dtype=float)
    if alpha is None:
        # cold start: split sites by read depth into extreme log-odds
        indices = np.argsort(self.total.ravel())[:self.N/2]
        self.estim[indices,1:] = -MAX
        indices = np.argsort(self.total.ravel())[self.N/2:]
        self.estim[indices,1:] = MAX
    else:
        footprint_logodds = np.zeros((self.N,1),dtype=float)
        if gamma.model=='modelA':
            lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
        elif gamma.model=='modelB':
            lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
        elif gamma.model=='modelC':
            lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=gamma.model)

        for j in xrange(pi.J):
            # BUG FIX: this condition previously referenced the undefined
            # name `model` (NameError); use gamma.model instead.
            if gamma.model=='modelC':
                footprint_logodds += insum(gamma.value[j]*lhoodA.value[j]-lhoodC.value[j]+(1-gamma.value[j])*lhoodB.value[j],[1])
            else:
                footprint_logodds += insum((1-gamma.value[j])*(lhoodB.value[j]-lhoodA.value[j]),[1])
            # prior/entropy terms for gamma against pi
            footprint_logodds += insum(gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

        # prior + footprint + negative-binomial read-depth log-odds
        self.estim[:,1:] = beta.estim[0] + beta.estim[1]*scores + footprint_logodds \
            + gammaln(self.total+alpha.estim[1]) - gammaln(self.total+alpha.estim[0]) \
            + gammaln(alpha.estim[0]) - gammaln(alpha.estim[1]) \
            + alpha.estim[1]*nplog(tau.estim[1]) - alpha.estim[0]*nplog(tau.estim[0]) \
            + self.total*(nplog(1-tau.estim[1])-nplog(1-tau.estim[0]))

    if alpha is None:
        # normalize cold-start odds into probabilities (stable softmax)
        self.estim[self.estim==np.inf] = MAX
        self.estim = np.exp(self.estim-np.max(self.estim,1).reshape(self.N,1))
        self.estim = self.estim/insum(self.estim,[1])
    else:
        # natural-log odds -> log10 odds
        self.estim[:,1:] = self.estim[:,1:]/np.log(10)
def update_Mstep(self, eta, tau):
    """M-step update of the negative-binomial shape parameters (alpha).

    Collapses eta into two columns (unbound, bound), then maximizes the
    expected log-likelihood in x = (alpha_0, alpha_1) with L-BFGS-B using
    an analytic gradient. Raises ValueError on NaN/Inf in the solution.
    """
    # collapse eta posteriors: column 0 = unbound, column 1 = sum of bound states
    etaestim = np.zeros((eta.estim.shape[0], 2), dtype=float)
    etaestim[:, 0] = eta.estim[:, 0]
    etaestim[:, 1] = eta.estim[:, 1:].sum(1)
    # constant linear coefficient of the objective in x
    C = nplog(tau.estim) * outsum(etaestim)

    def F(x):
        # negative expected log-likelihood; np.inf guards the optimizer
        func = outsum(gammaln(eta.total+x)*etaestim) \
            - gammaln(x)*outsum(etaestim) + C*x
        f = -1. * func.sum()
        if np.isnan(f) or np.isinf(f):
            return np.inf
        else:
            return f

    def Fprime(x):
        # analytic gradient of F (digamma = d/dx log Gamma)
        df = outsum(digamma(eta.total+x)*etaestim) \
            - digamma(x)*outsum(etaestim) + C
        Df = -1. * df.ravel()
        if np.isnan(Df).any() or np.isinf(Df).any():
            return np.array([np.inf, np.inf])
        else:
            return Df

    # nonnegative shape parameters; warm-start from current estimate
    bounds = [(0, None), (0, None)]
    xo = self.estim.copy()
    solution = opt.fmin_l_bfgs_b(F, xo, fprime=Fprime, bounds=bounds, disp=0)
    self.estim = solution[0]

    if np.isnan(self.estim).any():
        print "Nan in Alpha"
        raise ValueError
    if np.isinf(self.estim).any():
        print "Inf in Alpha"
        raise ValueError
def bayes_optimal_estimator(cascade, eta, pi, B=None, mu=None, model='modelA'):
    """
    computes the posterior mean (M1) and posterior second moment (M2)
    conditional on the most likely set of states for gamma.

    `eta` may be an Eta object (thresholded at 0.5 to pick bound sites) or a
    raw indicator array; `pi` and `mu` may likewise be raw arrays, which are
    wrapped into Pi/Mu objects. modelC is not implemented.
    """
    M1 = Cascade(cascade.L)
    M2 = Cascade(cascade.L)
    # select the most likely bound sites
    if isinstance(eta, Eta):
        states = eta.estim[:, 1:] > 0.5
    else:
        states = eta[:, 1:]
    # allow a raw array in place of a Pi object
    if not isinstance(pi, Pi):
        pitmp = Pi(cascade.J)
        pitmp.estim = pi
        pi = pitmp
    if model == 'modelA':
        for j in range(pi.J):
            # log-odds favoring the symmetric component, from bound sites only
            ratio = nplog(1-pi.estim[j]) - nplog(pi.estim[j]) + (cascade.value[j]*states).sum(0)*nplog(B.value[j]) \
                + ((cascade.total[j]-cascade.value[j])*states).sum(0)*nplog(1-B.value[j]) \
                - (cascade.total[j]*states).sum(0)*nplog(0.5)
            M1.value[j] = 0.5 * newlogistic(ratio) + B.value[j] * newlogistic(
                -ratio)
            M2.value[j] = 0.25 * newlogistic(
                ratio) + B.value[j]**2 * newlogistic(-ratio)
    elif model == 'modelB':
        # allow a raw array in place of a Mu object
        if not isinstance(mu, Mu):
            mutmp = Mu(cascade.J)
            mutmp.estim = mu
            mu = mutmp
        for j in range(pi.J):
            ratio = nplog(1-pi.estim[j]) - nplog(pi.estim[j]) + gammaln((cascade.value[j]*states).sum(0)+mu.estim[j]) \
                + gammaln((cascade.total[j]*states).sum(0)-(cascade.value[j]*states).sum(0)+mu.estim[j]) \
                - gammaln((cascade.total[j]*states).sum(0)+2*mu.estim[j]) \
                + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j]) - (cascade.total[j]*states).sum(0)*nplog(0.5)
            # mixture of 0.5 and the beta-binomial posterior mean / 2nd moment
            M1.value[j] = 0.5*newlogistic(ratio) \
                + ((cascade.value[j]*states).sum(0)+mu.estim[j])/((cascade.total[j]*states).sum(0)+mu.estim[j])*newlogistic(-ratio)
            M2.value[j] = 0.25*newlogistic(ratio) \
                + ((cascade.value[j]*states).sum(0)+mu.estim[j]+1)/((cascade.total[j]*states).sum(0)+mu.estim[j]+1) \
                * ((cascade.value[j]*states).sum(0)+mu.estim[j])/((cascade.total[j]*states).sum(0)+mu.estim[j])*newlogistic(-ratio)
    elif model == 'modelC':
        raise NotImplementedError
    return M1, M2
def update_Mstep(self, eta, tau):
    """M-step update of the negative-binomial shape parameters (alpha).

    eta is collapsed to (unbound, bound) columns; the expected negative
    log-likelihood F and its analytic gradient Fprime are minimized with
    L-BFGS-B under nonnegativity bounds. Raises ValueError on NaN/Inf.
    """
    # collapse eta posteriors: column 0 = unbound, column 1 = total bound
    etaestim = np.zeros((eta.estim.shape[0],2),dtype=float)
    etaestim[:,0] = eta.estim[:,0]
    etaestim[:,1] = eta.estim[:,1:].sum(1)
    # constant linear coefficient of the objective in x
    C = nplog(tau.estim)*outsum(etaestim)

    def F(x):
        # negative expected log-likelihood; np.inf guards the optimizer
        func = outsum(gammaln(eta.total+x)*etaestim) \
            - gammaln(x)*outsum(etaestim) + C*x
        f = -1.*func.sum()
        if np.isnan(f) or np.isinf(f):
            return np.inf
        else:
            return f

    def Fprime(x):
        # analytic gradient of F (digamma = d/dx log Gamma)
        df = outsum(digamma(eta.total+x)*etaestim) \
            - digamma(x)*outsum(etaestim) + C
        Df = -1.*df.ravel()
        if np.isnan(Df).any() or np.isinf(Df).any():
            return np.array([np.inf, np.inf])
        else:
            return Df

    # nonnegative shapes; warm-start from the current estimate
    bounds = [(0, None), (0, None)]
    xo = self.estim.copy()
    solution = opt.fmin_l_bfgs_b(F, xo, fprime=Fprime, bounds=bounds, disp=0)
    self.estim = solution[0]

    if np.isnan(self.estim).any():
        print "Nan in Alpha"
        raise ValueError
    if np.isinf(self.estim).any():
        print "Inf in Alpha"
        raise ValueError
def bayes_optimal_estimator(cascade, eta, pi, B=None, mu=None, model='modelA'):
    """
    computes the posterior mean (M1) and second moment (M2) conditional on
    the most likely set of states for gamma.

    `eta`, `pi` and `mu` may be the corresponding objects or raw arrays;
    raw arrays are wrapped/thresholded as needed. modelC is unimplemented.
    """
    M1 = Cascade(cascade.L)
    M2 = Cascade(cascade.L)
    # select the most likely bound sites
    if isinstance(eta, Eta):
        states = eta.estim[:,1:]>0.5
    else:
        states = eta[:,1:]
    # allow a raw array in place of a Pi object
    if not isinstance(pi, Pi):
        pitmp = Pi(cascade.J)
        pitmp.estim = pi
        pi = pitmp
    if model=='modelA':
        for j in range(pi.J):
            # log-odds favoring the symmetric component, from bound sites only
            ratio = nplog(1-pi.estim[j]) - nplog(pi.estim[j]) + (cascade.value[j]*states).sum(0)*nplog(B.value[j]) \
                + ((cascade.total[j]-cascade.value[j])*states).sum(0)*nplog(1-B.value[j]) \
                - (cascade.total[j]*states).sum(0)*nplog(0.5)
            M1.value[j] = 0.5*newlogistic(ratio) + B.value[j]*newlogistic(-ratio)
            M2.value[j] = 0.25*newlogistic(ratio) + B.value[j]**2*newlogistic(-ratio)
    elif model=='modelB':
        # allow a raw array in place of a Mu object
        if not isinstance(mu, Mu):
            mutmp = Mu(cascade.J)
            mutmp.estim = mu
            mu = mutmp
        for j in range(pi.J):
            ratio = nplog(1-pi.estim[j]) - nplog(pi.estim[j]) + gammaln((cascade.value[j]*states).sum(0)+mu.estim[j]) \
                + gammaln((cascade.total[j]*states).sum(0)-(cascade.value[j]*states).sum(0)+mu.estim[j]) \
                - gammaln((cascade.total[j]*states).sum(0)+2*mu.estim[j]) \
                + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j]) - (cascade.total[j]*states).sum(0)*nplog(0.5)
            # mixture of 0.5 and the beta-binomial posterior mean / 2nd moment
            M1.value[j] = 0.5*newlogistic(ratio) \
                + ((cascade.value[j]*states).sum(0)+mu.estim[j])/((cascade.total[j]*states).sum(0)+mu.estim[j])*newlogistic(-ratio)
            M2.value[j] = 0.25*newlogistic(ratio) \
                + ((cascade.value[j]*states).sum(0)+mu.estim[j]+1)/((cascade.total[j]*states).sum(0)+mu.estim[j]+1) \
                * ((cascade.value[j]*states).sum(0)+mu.estim[j])/((cascade.total[j]*states).sum(0)+mu.estim[j])*newlogistic(-ratio)
    elif model=='modelC':
        raise NotImplementedError
    return M1, M2
def logposteriorodds_poissonbinomial(reads, gamma, pi, parameters):
    """Per-site log posterior odds under the multiscale binomial model.

    `parameters` is unpacked per `gamma.model`: B (modelA), mu (modelB) or
    (B, tau) (modelC). Reads are decomposed into a Cascade first.
    """
    N,L = reads.shape
    cascade = Cascade(L)
    cascade.setreads(reads)
    logodds = np.zeros((N,),dtype=float)
    # unpack model-specific parameters
    if gamma.model=='modelA':
        B = parameters
    elif gamma.model=='modelB':
        mu = parameters
    elif gamma.model=='modelC':
        B, tau = parameters
    for j in xrange(pi.J):
        if gamma.model=='modelA':
            # A: symmetric binomial; B: binomial with bias B
            lhoodA = cascade.total[j]*nplog(0.5)
            lhoodB = cascade.value[j]*nplog(B.value[j]) + (cascade.total[j]-cascade.value[j])*nplog(1-B.value[j])
        elif gamma.model=='modelB':
            # A: symmetric binomial; B: symmetric beta-binomial (mu)
            lhoodA = cascade.total[j]*nplog(0.5)
            lhoodB = gammaln(cascade.value[j]+mu.estim[j]) + gammaln(cascade.total[j]-cascade.value[j]+mu.estim[j]) \
                - gammaln(cascade.total[j]+2*mu.estim[j]) + gammaln(2*mu.estim[j]) - 2*gammaln(mu.estim[j])
        elif gamma.model=='modelC':
            # A/B: beta-binomial terms with concentration tau (mean B for B)
            lhoodA = gammaln(cascade.value[j]+0.5*tau.value[j]) + gammaln(cascade.total[j]-cascade.value[j]+0.5*tau.value[j]) \
                - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - 2*gammaln(0.5*tau.value[j])
            lhoodB = gammaln(cascade.value[j]+B.value[j]*tau.value[j]) \
                + gammaln(cascade.total[j]-cascade.value[j]+(1-B.value[j])*tau.value[j]) \
                - gammaln(cascade.total[j]+tau.value[j]) + gammaln(tau.value[j]) - gammaln(B.value[j]*tau.value[j]) \
                - gammaln((1-B.value[j])*tau.value[j])
        # NOTE(review): the accumulation adds nplog(pi) rather than a log-sum
        # of both mixture components — verify against the intended
        # posterior-odds derivation.
        logratio = nplog(1-pi.estim[j]) + lhoodB - nplog(pi.estim[j]) - lhoodA
        logodds += np.sum(nplog(pi.estim[j]) - nplog(logistic(logratio)),1)
    return logodds
def logposteriorodds_multinomial(reads, footprint, null):
    """Per-site log posterior odds of the footprint vs. null multinomial.

    `footprint` is flattened to align with the columns of `reads`; `null`
    is used as given. Returns a 1-D array, one value per row of `reads`.
    """
    footprint_term = insum(reads * nplog(footprint.ravel()), [1])
    null_term = insum(reads * nplog(null), [1])
    return (footprint_term - null_term).ravel()
def logposteriorodds_multinomial(reads, footprint, null):
    """Log posterior odds (footprint over null) for each row of `reads`."""
    # difference of the two multinomial log-probability terms, per site
    odds = insum(reads * nplog(footprint.ravel()), [1])
    odds = odds - insum(reads * nplog(null), [1])
    return odds.ravel()
def likelihood(cascade, scores, eta, gamma, pi, alpha, beta, tau, mu=None, B=None, omega=None, omegao=None):
    """Evaluate the (variational) log-likelihood of the full model.

    Combines bound-state evidence P_1, unbound-state evidence P_0, the
    logistic prior and the eta-posterior entropy, summed over sites.
    Raises ValueError if the total is NaN or Inf.
    """
    apriori = beta.estim[0] + beta.estim[1] * scores

    if gamma.model == 'modelA':
        lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
    elif gamma.model == 'modelB':
        lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
    elif gamma.model == 'modelC':
        lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=gamma.model)

    # expected footprint log-likelihood under gamma, with entropy terms
    footprint = np.zeros((cascade.N, 1), dtype=float)
    for j in xrange(pi.J):
        footprint += insum(gamma.value[j]*lhoodA.value[j] + (1-gamma.value[j])*lhoodB.value[j] \
            + gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
            + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

    # bound-state evidence (footprint + negative-binomial read depth)
    P_1 = footprint + gammaln(eta.total+alpha.estim[1]) - gammaln(alpha.estim[1]) \
        + alpha.estim[1]*nplog(tau.estim[1]) + eta.total*nplog(1-tau.estim[1])
    P_1[P_1 == np.inf] = MAX
    P_1[P_1 == -np.inf] = -MAX

    # null footprint: component C under modelC, component A otherwise
    null = np.zeros((cascade.N, 1), dtype=float)
    for j in xrange(cascade.J):
        if gamma.model == 'modelC':
            null = null + insum(lhoodC.value[j], [1])
        else:
            null = null + insum(lhoodA.value[j], [1])

    # unbound-state evidence
    P_0 = null + gammaln(eta.total+alpha.estim[0]) - gammaln(alpha.estim[0]) \
        + alpha.estim[0]*nplog(tau.estim[0]) + eta.total*nplog(1-tau.estim[0])
    P_0[P_0 == np.inf] = MAX
    P_0[P_0 == -np.inf] = -MAX

    # assemble the bound: expectations under eta, logistic prior, eta entropy
    L = P_0*eta.estim[:,:1] + insum(P_1*eta.estim[:,1:],[1]) + apriori*(1-eta.estim[:,:1]) \
        - nplog(1+np.exp(apriori)) - insum(eta.estim*nplog(eta.estim),[1])
    L = L.sum()

    if np.isnan(L):
        print "Nan in LogLike"
        raise ValueError
    if np.isinf(L):
        print "Inf in LogLike"
        raise ValueError
    return L