    def __init__(self, cascade, totalreads, scores, gamma=None, beta=None, \
        pi=None, mu=None, B=None, omega=None, omegao=None, alpha=None, tau=None):

        self.N = cascade.N
        self.total = totalreads.reshape(self.N, 1)

        self.estim = np.zeros((self.N, 2), dtype=float)
        if alpha is None:
            # no read-count parameters yet: seed the posterior by splitting
            # sites on total read count (low-count half -> column 1 at -MAX,
            # high-count half -> column 1 at +MAX)
            indices = np.argsort(self.total.ravel())[:self.N / 2]
            self.estim[indices, 1:] = -MAX
            indices = np.argsort(self.total.ravel())[self.N / 2:]
            self.estim[indices, 1:] = MAX
        else:
            footprint_logodds = np.zeros((self.N, 1), dtype=float)
            if gamma.model == 'modelA':
                lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
            elif gamma.model == 'modelB':
                lhoodA, lhoodB = likelihoodAB(cascade,
                                              mu=mu,
                                              model=gamma.model)
            elif gamma.model == 'modelC':
                lhoodA, lhoodB, lhoodC = likelihoodAB(cascade,
                                                      B=B,
                                                      omega=omega,
                                                      omegao=omegao,
                                                      model=gamma.model)

            for j in xrange(pi.J):
                if gamma.model == 'modelC':
                    footprint_logodds += insum(
                        gamma.value[j] * lhoodA.value[j] - lhoodC.value[j] +
                        (1 - gamma.value[j]) * lhoodB.value[j], [1])
                else:
                    footprint_logodds += insum(
                        (1 - gamma.value[j]) *
                        (lhoodB.value[j] - lhoodA.value[j]), [1])
                footprint_logodds += insum(gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                    + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

            self.estim[:,1:] = beta.estim[0] + beta.estim[1]*scores + footprint_logodds \
                + gammaln(self.total+alpha.estim[1]) - gammaln(self.total+alpha.estim[0]) \
                + gammaln(alpha.estim[0]) - gammaln(alpha.estim[1]) \
                + alpha.estim[1]*nplog(tau.estim[1]) - alpha.estim[0]*nplog(tau.estim[0]) \
                + self.total*(nplog(1-tau.estim[1])-nplog(1-tau.estim[0]))

        if alpha is None:
            self.estim[self.estim == np.inf] = MAX
            self.estim = np.exp(self.estim -
                                np.max(self.estim, 1).reshape(self.N, 1))
            self.estim = self.estim / insum(self.estim, [1])
        else:
            self.estim[:, 1:] = self.estim[:, 1:] / np.log(10)
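# A self-contained sketch of the initialization branch above (alpha is None):
# sites are ranked by total read count and the second column of the posterior
# is pushed to -MAX for the lower half and +MAX for the upper half. The toy
# totalreads vector and the MAX stand-in below are illustrative, not values
# from this codebase.
import numpy as np

MAX = 100.                                   # stand-in for utils.MAX
totalreads = np.array([3., 40., 7., 95., 1., 62.])
N = totalreads.size

estim = np.zeros((N, 2), dtype=float)
order = np.argsort(totalreads)               # fewest to most reads
estim[order[:N // 2], 1] = -MAX              # low-count half: column 1 suppressed
estim[order[N // 2:], 1] = MAX               # high-count half: column 1 boosted
print(estim)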
    def update_Estep(self,
                     cascade,
                     scores,
                     alpha,
                     beta,
                     tau,
                     pi,
                     gamma,
                     mu=None,
                     B=None,
                     omega=None,
                     omegao=None):

        # accumulate the footprint contribution to the log-odds, one level j at a time
        footprint_logodds = np.zeros((self.N, 1), dtype=float)
        if gamma.model == 'modelA':
            lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
        elif gamma.model == 'modelB':
            lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
        elif gamma.model == 'modelC':
            lhoodA, lhoodB, lhoodC = likelihoodAB(cascade,
                                                  B=B,
                                                  omega=omega,
                                                  omegao=omegao,
                                                  model=gamma.model)

        for j in xrange(pi.J):
            footprint_logodds += insum((1-gamma.value[j])*(lhoodB.value[j]-lhoodA.value[j]) \
                    + gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                    + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

        self.estim[:,1:] = beta.estim[0] + beta.estim[1]*scores + footprint_logodds \
            + gammaln(self.total+alpha.estim[1]) - gammaln(self.total+alpha.estim[0]) \
            + gammaln(alpha.estim[0]) - gammaln(alpha.estim[1]) \
            + alpha.estim[1]*nplog(tau.estim[1]) - alpha.estim[0]*nplog(tau.estim[0]) \
            + self.total*(nplog(1-tau.estim[1])-nplog(1-tau.estim[0]))
        self.estim[:, 0] = 0.
        self.estim[self.estim == np.inf] = MAX
        self.estim = np.exp(self.estim -
                            np.max(self.estim, 1).reshape(self.N, 1))
        self.estim = self.estim / insum(self.estim, [1])

        if np.isnan(self.estim).any():
            print "Nan in Eta"
            raise ValueError

        if np.isinf(self.estim).any():
            print "Inf in Eta"
            raise ValueError
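# The tail of update_Estep above is a row-wise softmax computed with the usual
# max-shift for numerical stability: subtract each row's maximum, exponentiate,
# then normalize the row to sum to one. A standalone sketch of that pattern
# (the log-odds values below are made up for illustration):
import numpy as np

logodds = np.array([[0., 3.2],
                    [0., -1.5],
                    [0., 700.]])             # a naive exp would overflow here
shifted = logodds - logodds.max(1).reshape(-1, 1)
post = np.exp(shifted)
post = post / post.sum(1).reshape(-1, 1)     # rows now sum to one
print(post)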
def likelihood(cascade, scores, eta, gamma, pi, alpha, beta, tau, mu=None, B=None, omega=None, omegao=None):

    apriori = beta.estim[0] + beta.estim[1]*scores

    if gamma.model=='modelA':
        lhoodA, lhoodB = likelihoodAB(cascade, B=B, model=gamma.model)
    elif gamma.model=='modelB':
        lhoodA, lhoodB = likelihoodAB(cascade, mu=mu, model=gamma.model)
    elif gamma.model=='modelC':
        lhoodA, lhoodB, lhoodC = likelihoodAB(cascade, B=B, omega=omega, omegao=omegao, model=gamma.model)

    footprint = np.zeros((cascade.N,1),dtype=float)
    for j in xrange(pi.J):
        footprint += insum(gamma.value[j]*lhoodA.value[j] + (1-gamma.value[j])*lhoodB.value[j] \
                + gamma.value[j]*(nplog(pi.estim[j])-nplog(gamma.value[j])) \
                + (1-gamma.value[j])*(nplog(1-pi.estim[j])-nplog(1-gamma.value[j])),[1])

    P_1 = footprint + gammaln(eta.total+alpha.estim[1]) - gammaln(alpha.estim[1]) \
        + alpha.estim[1]*nplog(tau.estim[1]) + eta.total*nplog(1-tau.estim[1])
    P_1[P_1==np.inf] = MAX
    P_1[P_1==-np.inf] = -MAX

    null = np.zeros((cascade.N,1),dtype=float)
    for j in xrange(cascade.J):
        if gamma.model=='modelC':
            null = null + insum(lhoodC.value[j],[1])
        else:
            null = null + insum(lhoodA.value[j],[1])
    P_0 = null + gammaln(eta.total+alpha.estim[0]) - gammaln(alpha.estim[0]) \
        + alpha.estim[0]*nplog(tau.estim[0]) + eta.total*nplog(1-tau.estim[0])
    P_0[P_0==np.inf] = MAX
    P_0[P_0==-np.inf] = -MAX

    L = P_0*eta.estim[:,:1] + insum(P_1*eta.estim[:,1:],[1]) + apriori*(1-eta.estim[:,:1]) \
        - nplog(1+np.exp(apriori)) - insum(eta.estim*nplog(eta.estim),[1])
    
    L = L.sum()

    if np.isnan(L):
        print "Nan in LogLike"
        raise ValueError

    if np.isinf(L):
        print "Inf in LogLike"
        raise ValueError

    return L
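# The gammaln / nplog terms in P_1 and P_0 above have the shape of a negative
# binomial log-pmf in the total read count, up to a gammaln(total+1) term that
# does not involve alpha or tau. A hedged numerical check of that reading,
# with made-up values standing in for alpha.estim[k] and tau.estim[k]:
import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

total = np.arange(1., 6.)                    # toy read totals
a, t = 2.5, 0.3                              # hypothetical alpha, tau values

term = gammaln(total + a) - gammaln(a) + a * np.log(t) + total * np.log(1 - t)
print(np.allclose(term, nbinom.logpmf(total, a, t) + gammaln(total + 1.)))   # True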
 def F(x):
     arg = x[0]+x[1]*scores
     func = arg*insum(eta.estim[:,1:],1) - nplog(1+np.exp(arg))
     f = -1.*func.sum()
     if np.isnan(f) or np.isinf(f):
         return np.inf
     else:
         return f
 def Fprime(x):
     arg = x[0]+x[1]*scores
     df1 = insum(eta.estim[:,1:],1) - logistic(-arg)
     df2 = df1*scores
     Df = -1.*np.array([df1.sum(), df2.sum()])
     if np.isnan(Df).any() or np.isinf(Df).any():
         return np.inf
     else:
         return Df
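# F and Fprime above are an objective/gradient pair: the negative weighted
# log-likelihood of a logistic model in `scores` and its derivatives with
# respect to the intercept and slope. A self-contained sketch of how such a
# pair is typically handed to scipy.optimize; the toy data and the inline
# sigmoid below are stand-ins, not the versions from utils:
import numpy as np
import scipy.optimize as opt

scores = np.random.randn(200, 1)                  # toy covariate
weights = (np.random.rand(200, 1) > 0.5) * 1.     # stand-in for eta.estim[:, 1:]

def F(x):
    arg = x[0] + x[1] * scores
    return -(arg * weights - np.log(1 + np.exp(arg))).sum()

def Fprime(x):
    arg = x[0] + x[1] * scores
    df1 = weights - 1. / (1 + np.exp(-arg))       # weights minus sigmoid(arg)
    return -np.array([df1.sum(), (df1 * scores).sum()])

print(opt.minimize(F, np.zeros(2), jac=Fprime, method='BFGS').x)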
Example #10
    def getnull(self, locations, sample='', width=200):

        left = self.k/2
        right = self.k/2-1
        if sample=='':
            cutrate = self.cutrate
        else:
            cutrate = self.cutrate[sample]

        strand = np.array([1 if loc[3]=='+' else 0 for loc in locations])
        # removed a +1 for the - strand
        sequences = np.array([utils.makestr(self.genome[loc[0]][int(loc[1])-width/2-left:int(loc[1])+width/2+right]) if loc[3]=='+' \
            else utils.makestr(self.genome[loc[0]][int(loc[2])-width/2-left:int(loc[2])+width/2+right]) \
            for loc in locations])
        null = sequence_null.getnull(sequences, strand, cutrate, width, self.k)
        null[null==0] = 1e-8
        null = null/utils.insum(null,[1])
        return null
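# With left = k/2 and right = k/2 - 1, the slice above covers
# width + k - 1 bases around the anchor, i.e. exactly `width` overlapping
# k-mers (one per output position). A quick check with made-up width/k values:
width, k = 200, 8
left, right = k / 2, k / 2 - 1
pos = 1000
start, stop = pos - width / 2 - left, pos + width / 2 + right
print(stop - start == width + k - 1)             # window length
print((stop - start) - k + 1 == width)           # number of k-mers it contains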
Example #12
def logposteriorodds_multinomial(reads, footprint, null):

    logodds = insum(reads*nplog(footprint.ravel()),[1]) - insum(reads*nplog(null),[1])

    return logodds.ravel()
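# A self-contained version of the computation above for a single toy site,
# assuming insum(x, [1]) behaves like a row sum and nplog like np.log (both
# come from the utils module, which is not shown here). The multinomial
# coefficient is shared by both terms and cancels in the difference.
import numpy as np

reads = np.array([[3., 0., 5., 2.]])             # toy read counts at one site
footprint = np.array([0.4, 0.1, 0.3, 0.2])       # hypothetical bound-state multinomial
null = np.array([[0.25, 0.25, 0.25, 0.25]])      # hypothetical null multinomial

logodds = (reads * np.log(footprint.ravel())).sum(1) \
    - (reads * np.log(null)).sum(1)
print(logodds)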
import numpy as np
import scipy.optimize as opt
from scipy.special import digamma, gammaln
from utils import insum, outsum, nplog, EPS, MAX
import cPickle, time, math, pdb

logistic = lambda x: 1./(1+insum(np.exp(x),[1]))
newlogistic = lambda x: 1./(1+np.exp(x))

class Cascade:

    def __init__(self, L):

        self.L = L
        if math.frexp(self.L)[0]!=0.5:
            print "profile size is not a power of 2"
            pdb.set_trace()

        self.J = math.frexp(self.L)[1]-1
        self.data = False
        self.value = dict()

    def setreads(self, reads):
        self.data = True
        N,L = reads.shape
        self.N = N
        if L!=self.L:
            print "data dimensions do not match"
            pdb.set_trace()

        self.transform(reads)
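# math.frexp(L) returns (m, e) with L == m * 2**e and 0.5 <= m < 1, so
# m == 0.5 exactly when L is a power of two, and the number of levels used
# above is J = e - 1 = log2(L). A quick illustration:
import math

for L in (4, 8, 64, 100):
    m, e = math.frexp(L)
    print(L, m == 0.5, e - 1)                    # power-of-2 flag and J when it is one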