Esempio n. 1
0
 def classify(self, n,r,p):
     """Return 1 if r lies in the upper 5% tail of a Gumbel distribution, else 0.

     The Gumbel scale is B = 1/ln(1/p).  The location u is the logarithm
     base 1/p of n*(1-p), except when n < EXACT, where it is obtained by
     matching moments: u = ExpectedRuns_cached(n, p) - B*getGamma()
     (the Gumbel mean is mu + gamma*beta).

     NOTE(review): presumably n is the number of sites in a gene, r the
     observed maximum run length and p the non-insertion probability --
     confirm against the callers.

     Returns 0 immediately when n == 0.
     """
     if n == 0:
         return 0
     q = 1 - p
     B = 1/math.log(1/p)
     if n < EXACT:
         # Small n: estimate more accurately from the expected run length
         # (exact calculation) instead of the asymptotic formula.
         u = self.ExpectedRuns_cached(n, p) - B*tnseq_tools.getGamma()
     else:
         u = math.log(n*q, 1/p)
     # Upper-tail probability P(X > r).  The survival function replaces
     # 1 - scipy.exp(logcdf(...)): the top-level scipy.exp alias is
     # deprecated and absent from newer SciPy releases, and sf() avoids the
     # cancellation of computing 1 - cdf when the cdf is close to 1.
     pval = scipy.stats.gumbel_r.sf(r, u, B)
     return 1 if pval < 0.05 else 0
Esempio n. 2
0
 def classify(self, n,r,p):
     """Return 1 if r lies in the upper 5% tail of a Gumbel distribution, else 0.

     The Gumbel scale is B = 1/ln(1/p).  The location u is the logarithm
     base 1/p of n*(1-p), except when n < EXACT, where it is obtained by
     matching moments: u = ExpectedRuns_cached(n, p) - B*getGamma()
     (the Gumbel mean is mu + gamma*beta).

     NOTE(review): presumably n is the number of sites in a gene, r the
     observed maximum run length and p the non-insertion probability --
     confirm against the callers.

     Returns 0 immediately when n == 0.
     """
     if n == 0:
         return 0
     q = 1 - p
     B = 1/math.log(1/p)
     if n < EXACT:
         # Small n: estimate more accurately from the expected run length
         # (exact calculation) instead of the asymptotic formula.
         u = self.ExpectedRuns_cached(n, p) - B*tnseq_tools.getGamma()
     else:
         u = math.log(n*q, 1/p)
     # Upper-tail probability P(X > r).  The survival function replaces
     # 1 - scipy.exp(logcdf(...)): the top-level scipy.exp alias is
     # deprecated and absent from newer SciPy releases, and sf() avoids the
     # cancellation of computing 1 - cdf when the cdf is close to 1.
     pval = scipy.stats.gumbel_r.sf(r, u, B)
     return 1 if pval < 0.05 else 0
Esempio n. 3
0
 def F_non(self, p, N, R): # pass in P_nonins as p
     """Return the log Beta density of p plus the summed Gumbel log-density of R.

     The result is
         log Beta.pdf(p; ALPHA, BETA) + sum_i log Gumbel.pdf(R[i]; mu[i], sigma)
     with sigma = 1/ln(1/p) and mu[i] = ln(N[i]*(1-p)) / ln(1/p).  For
     entries with N[i] < EXACT, mu[i] is replaced by
     ExpectedRuns_cached(int(N[i]), p) - getGamma()/ln(1/p).

     NOTE(review): N and R are presumably parallel numpy arrays (N is
     multiplied by a float and the resulting array is assigned per index)
     -- confirm against the callers.
     """
     log_inv_p = math.log(1/p)
     scale = 1/log_inv_p
     gamma_shift = tnseq_tools.getGamma()/log_inv_p

     # Asymptotic Gumbel location for every entry ...
     loc = numpy.log(N*(1.0 - p)) / numpy.log(1/p)
     # ... refined for small entries from the expected run length (exact calc).
     for idx, n_sites in enumerate(N):
         if n_sites < EXACT:
             loc[idx] = self.ExpectedRuns_cached(int(n_sites), p) - gamma_shift

     log_prior = numpy.log(scipy.stats.beta.pdf(p, ALPHA, BETA))
     log_likelihood = numpy.sum(scipy.stats.gumbel_r.logpdf(R, loc, scale))
     return log_prior + log_likelihood
Esempio n. 4
0
 def F_non(self, p, N, R): # pass in P_nonins as p
     """Return the log Beta density of p plus the summed Gumbel log-density of R.

     The result is
         log Beta.pdf(p; ALPHA, BETA) + sum_i log Gumbel.pdf(R[i]; mu[i], sigma)
     with sigma = 1/ln(1/p) and mu[i] = ln(N[i]*(1-p)) / ln(1/p).  For
     entries with N[i] < EXACT, mu[i] is replaced by
     ExpectedRuns_cached(int(N[i]), p) - getGamma()/ln(1/p).

     NOTE(review): N and R are presumably parallel numpy arrays (N is
     multiplied by a float and the resulting array is assigned per index)
     -- confirm against the callers.
     """
     log_inv_p = math.log(1/p)
     scale = 1/log_inv_p
     gamma_shift = tnseq_tools.getGamma()/log_inv_p

     # Asymptotic Gumbel location for every entry ...
     loc = numpy.log(N*(1.0 - p)) / numpy.log(1/p)
     # ... refined for small entries from the expected run length (exact calc).
     for idx, n_sites in enumerate(N):
         if n_sites < EXACT:
             loc[idx] = self.ExpectedRuns_cached(int(n_sites), p) - gamma_shift

     log_prior = numpy.log(scipy.stats.beta.pdf(p, ALPHA, BETA))
     log_likelihood = numpy.sum(scipy.stats.gumbel_r.logpdf(R, loc, scale))
     return log_prior + log_likelihood
Esempio n. 5
0
 def sample_Z(self, p, w1, N, R, S, T, mu_s, sigma_s, SIG):
     """Draw one Bernoulli sample per entry of N with probability h1/(h0+h1).

     The two unnormalised weights are
         h0 = Gumbel.pdf(R; mu, sigma) * Normal.pdf(S; mu_s*R, sigma_s) * (1-w1)
         h1 = SIG * w1
     with sigma = 1/ln(1/p) and mu[i] = ln(N[i]*(1-p)) / ln(1/p); for
     entries with N[i] < EXACT, mu[i] is replaced by
     ExpectedRuns_cached(int(N[i]), p) - getGamma()/ln(1/p).

     T is accepted but not used by this method.

     NOTE(review): N, R, S (and presumably SIG) look like parallel numpy
     arrays -- confirm against the callers.

     Returns the result of scipy.stats.binom.rvs(1, p_z1, size=len(N)).
     """
     G = len(N)
     q = 1.0-p
     sigma = 1.0/math.log(1.0/p)
     BetaGamma = tnseq_tools.getGamma()/math.log(1/p)
     mu = numpy.log(N*q) / numpy.log(1/p)
     # Estimate more accurately based on expected run length, using the exact
     # calculation for small entries.
     for i, n_i in enumerate(N):
         if n_i < EXACT:
             mu[i] = self.ExpectedRuns_cached(int(n_i), p) - BetaGamma
     # gumbel_r.pdf replaces scipy.exp(gumbel_r.logpdf(...)): the top-level
     # scipy.exp alias is deprecated and absent from newer SciPy releases.
     gumbel_density = scipy.stats.gumbel_r.pdf(R, mu, sigma)
     h0 = gumbel_density * scipy.stats.norm.pdf(S, mu_s*R, sigma_s) * (1-w1)
     h1 = SIG * w1
     # Prevent div-by-zero; if neither class is probable, p(z1) should be ~0.5.
     h1 += 1e-10
     h0 += 1e-10
     p_z1 = h1/(h0+h1)
     return scipy.stats.binom.rvs(1, p_z1, size=G)
Esempio n. 6
0
 def sample_Z(self, p, w1, N, R, S, T, mu_s, sigma_s, SIG):
     """Draw one Bernoulli sample per entry of N with probability h1/(h0+h1).

     The two unnormalised weights are
         h0 = Gumbel.pdf(R; mu, sigma) * Normal.pdf(S; mu_s*R, sigma_s) * (1-w1)
         h1 = SIG * w1
     with sigma = 1/ln(1/p) and mu[i] = ln(N[i]*(1-p)) / ln(1/p); for
     entries with N[i] < EXACT, mu[i] is replaced by
     ExpectedRuns_cached(int(N[i]), p) - getGamma()/ln(1/p).

     T is accepted but not used by this method.

     NOTE(review): N, R, S (and presumably SIG) look like parallel numpy
     arrays -- confirm against the callers.

     Returns the result of scipy.stats.binom.rvs(1, p_z1, size=len(N)).
     """
     G = len(N)
     q = 1.0-p
     sigma = 1.0/math.log(1.0/p)
     BetaGamma = tnseq_tools.getGamma()/math.log(1/p)
     mu = numpy.log(N*q) / numpy.log(1/p)
     # Estimate more accurately based on expected run length, using the exact
     # calculation for small entries.
     for i, n_i in enumerate(N):
         if n_i < EXACT:
             mu[i] = self.ExpectedRuns_cached(int(n_i), p) - BetaGamma
     # gumbel_r.pdf replaces scipy.exp(gumbel_r.logpdf(...)): the top-level
     # scipy.exp alias is deprecated and absent from newer SciPy releases.
     gumbel_density = scipy.stats.gumbel_r.pdf(R, mu, sigma)
     h0 = gumbel_density * scipy.stats.norm.pdf(S, mu_s*R, sigma_s) * (1-w1)
     h1 = SIG * w1
     # Prevent div-by-zero; if neither class is probable, p(z1) should be ~0.5.
     h1 += 1e-10
     h0 += 1e-10
     p_z1 = h1/(h0+h1)
     return scipy.stats.binom.rvs(1, p_z1, size=G)