def compute_like_split(self, spec):
    tags = [self.sp, self.eg, self.bh, self.bd, self.ab, self.dam]
    params = []
    for arr in self.p_outs:
        params.append(np.mean(arr))
    ll = 0
    llrand = 0
    length = len(self.pred_data[spec][self.sc])
    for ind in range(len(self.pred_data[spec][tags[0]])):
        denom = 1.
        for i in range(6):
            denom += self.pred_data[spec][tags[i]][ind] * params[i] / self.rescale_rat[i]
        ll += nbinom.logpmf(self.pred_data[spec][self.sc][ind],
                            n=params[6] * 1. / denom, p=params[7])
        llrand += nbinom.logpmf(
            self.pred_data[spec][self.sc][np.random.choice(range(length))],
            n=params[6] * 1. / denom, p=params[7])
    return [ll, llrand]
def compute_model_prob(self, spec, writefile):
    tags = [self.sp, self.eg, self.bh, self.bd, self.ab, self.dam]
    params = []
    for arr in self.p_outs:
        params.append(np.mean(arr))
    r_loc = []
    for ind in range(len(self.data[spec][tags[0]])):
        denom = 1.
        for i in range(6):
            denom += self.data[spec][tags[i]][ind] * params[i] / self.rescale_rat[i]
        r_loc.append(params[6] / denom)
    llall = np.sum(nbinom.logpmf(self.data[spec][self.sc], n=r_loc, p=params[7]))
    llallnull = np.sum(nbinom.logpmf(self.data[spec][self.sc],
                                     n=self.baseroot[0], p=self.baseroot[1]))
    with open(writefile, 'w') as wh:
        wh.write(str(llall) + ' ' + str(llallnull) + '\n')
def ll_t(self, spec): r_loc = self.get_r_t(spec) for thing in r_loc: if thing < 0: return -(10**50) if spec in ["SP", "EG"]: ret = np.sum( nbinom.logpmf(self.data[spec][self.sc], n=np.multiply( r_loc, np.sqrt(self.data[spec][self.area])), p=self.p0_t)) else: ret = np.sum( nbinom.logpmf(self.data[spec][self.sc], n=r_loc, p=self.p0_t)) #ret = 0. #i = 0 #for sc in self.data[spec][self.sc]: # ret += logp0(sc,r_loc[i],self.p0,self.q0) # i += 1 mylen = len(self.data[spec][self.sc]) for val in self.al_t[spec]: #ret+=mylen*(norm.logpdf(val ,loc=0,scale=10)) ret += mylen * (gamma.logpdf(val, a=1, scale=300)) #ret+=mylen*(norm.logpdf(self.al[spec][5] ,loc=0,scale=10)) return ret
def dNBI(y: np.ndarray, location: np.ndarray, scale: np.ndarray):
    """Log-density of the negative binomial (NBI) distribution in its
    mean/scale parameterization."""
    n = 1 / scale
    p = n / (n + location)
    if len(scale) > 1:
        # Fall back to the Poisson limit where the scale is effectively zero.
        fy = np.where(scale > 1e-04,
                      nbinom.logpmf(k=y, n=n, p=p),
                      poisson.logpmf(k=y, mu=location))
    else:
        fy = poisson.logpmf(k=y, mu=location) if scale < 1e-04 else nbinom.logpmf(k=y, n=n, p=p)
    return fy
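# Hedged usage sketch (illustrative addition, not from the original source):
# evaluate dNBI on a small count vector; entries with a near-zero scale take
# the Poisson branch. Values below are made up.
import numpy as np
from scipy.stats import nbinom, poisson

y = np.array([0, 1, 3, 7])
location = np.array([2.0, 2.0, 2.0, 2.0])   # per-observation mean
scale = np.array([0.5, 0.5, 1e-6, 1e-6])    # tiny scale -> Poisson limit
print(dNBI(y, location, scale))             # elementwise log-probabilities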
def no_admixes(p, admixes, hard_cutoff=20, r=0):
    if hard_cutoff is not None and admixes > hard_cutoff:
        return -float('inf')
    if r > 1:
        if hard_cutoff is None:
            return nbinom.logpmf(admixes, n=r, p=1.0 - p)
        # Renormalize the NB pmf over the truncated support [0, hard_cutoff].
        return (nbinom.logpmf(admixes, n=r, p=1.0 - p)
                - nbinom.logcdf(hard_cutoff, n=r, p=1.0 - p))
    if hard_cutoff is None:
        return geom.logpmf(admixes + 1, 1.0 - p)
    return (geom.logpmf(admixes + 1, 1.0 - p)
            - geom.logcdf(hard_cutoff + 1, 1.0 - p))
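# Hedged usage sketch (illustrative, not from the original project): the
# default r=0 takes the truncated-geometric branch, normalized by the log-CDF
# at the cutoff.
from scipy.stats import geom, nbinom

for k in range(4):
    print(k, no_admixes(p=0.5, admixes=k, hard_cutoff=20))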
def _ll_nbp(y, X, beta, alph, Q):
    r'''
    Negative Binomial Log-likelihood -- type P

    References:

    Greene, W. 2008. "Functional forms for the negative binomial model
        for count data". Economics Letters. Volume 99, Number 3, pp.585-590.
    Hilbe, J.M. 2011. "Negative binomial regression". Cambridge University Press.

    Following notation in Greene (2008), with negative binomial
    heterogeneity parameter :math:`\alpha`:

    .. math::

        \lambda_i = exp(X\beta)\\
        \theta = 1 / \alpha \\
        g_i = \theta \lambda_i^Q \\
        w_i = g_i/(g_i + \lambda_i) \\
        r_i = \theta / (\theta+\lambda_i) \\
        ln \mathcal{L}_i = ln \Gamma(y_i+g_i) - ln \Gamma(1+y_i) + g_iln (r_i) + y_i ln(1-r_i)
    '''
    mu = np.exp(np.dot(X, beta))
    size = 1 / alph * mu**Q
    prob = size / (size + mu)
    ll = nbinom.logpmf(y, size, prob)
    return ll
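# Hedged usage sketch (illustrative, not from the original source): evaluate
# the NB-P log-likelihood on synthetic data. Q=0 reduces to NB-2 and Q=1 to
# NB-1; the counts here are generated only for demonstration.
import numpy as np
from scipy.stats import nbinom

rng = np.random.default_rng(0)
X = np.column_stack([np.ones(100), rng.normal(size=100)])
beta = np.array([1.0, 0.5])
y = rng.poisson(np.exp(X @ beta))   # stand-in counts
print(_ll_nbp(y, X, beta, alph=0.5, Q=0).sum())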
def _logpmf(self, x, mu, alpha, p, w):
    s, p = self.convert_params(mu, alpha, p)
    return _lazywhere(x != 0, (x, s, p, w),
                      (lambda x, s, p, w:
                       np.log(1. - w) + nbinom.logpmf(x, s, p)),
                      np.log(w + (1. - w) * nbinom.pmf(x, s, p)))
def getloglikelihood2(kmat, mu_estimate, alpha, sumup=False, log=True):
    '''
    Get the log likelihood estimation of NB, using the current estimation of beta
    '''
    if kmat.shape[0] != mu_estimate.shape[0]:
        raise ValueError('Count table dimension is not the same as mu vector dimension.')
    alpha = np.matrix(alpha).reshape(mu_estimate.shape[0], mu_estimate.shape[1])
    kmat_r = np.round(kmat)
    # Convert (mu, alpha) to the (r, p) parameterization:
    # var = mu + alpha * mu^2, p = mu / var, r = mu^2 / (var - mu).
    mu_sq = np.multiply(mu_estimate, mu_estimate)
    var_vec = mu_estimate + np.multiply(alpha, mu_sq)
    nb_p = np.divide(mu_estimate, var_vec)
    nb_r = np.divide(mu_sq, var_vec - mu_estimate)
    if log:
        logp = nbinom.logpmf(kmat_r, nb_r, nb_p)
    else:
        logp = nbinom.pmf(kmat, nb_r, nb_p)
    if np.isnan(np.sum(logp)):
        # Replace NaN entries rather than failing outright.
        logp = np.where(np.isnan(logp), 0, logp)
    if sumup:
        return np.sum(logp)
    return logp
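# Hedged usage sketch (illustrative, not from the original codebase): evaluate
# the matrix variant above on a small count table with per-entry means and a
# shared overdispersion. The alpha array shape must match mu.
import numpy as np
from scipy.stats import nbinom

kmat = np.array([[3., 5.], [10., 8.]])
mu = np.array([[4., 4.], [9., 9.]])
alpha = np.full_like(mu, 0.1)
print(getloglikelihood2(kmat, mu, alpha, sumup=True))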
def _nbinom_pmf(self, value, log=False):
    if log:
        return nbinom.logpmf(value, self.no_of_successes, self.prob_of_success)
    return nbinom.pmf(value, self.no_of_successes, self.prob_of_success)
def predict(Xtest, X0, X1, gamma_pars, e, f):
    a, b = gamma_pars
    n0 = np.shape(X0)[0]
    n1 = np.shape(X1)[0]
    logXpred0 = np.sum(nbinom.logpmf(Xtest, a + np.sum(X0[:, :-1], axis=0),
                                     (n0 + a) / (n0 + b + 1)), axis=1)
    logXpred1 = np.sum(nbinom.logpmf(Xtest, a + np.sum(X1[:, :-1], axis=0),
                                     (n1 + a) / (n1 + b + 1)), axis=1)
    y0Haty = logXpred0 + math.log((e + n0) / (n0 + n1 + e + f))
    y1Haty = logXpred1 + math.log((f + n1) / (n0 + n1 + e + f))
    return y0Haty, y1Haty
def ll_t(self, spec, pt, rt, alt):
    r_loc = self.get_r_t(spec, rt, alt)
    for thing in r_loc:
        if thing < 0:
            return -(10**50)
    ret = np.sum(nbinom.logpmf(self.data[spec][self.sc], n=r_loc, p=pt))
    return ret
def nloglikeobs(self, params):
    alph = params[-1]
    beta = params[:-1]
    nY, nX = self.endog, self.exog
    mu = np.exp(np.dot(nX, beta))
    size = 1 / alph
    prob = size / (size + mu)
    nloglik = -nbinom.logpmf(nY, size, prob)
    return nloglik
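# Hedged sketch (an assumption about intended use, not stated in the source):
# a nloglikeobs with this signature is the hook statsmodels'
# GenericLikelihoodModel expects; it sums the per-observation negative
# log-likelihood during fitting.
import numpy as np
from scipy.stats import nbinom
from statsmodels.base.model import GenericLikelihoodModel

class NBin(GenericLikelihoodModel):
    def nloglikeobs(self, params):
        alph = params[-1]
        beta = params[:-1]
        mu = np.exp(np.dot(self.exog, beta))
        size = 1 / alph
        prob = size / (size + mu)
        return -nbinom.logpmf(self.endog, size, prob)

# Usage (y: counts, X: design matrix with intercept):
# res = NBin(y, X).fit(start_params=np.append(np.zeros(X.shape[1]), 0.5))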
def _logpmf(self, x, mu, alpha, p, truncation):
    size, prob = self.convert_params(mu, alpha, p)
    # Probability mass at or below the truncation point, accumulated term by term.
    pmf = 0
    for i in range(int(np.max(truncation)) + 1):
        pmf += nbinom.pmf(i, size, prob)
    logpmf_ = nbinom.logpmf(x, size, prob) - np.log(1 - pmf)
    # logpmf_[x < truncation + 1] = - np.inf
    return logpmf_
def nloglikeobs_woz(self, params):
    nY, nX = self.endog, self.exog
    beta, alpha = params[:-1], params[-1]
    mu = np.exp(np.dot(nX, beta))
    size = 1 / alpha
    prob = size / (size + mu)
    nloglik = -nbinom.logpmf(nY, size, prob)
    return nloglik
def test_logpmf(self):
    n, p = truncatednegbin.convert_params(5, 0.1, 2)
    nb_logpmf = nbinom.logpmf(6, n, p) - np.log(nbinom.sf(5, n, p))
    tnb_logpmf = truncatednegbin.logpmf(6, 5, 0.1, 2, 5)
    assert_allclose(nb_logpmf, tnb_logpmf, rtol=1e-7)
    tnb_logpmf = truncatednegbin.logpmf(5, 5, 0.1, 2, 5)
    assert np.isneginf(tnb_logpmf)
def adj_loglikelihood(xVec, lenSampleRna, X, y, mu, sign):
    disp = np.repeat(xVec, lenSampleRna)
    n = 1 / disp
    p = n / (n + mu)
    loglik = sum(nbinom.logpmf(y, n, p))
    # Cox-Reid adjustment: penalize by half the log-determinant of X' W X.
    diagVec = mu / (1 + np.dot(mu.transpose(), disp))
    diagWM = np.diagflat(diagVec)
    xtwx = np.dot(np.dot(np.transpose(X), diagWM), X)
    coxreid = 0.5 * np.log(np.linalg.det(xtwx))
    return (loglik - coxreid) * sign
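# Hedged usage sketch (illustrative, not from the original source): evaluate
# the 6-argument variant above for one shared dispersion across four RNA
# samples; all values are made up.
import numpy as np
from scipy.stats import nbinom

X = np.ones((4, 1))                 # intercept-only design
y = np.array([7., 9., 12., 8.])
mu = np.full(4, 9.0)
print(adj_loglikelihood(0.1, 4, X, y, mu, sign=1.0))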
def nloglikeobs_wzp(self, params):
    nY, nX = self.endog, self.exog
    beta, alpha = params[:-2], params[-2]
    gamma = 1 / (1 + np.exp(params[-1]))  # zero-inflation weight via inverse logit
    mu = np.exp(np.dot(nX, beta))
    size = 1 / alpha
    prob = size / (size + mu)
    nloglik = -np.log(1 - gamma) - nbinom.logpmf(nY, size, prob)
    # Zero counts mix the point mass at zero with the NB mass at zero.
    nloglik[nY == 0] = -np.log(gamma + np.exp(-nloglik[nY == 0]))
    return nloglik
def pval(counts_A, dispersion_A, p_success_A,
         counts_B, dispersion_B, p_success_B):
    """
    Given two observed counts and the dispersions and probability of success
    for each NB distribution, calculate the p-value for those counts
    """
    # probability of observed data
    logging.debug("p_a: %f p_b: %f r_a: %f r_b: %f" %
                  (p_success_A, p_success_B, dispersion_A, dispersion_B))
    logging.debug("counts A: %d counts B: %d" % (counts_A, counts_B))
    log_p_counts_A = nbinom.logpmf(counts_A, n=dispersion_A, p=p_success_A)
    log_p_counts_B = nbinom.logpmf(counts_B, n=dispersion_B, p=p_success_B)
    log_p_counts = log_p_counts_A + log_p_counts_B
    # now we will calculate the p-value, conditioning on the total count
    total_count = counts_A + counts_B
    numerator = []
    denominator = []
    for a in range(int(total_count) + 1):
        b = total_count - a
        log_p_a = nbinom.logpmf(a, dispersion_A, p_success_A)
        log_p_b = nbinom.logpmf(b, dispersion_B, p_success_B)
        log_p_joint = log_p_a + log_p_b
        logging.debug(
            "a: %f b: %f log_p_a: %f log_p_b %f p_counts: %f p_joint: %f" %
            (a, b, log_p_a, log_p_b, log_p_counts, log_p_joint))
        # splits at most as probable as the observed one enter the numerator;
        # every split enters the denominator
        if log_p_joint <= log_p_counts:
            numerator.append(log_p_joint)
        denominator.append(log_p_joint)
    log_num_sum = logsumexp(numerator)
    log_dem_sum = logsumexp(denominator)
    if log_num_sum != 0 and log_dem_sum != 0:
        p_val = log_num_sum - log_dem_sum
    else:
        p_val = np.nan
    logging.debug("log_num_sum: %f log_dem_sum: %f log_p_val: %f" %
                  (log_num_sum, log_dem_sum, p_val))
    return p_val, log_p_counts_A, log_p_counts_B
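# Hedged usage sketch (illustrative, not from the original source): an exact
# conditional test comparing two NB-distributed counts; the returned p-value
# is on the log scale. Import paths are assumptions.
import logging
import numpy as np
from scipy.special import logsumexp
from scipy.stats import nbinom

log_p, lpA, lpB = pval(counts_A=12, dispersion_A=5.0, p_success_A=0.4,
                       counts_B=3, dispersion_B=5.0, p_success_B=0.4)
print(np.exp(log_p))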
def nloglikeobs(self, params):
    alpha = params[-1]
    beta = params[:-1]
    mu = np.exp(np.dot(self.exog, beta))
    size = 1 / alpha
    prob = size / (size + mu)
    # Equivalent explicit form per observation:
    #   gammaln(y + size) - gammaln(size) - gammaln(y + 1)
    #   + y * log(mu * alpha / (mu * alpha + 1)) - size * log(mu * alpha + 1)
    ll = nbinom.logpmf(self.endog, size, prob)
    return -ll
def llNoPrior(self, spec):
    r_loc = self.get_r(spec)
    for thing in r_loc:
        if thing < 0:
            return -(10**50)
    if spec in ["SP", "EG"]:
        ret = np.sum(nbinom.logpmf(self.data[spec][self.sc],
                                   n=np.multiply(r_loc, np.sqrt(self.data[spec][self.area])),
                                   p=self.p0))
    else:
        ret = np.sum(nbinom.logpmf(self.data[spec][self.sc], n=r_loc, p=self.p0))
    return ret
def adj_loglikelihood_scalar(disp, X, y, mu, sign):
    n = 1 / disp
    p = n / (n + mu)
    loglik = sum(nbinom.logpmf(y, n, p))
    diagVec = mu / (1 + mu * disp)
    diagWM = sp.diag(diagVec)
    xtwx = sp.dot(sp.dot(X.T, diagWM), X)
    coxreid = 0.5 * sp.log(sp.linalg.det(xtwx))
    return (loglik - coxreid) * sign
def assignweight(data_dict, storeL, tt):
    # Get observations
    report_true_diff_tt = data_dict['infection'][tt] - data_dict['infection'][tt - 1]
    report_true_tt = data_dict['infection'][tt]
    d_true_diff_tt = data_dict['death'][tt] - data_dict['death'][tt - 1]
    test_true_tt = data_dict['test'][tt]
    test_diff_tt = data_dict['test'][tt] - data_dict['test'][tt - 1]
    # Get estimations
    death = np.clip(storeL[:, tt, 4] - storeL[:, tt - 1, 4], a_min=0, a_max=None)
    report_diff = np.clip(storeL[:, tt, 5] - storeL[:, tt - 1, 5], a_min=0, a_max=None)
    report = storeL[:, tt, 5]
    test = np.clip(storeL[:, tt, 6], a_min=0, a_max=None)
    # Log likelihoods: negative binomial for daily increments,
    # normal for cumulative totals
    loglikSum_report_diff = nbinom.logpmf(k=report_true_diff_tt, n=1,
                                          p=1 / (1 + report_diff))
    loglik_report = norm.logpdf(report_true_tt, loc=report, scale=report_true_tt)
    if (test_diff_tt > 0) and (data_dict['death'][tt] > 0):
        loglik_test = norm.logpdf(test_true_tt, loc=test, scale=test_true_tt)
        loglik_d = nbinom.logpmf(k=d_true_diff_tt, n=1, p=1 / (1 + death))
        loglikSum = (loglikSum_report_diff + loglik_report + loglik_test + loglik_d) / 4
    elif data_dict['death'][tt] > 0:
        loglik_d = nbinom.logpmf(k=d_true_diff_tt, n=1, p=1 / (1 + death))
        loglikSum = (loglikSum_report_diff + loglik_report + loglik_d) / 3
    elif test_diff_tt > 0:
        loglik_test = norm.logpdf(test_true_tt, loc=test, scale=test_true_tt)
        loglikSum = (loglikSum_report_diff + loglik_report + loglik_test) / 3
    else:
        loglikSum = (loglikSum_report_diff + loglik_report) / 2
    loglikSum = np.clip(loglikSum, a_min=-500, a_max=None)
    return np.exp(loglikSum)
def compute_pred_split(self, spec):
    tags = [self.sp, self.eg, self.bh, self.bd, self.ab, self.dam]
    params = []
    for arr in self.p_outs:
        params.append(np.mean(arr))
    denoms = []
    r_loc = []
    for ind in range(len(self.pred_data[spec][tags[0]])):
        denom = 1.
        for i in range(6):
            denom += self.pred_data[spec][tags[i]][ind] * params[i] / self.rescale_rat[i]
        r_loc.append(params[6] / denom)
        denoms.append((params[6] * (1 - params[7]) / params[7]) * 1. / denom)
    lls = nbinom.cdf(np.median(self.pred_data[spec][self.sc]), n=r_loc, p=params[7])
    ll0 = nbinom.cdf(np.median(self.pred_data[spec][self.sc]),
                     n=self.baseroot[0], p=self.baseroot[1])
    llall = np.sum(nbinom.logpmf(self.pred_data[spec][self.sc],
                                 n=r_loc, p=params[7]))
    llallnull = np.sum(nbinom.logpmf(self.pred_data[spec][self.sc],
                                     n=self.baseroot[0], p=self.baseroot[1]))
    return (denoms, lls, ll0, np.median(self.pred_data[spec][self.sc]),
            llall, llallnull)
def return_max_ll(self, spec, writefile):
    ll0 = np.sum(nbinom.logpmf(self.data[spec][self.sc],
                               n=self.baseroot[0], p=self.baseroot[1]))
    ll1 = max(self.p_outs[8])
    with open(writefile, 'w') as wh:
        wh.write(str(ll1) + ' ' + str(ll0) + '\n')
def max_llh_given_r_param(neg_binom_r_param, count_data):
    """
    For a given r parameter of the negative binomial distribution, give the
    log likelihood of observing the count data using the maximum likelihood
    estimator of the other parameter (p).

    :param neg_binom_r_param: int or float, r parameter of the neg binom distribution
    :param count_data: np array, count values we model with negative binomial
    :return: float, log likelihood
    """
    num_counts = len(count_data)
    # MLE of p given r: p = r / (r + mean(count_data)).
    p = 1 - sum(count_data) / (num_counts * neg_binom_r_param + sum(count_data))
    llh = sum(nbinom.logpmf(count_data, neg_binom_r_param, p))
    return llh
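# Hedged usage sketch (illustrative, not from the original source): profile
# the likelihood over candidate r values and pick the best one.
import numpy as np
from scipy.stats import nbinom

counts = np.array([2, 0, 5, 3, 1, 4, 2, 6])
r_grid = np.arange(1, 21)
best_r = max(r_grid, key=lambda r: max_llh_given_r_param(r, counts))
print(best_r, max_llh_given_r_param(best_r, counts))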
def adj_loglikelihood(xVec, lenSampleRibo, lenSampleRna, X, y, mu, sign):
    disp = np.hstack([np.repeat(xVec[0], lenSampleRibo),
                      np.repeat(xVec[1], lenSampleRna)])
    n = 1 / disp
    p = n / (n + mu)
    loglik = sum(nbinom.logpmf(y, n, p))
    diagVec = mu / (1 + np.dot(mu.transpose(), disp))
    diagWM = np.diagflat(diagVec)
    xtwx = np.dot(np.dot(np.transpose(X), diagWM), X)
    coxreid = 0.5 * np.log(np.linalg.det(xtwx))
    return (loglik - coxreid) * sign
def llNoPrior(self, spec):
    r_loc = self.get_r(spec)
    for thing in r_loc:
        if thing < 0:
            return -(10**50)
    ret = np.sum(nbinom.logpmf(self.data[spec][self.sc], n=r_loc, p=self.p0))
    return ret
def ll_nbinom(y, X, beta, alph):
    """
    :param y: The responses
    :param X: The regressors
    :param beta: Vector of coefficients
    :param alph: Negative binomial heterogeneity parameter
    :return: Log likelihood
    """
    mu = np.exp(np.dot(X, beta))   # expectation
    size = 1 / float(alph)         # r parameter: the NB shape (dispersion) parameter
    prob = size / (size + mu)      # or 1 / (1 + alph * mu): probability of success
    ll = nbinom.logpmf(y, size, prob)
    return ll
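# Hedged check (illustrative addition): the two success-probability forms in
# the comment above agree algebraically.
import numpy as np

alph, mu = 0.7, np.array([1.0, 5.0, 20.0])
size = 1 / alph
assert np.allclose(size / (size + mu), 1 / (1 + alph * mu))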
def getloglikelihood(kmat, mu_estimate, alpha):
    '''
    Get the log likelihood estimation of NB, using the current estimation of beta
    '''
    # kmat and mu_estimate are both N*1 matrices
    mu_vec = [t[0] for t in mu_estimate.tolist()]
    k_vec = [t[0] for t in kmat.tolist()]
    if len(mu_vec) != len(k_vec):
        raise ValueError('Count table dimension is not the same as mu vector dimension.')
    var_vec = [t + alpha * t * t for t in mu_vec]
    nb_p = [mu_vec[i] / var_vec[i] for i in range(len(mu_vec))]
    nb_r = [mu_vec[i] * mu_vec[i] / (var_vec[i] - mu_vec[i]) for i in range(len(mu_vec))]
    logp = [nbinom.logpmf(k_vec[i], nb_r[i], nb_p[i]) for i in range(len(mu_vec))]
    return sum(logp)
def _ll_nbt(y, X, beta, alph, C=0):
    r'''
    Negative Binomial (truncated)

    Truncated densities for count models (Cameron & Trivedi, 2005, 680):

    .. math::

        f(y|\beta, y \geq C+1) = \frac{f(y|\beta)}{1-F(C|\beta)}
    '''
    Q = 0
    mu = np.exp(np.dot(X, beta))
    size = 1 / alph * mu**Q
    prob = size / (size + mu)
    ll = nbinom.logpmf(y, size, prob) - np.log(1 - nbinom.cdf(C, size, prob))
    return ll
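# Hedged check (illustrative addition): the left-truncated pmf used above
# renormalizes to 1 over the support y >= C + 1.
import numpy as np
from scipy.stats import nbinom

size, prob, C = 2.0, 0.3, 0
y = np.arange(C + 1, 200)
log_trunc = nbinom.logpmf(y, size, prob) - np.log(1 - nbinom.cdf(C, size, prob))
print(np.exp(log_trunc).sum())  # ~ 1.0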
def adj_loglikelihood_scalar(disp, X, y, mu, sign):
    n = 1 / disp
    p = n / (n + mu)
    loglik = sum(nbinom.logpmf(y, n, p))
    diagVec = mu / (1 + mu * disp)
    diagWM = sp.diag(diagVec)
    xtwx = sp.dot(sp.dot(X.T, diagWM), X)
    coxreid = 0.5 * sp.log(sp.linalg.det(xtwx))
    ret = (loglik - coxreid) * sign
    if isinstance(ret, complex):
        raise complexException()
    return ret
def ll_t(self, spec):
    r_loc = self.get_r_t(spec)
    for thing in r_loc:
        if thing < 0:
            return -(10**50)
    ret = np.sum(nbinom.logpmf(self.data[spec][self.sc], n=r_loc, p=self.p0_t))
    # Normal prior on each alpha coefficient, scaled by the number of observations.
    mylen = len(self.data[spec][self.sc])
    for val in self.al_t[spec]:
        ret += mylen * norm.logpdf(val, loc=0, scale=10)
    return ret
def adj_loglikelihood(xVec, lenSampleRibo, lenSampleRna, X, y, mu, sign):
    disp = sp.hstack([sp.repeat(xVec[0], lenSampleRibo),
                      sp.repeat(xVec[1], lenSampleRna)])
    n = 1 / disp
    p = n / (n + mu)
    loglik = sum(nbinom.logpmf(y, n, p))
    diagVec = mu / (1 + sp.dot(mu.transpose(), disp))
    diagWM = sp.diagflat(diagVec)
    xtwx = sp.dot(sp.dot(sp.transpose(X), diagWM), X)
    coxreid = 0.5 * sp.log(sp.linalg.det(xtwx))
    ret = (loglik - coxreid) * sign
    if isinstance(ret, complex):
        raise complexException()
    return ret
def getloglikelihood2(k_list, mu_list, alpha, sumup=False, log=True):
    '''
    Get the log likelihood estimation of NB, using the current estimation
    of beta and alpha
    '''
    mu_sq = np.multiply(mu_list, mu_list)
    var_vec = mu_list + np.multiply(alpha, mu_sq)
    nb_p = np.divide(mu_list, var_vec)
    nb_r = np.divide(mu_sq, var_vec - mu_list)
    if log:
        logp = nbinom.logpmf(k_list, nb_r, nb_p)
    else:
        logp = nbinom.pmf(k_list, nb_r, nb_p)
    if np.isnan(np.sum(logp)):
        logp = np.where(np.isnan(logp), 0, logp)
    if sumup:
        return np.sum(logp)
    return logp
def getloglikelihood2(kmat, mu_estimate, alpha, sumup=False, log=True):
    '''
    Get the log likelihood estimation of NB, using the current estimation of beta
    '''
    if kmat.shape[0] != mu_estimate.shape[0]:
        raise ValueError('Count table dimension is not the same as mu vector dimension.')
    kmat_r = np.round(kmat)
    mu_sq = np.multiply(mu_estimate, mu_estimate)
    var_vec = mu_estimate + np.multiply(alpha, mu_sq)
    nb_p = np.divide(mu_estimate, var_vec)
    nb_r = np.divide(mu_sq, var_vec - mu_estimate)
    if log:
        logp = nbinom.logpmf(kmat_r, nb_r, nb_p)
    else:
        logp = nbinom.pmf(kmat, nb_r, nb_p)
    if np.isnan(np.sum(logp)):
        logp = np.where(np.isnan(logp), 0, logp)
    if sumup:
        return np.sum(logp)
    return logp
def testNegBinomNoSd(k, r, mu):
    truth = nbinom.logpmf(k, r, mu)
    val = u.log_neg_binom_likelihood(k, r, mu)
    return truth == val
def testNegBinomWSd(k, r, mu, sd):
    truth = np.mean([nbinom.logpmf(k, i, mu)
                     for i in range(int(r - 0.5 * sd), int(r + 0.5 * sd) + 1)])
    val = u.log_neg_binom_likelihood(k, r, mu, sd)
    print(truth, val)
    return truth == val
    if not self.bins:
        probs = []
        i = 0
        v_old = -1
        while True:
            v = self.nbin(i, self.mu, 1. / self.alpha)
            probs.append(v)
            i += 1
            if fabs(v_old - v) < 10**-10:
                break
            v_old = v
        self.bins = [float(x) for x in np.add.accumulate(probs)]
    return np.digitize(random_sample(1), self.bins)[0]


if __name__ == '__main__':
    neg_bin = NegBin(0.1, 0.00000000001)
    distr = {'n': 10, 'p': 0.1}
    for i in range(10):
        v = nbinom.logpmf(i, distr['n'], distr['p'])
        print(i, v, sep='\t')
def _ll_nb2(y, X, beta, alph):
    mu = np.exp(np.dot(X, beta))
    size = 1 / alph
    prob = size / (size + mu)
    ll = nbinom.logpmf(y, size, prob)
    return ll
def test_logpmf(self):
    n, p = sm.distributions.zinegbin.convert_params(5, 1, 1)
    nb_logpmf = nbinom.logpmf(2, n, p)
    tnb_logpmf = sm.distributions.zinegbin.logpmf(2, 5, 1, 1, 0.005)
    assert_allclose(nb_logpmf, tnb_logpmf, rtol=1e-2, atol=1e-2)
def test_logpmf_p2(self):
    n, p = sm.distributions.zinegbin.convert_params(10, 1, 2)
    nb_logpmf = nbinom.logpmf(200, n, p)
    tnb_logpmf = sm.distributions.zinegbin.logpmf(200, 10, 1, 2, 0.01)
    assert_allclose(nb_logpmf, tnb_logpmf, rtol=1e-2, atol=1e-2)