def lower_bound(gamma_1, gamma_2, alphas, Lambda, eta, r, votes, K, n_sens):
    gamma = np.array((gamma_1, gamma_2))
    res = lgamma(np.sum(gamma)) - np.sum(lgamma(gamma))
    res += lgamma(np.sum(alphas)) - np.sum(lgamma(alphas))
    lambda_sum = np.sum(Lambda)
    for k in range(K):
        dig_lambda = digamma(Lambda[k]) - digamma(lambda_sum)
        res += (alphas[k] - 1) * dig_lambda
        res -= (Lambda[k] - 1) * dig_lambda
        for j in range(K):
            eta_sum = np.sum(eta[:, j, k])
            for z in range(2):
                dig_eta = digamma(eta[z, j, k]) - digamma(eta_sum)
                res += (gamma[z] - 1) * dig_eta
                res -= (eta[z, j, k] - 1) * dig_eta
    for i in range(n_sens):
        for k in range(K):
            res += r[i, k] * (digamma(Lambda[k]) - digamma(lambda_sum))
            r_smooth = (r[i, k] + 1e-10) / np.sum(r[i, k] + 1e-10)
            res -= r_smooth * np.log(r_smooth)  # for numerical stability
            eta_sum = eta[0, :, k] + eta[1, :, k]
            res += r[i, k] * np.nansum(
                votes.iloc[i, :] * (digamma(eta[0, :, k]) - digamma(eta_sum))
                + (1.0 - votes.iloc[i, :]) * (digamma(eta[1, :, k]) - digamma(eta_sum)))
    return res
def expected_utility(self, rewards, i, qs=np.linspace(0, 1, 1000)):
    log_p_marginal = (lgamma(self.a + i) - lgamma(self.a)
                      + lgamma(self.a + self.m) - lgamma(self.a + self.m + i))
    # integral = np.sum(self.q_pdf(qs) * qs**i)
    # for q in qs:
    #     integral += self.q_pdf(q) * q**i
    exp_u = np.exp(log_p_marginal) * self.utility(rewards[i])
    return exp_u
def dmvt(X, center, sigma, df, output, workarray, return_log=False):
    if df is None or df <= 0:
        # no degrees of freedom given, so fall back to the multivariate normal density
        return dmvnorm(X, center, sigma, output, workarray, return_log=return_log)
    residuals = workarray[:, :2]
    residuals[:] = X[:]
    residuals -= center
    m = sigma.shape[0]
    icov = la.inv(sigma)
    logdet = np.log(la.det(sigma))
    distval = (np.dot(residuals, icov) * residuals).sum(1)
    output[:] = (lgamma((m + df) / 2.)
                 - (lgamma(df / 2.) + 0.5 * (logdet + m * np.log(pi * df)))
                 - 0.5 * (df + m) * np.log(1 + distval / df))
    if not return_log:
        output[:] = np.exp(output)
def objective_z(z, *args):
    x = args[1]
    mu_b = args[2]
    prec = args[3]
    alpha = np.exp(z)
    A = z - mu_b
    A = np.matrix(A)
    re = (-np.sum(lgamma(alpha + x) - lgamma(alpha))
          + (lgamma(np.sum(alpha + x)) - lgamma(np.sum(alpha)))
          + np.matmul(np.matmul(A, prec), A.T))
    return float(re)
def LLH_poisson(x, mu, deltamu=1e-1, vectorCalc=True):
    from scipy.special import loggamma as lgamma
    llh = 0
    if vectorCalc:
        mask_mu = mu != 0
        llh += np.sum(x[mask_mu] * np.log(mu[mask_mu]) - mu[mask_mu] - lgamma(x[mask_mu] + 1))
        llh += np.sum(x[~mask_mu] * np.log(deltamu) - mu[~mask_mu] - lgamma(x[~mask_mu] + 1))
    else:
        for i in range(len(x)):
            if mu[i] > 0:
                llh += x[i] * np.log(mu[i]) - mu[i] - lgamma(x[i] + 1)
            else:
                llh += x[i] * np.log(deltamu) - mu[i] - lgamma(x[i] + 1)
    return -llh
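# A minimal sanity check for LLH_poisson (illustrative; assumes x and mu are
# NumPy arrays and mu > 0 everywhere): the result should equal the negative
# Poisson log-likelihood from scipy.stats up to floating-point error.
import numpy as np
from scipy.stats import poisson

x_demo = np.array([0, 1, 4])
mu_demo = np.array([0.5, 1.0, 3.0])
assert np.isclose(LLH_poisson(x_demo, mu_demo), -np.sum(poisson.logpmf(x_demo, mu_demo)))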
def _log_kernel_norm(h, d, kernel):
    """Given a KernelType enumeration, compute the kernel normalization.

    h is the bandwidth, d is the dimension.
    """
    # cdef DTYPE_t tmp, factor = 0
    # cdef ITYPE_t k
    tmp = 0
    factor = 0
    k = 0
    if kernel == GAUSSIAN_KERNEL:
        factor = 0.5 * d * LOG_2PI
    elif kernel == TOPHAT_KERNEL:
        factor = logVn(d)
    elif kernel == EPANECHNIKOV_KERNEL:
        factor = logVn(d) + log(2. / (d + 2.))
    elif kernel == EXPONENTIAL_KERNEL:
        factor = logSn(d - 1) + lgamma(d)
    elif kernel == LINEAR_KERNEL:
        factor = logVn(d) - log(d + 1.)
    elif kernel == COSINE_KERNEL:
        # this is derived from a chain rule integration
        factor = 0
        tmp = 2. / PI
        for k in range(1, d + 1, 2):
            factor += tmp
            tmp *= -(d - k) * (d - k - 1) * (2. / PI) ** 2
        factor = log(factor) + logSn(d - 1)
    else:
        raise ValueError("Kernel code not recognized")
    return -factor - d * log(h)
def LLH_dima(x, mu, os, deltamu=1e-1, nohit_penalty=False, vectorCalc=True):
    """Calculate the Dima LLH."""
    from scipy.special import loggamma as lgamma
    x = x.copy()
    mu = mu.copy()
    llh = 0
    if vectorCalc:
        mu_dima = (os * mu + x) / (os + 1)
        mask_mu = mu != 0
        mask_x = x != 0
        llh += np.sum(os * mu[mask_mu] * np.log(mu_dima[mask_mu] / mu[mask_mu]))
        llh += np.sum(x[mask_x] * np.log(mu_dima[mask_x] / x[mask_x]))
        if nohit_penalty:
            llh += np.sum(x[~mask_mu] * np.log(deltamu) - mu[~mask_mu] - lgamma(x[~mask_mu] + 1))
    else:
        for i in range(len(x)):
            mu_dima = (os * mu[i] + x[i]) / (os + 1)
            if mu[i] != 0:
                llh += os * mu[i] * np.log(mu_dima / mu[i])
            if x[i] != 0:
                llh += x[i] * np.log(mu_dima / x[i])
    return -llh
def _ll_br(self, y, X, Z, params):
    nz = self.Z.shape[1]
    Xparams = params[:-nz]
    Zparams = params[-nz:]
    mu = self.link.inverse(np.dot(X, Xparams))
    phi = self.link_phi.inverse(np.dot(Z, Zparams))
    # TODO: derive a and b and constrain to > 0?
    if np.any(phi <= np.finfo(float).eps):
        return np.array(-np.inf)
    ll = (lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi)
          + (mu * phi - 1) * np.log(y)
          + (((1 - mu) * phi) - 1) * np.log(1 - y))
    return ll
def lhood1(self, seg, LOG=True):
    """
    Calculate the likelihood (defaults to log-lhood) for a single
    observation, which is either numeric or an ndarray. For the likelihood
    of a group of observations, use .lhood().
    """
    nu = self._nu_n()
    mu = self._mu_n()
    s_nu = self._s_n()
    L = lgamma((nu + 1) / 2) - lgamma(nu / 2) - 0.5 * log(pi * s_nu) - 0.5 * log((nu + 1) / nu)
    L -= (nu + 1) * 0.5 * log(1 + (seg - mu) ** 2 * nu / (1 + nu) / s_nu)
    if not LOG:
        return exp(L)
    else:
        return L
def lhood(self, seg, LOG=True):
    """
    Calculate the likelihood (defaults to log) of a given segment, which
    should be a RunningVar instance.
    """
    if not isinstance(seg, RunningVar):
        raise TypeError('seg must be a RunningVar instance (not %s)' % seg.__class__)
    n = seg.n
    nu = self._nu_n()
    mu = self._mu_n()
    s_nu = self._s_n()
    L = lgamma((nu + n) / 2) - lgamma(nu / 2) - n * 0.5 * log(pi * s_nu) - 0.5 * log((nu + n) / nu)
    L -= (nu + n) * 0.5 * log(1 + (seg.n * seg.s + (seg.m - mu) ** 2 * (n * nu) / (n + nu)) / s_nu)
    if not LOG:
        return exp(L)
    else:
        return L
def _llobs(self, endog, exog, exog_precision, params):
    """
    Loglikelihood for observations with data arguments.

    Parameters
    ----------
    endog : ndarray
        1d array of endogenous variable.
    exog : ndarray
        2d array of explanatory variables.
    exog_precision : ndarray
        2d array of explanatory variables for precision.
    params : ndarray
        The parameters of the model, coefficients for linear predictors of
        the mean and of the precision function.

    Returns
    -------
    loglike : ndarray
        The log likelihood for each observation of the model evaluated at
        `params`.
    """
    y, X, Z = endog, exog, exog_precision
    nz = Z.shape[1]
    params_mean = params[:-nz]
    params_prec = params[-nz:]
    linpred = np.dot(X, params_mean)
    linpred_prec = np.dot(Z, params_prec)
    mu = self.link.inverse(linpred)
    phi = self.link_precision.inverse(linpred_prec)
    eps_lb = 1e-200
    alpha = np.clip(mu * phi, eps_lb, np.inf)
    beta = np.clip((1 - mu) * phi, eps_lb, np.inf)
    ll = (lgamma(phi) - lgamma(alpha) - lgamma(beta)
          + (mu * phi - 1) * np.log(y)
          + (((1 - mu) * phi) - 1) * np.log(1 - y))
    return ll
def calc(self, dist1, dist2):
    """Calculate the test statistic between two input distributions.

    Parameters
    ----------
    dist1 : array_like
        Input distribution.
    dist2 : array_like
        Input distribution.

    Returns
    -------
    stat : float
        Test statistic.
    """
    dist1, dist2 = self.get_array_range(dist1, dist2)
    self.check_lengths(dist1, dist2)
    lnB = 0
    n1 = np.sum(dist1)
    n2 = np.sum(dist2)
    nFactor = lgamma(n1 + n2 + 2) - lgamma(n1 + 1) - lgamma(n2 + 1)
    lnB += nFactor
    for i in range(0, len(dist1)):
        lnB += lgamma(dist1[i] + 1) + lgamma(dist2[i] + 1) - lgamma(dist1[i] + dist2[i] + 2)
    self.SetStat(lnB)
    self.stat = lnB
    return lnB
def __init__(self, *, data, maxid):
    self.maxid = maxid
    n = data.n
    N = data.N
    data = data.data
    mu0 = np.zeros(n)
    # Scoring parameters.
    am = 1
    aw = n + am + 1
    T0scale = am * (aw - n - 1) / (am + 1)
    T0 = T0scale * np.eye(n)
    TN = (T0 + (N - 1) * np.cov(data.T)
          + ((am * N) / (am + N)) * np.outer((mu0 - np.mean(data, axis=0)),
                                             (mu0 - np.mean(data, axis=0))))
    awpN = aw + N
    constscorefact = -(N / 2) * np.log(np.pi) + 0.5 * np.log(am / (am + N))
    scoreconstvec = np.zeros(n)
    for i in range(n):
        awp = aw - n + i + 1
        scoreconstvec[i] = (constscorefact - lgamma(awp / 2) + lgamma((awp + N) / 2)
                            + (awp + i) / 2 * np.log(T0scale))
    # Just to keep the above calculations cleaner
    self.data = data
    self.n = n
    self.N = N
    self.mu0 = mu0
    self.am = am
    self.aw = aw
    self.T0scale = T0scale
    self.T0 = T0
    self.TN = TN
    self.awpN = awpN
    self.constscorefact = constscorefact
    self.scoreconstvec = scoreconstvec
    self._cache = {frozenset(): 0}
def TSCalc(self, N1, N2):
    N1, N2 = self.GetArrayRange(N1, N2)
    self.TestLengths(N1, N2)
    lnB = 0
    n1 = np.sum(N1)
    n2 = np.sum(N2)
    try:
        from scipy.special import gammaln as lgamma
    except ImportError as e:
        print(e)
        raise
    nFactor = lgamma(n1 + n2 + 2) - lgamma(n1 + 1) - lgamma(n2 + 1)
    lnB += nFactor
    for i in range(0, len(N1)):
        lnB += lgamma(N1[i] + 1) + lgamma(N2[i] + 1) - lgamma(N1[i] + N2[i] + 2)
    self.SetStat(lnB)
def lbetabinom(x, m, k, n, a, b):
    logpost = (lgamma(m + 1) + lgamma(a + b + n) + lgamma(a + k + x) + lgamma(b + n - k + m - x)
               - lgamma(x + 1) - lgamma(m - x + 1) - lgamma(a + k) - lgamma(b + n - k)
               - lgamma(a + b + n + m))
    return logpost
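# A minimal cross-check for lbetabinom (illustrative; requires scipy >= 1.4 for
# scipy.stats.betabinom): the value should match the log-pmf of a Beta-binomial
# with m trials and posterior parameters (a + k, b + n - k).
import numpy as np
from scipy.stats import betabinom

x, m, k, n, a, b = 3, 10, 2, 5, 1.0, 1.0
assert np.isclose(lbetabinom(x, m, k, n, a, b),
                  betabinom.logpmf(x, m, a + k, b + n - k))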
from pylab import *
from scipy.special import loggamma as lgamma

figure('poissoniana', figsize=[4.51, 2.7]).set_tight_layout(True)
clf()

poisson = lambda k, mu: exp(k * log(mu) - lgamma(k + 1) - mu)

n = 10
ks = arange(n + 1)

bar(ks, poisson(ks, 2), label="$\\mu=2$", width=.8, color='gray')
bar(ks, poisson(ks, 5), label="$\\mu=5$", width=.5, color='lightgray')

xlabel('$k$')
ylabel('$P(k;\\mu)$')
legend(loc=0)

savefig('poisson.pdf')
def logVn(n):
    """V_n = pi^(n/2) / gamma(n/2 + 1)"""
    return 0.5 * n * LOG_PI - lgamma(0.5 * n + 1)
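# Quick check of logVn (illustrative; assumes LOG_PI = log(pi) and lgamma is
# scipy.special.gammaln): the unit-ball volumes are V_2 = pi and V_3 = 4*pi/3.
import numpy as np
assert np.isclose(np.exp(logVn(2)), np.pi)
assert np.isclose(np.exp(logVn(3)), 4 * np.pi / 3)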
from pylab import *
from scipy.special import loggamma as lgamma

figure('binomiale', figsize=[4.51, 2.7]).set_tight_layout(True)
clf()

binom = lambda k, n, p: exp(lgamma(1 + n) - lgamma(1 + k) - lgamma(1 + n - k)) * p**k * (1 - p)**(n - k)

n = 5
ks = arange(n + 1)

bar(ks, binom(ks, n, 0.5), label="$p=%.1f$" % 0.5, width=.8, color='gray')
bar(ks, binom(ks, n, 0.8), label="$p=%.1f$" % 0.8, width=.5, color='lightgray')

xlabel('$k$')
ylabel('$P(k;n,p)$')
legend(loc=0)

savefig('binomiale.pdf')
def fact(x):
    # log-factorial: returns log(x!) via the log-gamma function
    return lgamma(x + 1)

n = n11 + n12 + n22
def dt(x, mu, sd, df, log=False):
    c1 = lgamma((df + 1) / 2) - lgamma(df / 2) + 0.5 * np.log(1 / (np.pi * df * sd))
    logret = c1 - (df + 1) * 0.5 * np.log(1 + np.power(x - mu, 2) / (sd * df))
    if log:
        return logret
    else:
        return np.exp(logret)
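# A minimal sanity check for dt (illustrative; note that `sd` here enters the
# density as a squared scale, so it corresponds to scipy.stats.t with
# scale = sqrt(sd)).
import numpy as np
from scipy.stats import t as student_t

x_demo = np.array([-1.0, 0.0, 2.5])
assert np.allclose(dt(x_demo, mu=0.0, sd=2.0, df=4),
                   student_t.pdf(x_demo, df=4, loc=0.0, scale=np.sqrt(2.0)))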
def ddirichlet(x, p, return_log=False):
    ans = lgamma(p.sum()) - lgamma(p).sum() + ((p - 1) * np.log(x)).sum()
    if not return_log:
        return np.exp(ans)
    else:
        return ans
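# A minimal cross-check for ddirichlet (illustrative; assumes x sums to 1 and
# p > 0): the log density should agree with scipy.stats.dirichlet.logpdf.
import numpy as np
from scipy.stats import dirichlet

x_demo = np.array([0.2, 0.3, 0.5])
p_demo = np.array([1.0, 2.0, 3.0])
assert np.isclose(ddirichlet(x_demo, p_demo, return_log=True),
                  dirichlet.logpdf(x_demo, p_demo))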
def diwishart(X, v, S, return_log=False):
    is_square(X)
    k = X.ndim
    if not return_log:
        return (np.exp(-0.5 * (S * la.inv(X)).trace())
                * pow(la.det(X), (-(v + k + 1) / 2.0))
                * pow(la.det(S), (v / 2.0))
                / (pow(2, (v * k / 2.0)) * pow(pi, (k * (k - 1) / 4.0))
                   * gamma((v + 1 - np.array(range(1, k))) / 2.0).prod()))
    else:
        return (-0.5 * (S * la.inv(X)).trace()
                + np.log(la.det(X)) * (-(v + k + 1) / 2.)
                + np.log(la.det(S)) * (v / 2.0)
                - np.log(2) * (v * k / 2.0)
                - np.log(pi) * (k * (k - 1) / 4.0)
                - lgamma((v + 1 - np.array(range(1, k))) / 2.0).sum())
def fisher_exact(table, alternative="two-sided", hybrid=False, midP=False,
                 simulate_pval=False, replicate=2000, workspace=300, attempt=2,
                 seed=None):
    """Performs a Fisher exact test on a 2x2 contingency table.

    Parameters
    ----------
    table : array_like of ints
        A 2x2 contingency table. Elements should be non-negative integers.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Which alternative hypothesis to the null hypothesis the test uses.
        Default is 'two-sided'. Only used in the 2 x 2 case (with the scipy
        function). In every other case, the two-sided pval is returned.
    hybrid : bool
        Only used for larger than 2 x 2 tables, in which case it indicates
        whether the exact probabilities (default) or a hybrid approximation
        thereof should be computed.
    midP : bool
        Use this to enable mid-P correction. Could lead to slow computation.
        This is not applicable for simulated p-values. `alternative` cannot
        be used if you enable midpoint correction.
    simulate_pval : bool
        Indicate whether to compute p-values by Monte Carlo simulation, in
        larger than 2 x 2 tables.
    replicate : int
        An integer specifying the number of replicates used in the Monte
        Carlo test.
    workspace : int
        An integer specifying the size of the workspace used in the network
        algorithm. Only used for non-simulated p-values of larger than
        2 x 2 tables. Default value is 300. You might want to increase this
        if the p-value computation failed.
    attempt : int
        Number of attempts to try if the workspace size is not enough.
        On each attempt, the workspace size is doubled.
    seed : int
        Random number to use as seed. If a seed isn't provided, 4 bytes will
        be read from os.urandom. If this fails, getrandbits of the random
        module (with 32 random bits) will be used. In the particular case
        where both fail, the current time will be used.

    Returns
    -------
    p_value : float
        The probability of a more extreme table, where 'extreme' is in a
        probabilistic sense.

    Notes
    -----
    The calculated odds ratio is different from the one R uses. This scipy
    implementation returns the (more common) "unconditional Maximum
    Likelihood Estimate", while R uses the "conditional Maximum Likelihood
    Estimate".

    For tables with large numbers, the (inexact) chi-square test implemented
    in the function `chi2_contingency` can also be used.

    Examples
    --------
    Say we spend a few days counting whales and sharks in the Atlantic,
    Indian and Pacific oceans. In the Atlantic ocean we find 8 whales and
    1 shark, in the Indian ocean 2 whales and 5 sharks, and in the Pacific
    12 whales and 2 sharks. Then our contingency table is::

                Atlantic  Indian  Pacific
        whales     8        2       12
        sharks     1        5        2

    We use this table to find the p-value:

    >>> from Fisher import fisher_exact
    >>> pvalue = fisher_exact([[8, 2, 12], [1, 5, 2]])
    >>> pvalue
    0.01183...
    """
    workspace = 2 * int(workspace / 2)
    # int32 is not enough for the algorithm
    c = np.asarray(table, dtype=np.int64)
    if len(c.shape) > 2:
        raise ValueError(
            "The input `table` should not have more than 2 dimensions.")
    if np.any(np.asarray(c.shape) < 2):
        raise ValueError("The input `table` must be at least of shape (2, 2).")
    # We expect all values to be non-negative
    if np.any(c < 0):
        raise ValueError("All values in `table` must be nonnegative.")
    nr, nc = c.shape
    if nr == 2 and nc == 2:
        # I'm not sure what the fisher_exact function of ss does,
        # so use my own function if mid-P correction is requested.
        if not midP:
            # in this case, just use the default scipy implementation
            # (could remove this in the future)
            return ss.fisher_exact(c, alternative)[1]
        else:
            return _midp(c)
    else:
        pval = None
        if simulate_pval:
            sr = c.sum(axis=1)
            sc = c.sum(axis=0)
            # The zero columns and rows are dropped here, see the R function
            c = c[sr > 0, :][:, sc > 0]
            nr, nc = c.shape
            if nr < 2 or nc < 2:
                raise ValueError(
                    'Less than 2 non-zero column or row marginals,\n %s' % c)
            statistic = -np.sum(lgamma(c + 1))
            tmp_res = _fisher_sim(c, replicate, seed)
            almost = 1 + 64 * np.finfo(np.double).eps
            pval = (1 + np.sum(tmp_res <= statistic / almost)) / (replicate + 1.)
        elif hybrid:
            expect, percnt, emin = 5, 80, 1  # this is the Cochran condition
            pval = _execute_fexact(nr, nc, c, nr, expect, percnt, emin,
                                   workspace, attempt, midP)
        else:
            expect, percnt, emin = -1, 100, 0
            pval = _execute_fexact(nr, nc, c, nr, expect, percnt, emin,
                                   workspace, attempt, midP)
        return pval