import numpy as np
from scipy.special import gammaln, xlog1py


def nb_ll(data, P, R):
    """
    Returns the negative binomial log-likelihood of the data.

    Args:
        data (array): genes x cells
        P (array): NB success probability param - genes x clusters
        R (array): NB stopping param - genes x clusters

    Returns:
        cells x clusters array of log-likelihoods
    """
    # TODO: include factorial...
    # data = data + eps
    genes, cells = data.shape
    clusters = P.shape[1]
    lls = np.zeros((cells, clusters))
    for c in range(clusters):
        P_c = P[:, c].reshape((genes, 1))
        R_c = R[:, c].reshape((genes, 1))
        # don't need constant factors...
        ll = gammaln(R_c + data) - gammaln(R_c)  # - gammaln(data + 1)
        ll += data * np.log(P_c) + xlog1py(R_c, -P_c)
        # new_ll = np.sum(nbinom.logpmf(data, R_c, P_c), 0)
        lls[:, c] = ll.sum(0)
    return lls
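# A minimal usage sketch for nb_ll; the shapes and parameter values below
# are made up for illustration. Because the gammaln(data + 1) term is
# dropped, each entry differs from a sum of exact NB log-pmfs by a
# per-cell constant, which leaves the argmax over clusters unchanged.
import numpy as np

rng = np.random.default_rng(0)
genes, cells, clusters = 20, 5, 3
data = rng.poisson(5.0, size=(genes, cells)).astype(float)
P = rng.uniform(0.1, 0.9, size=(genes, clusters))
R = rng.uniform(1.0, 10.0, size=(genes, clusters))

lls = nb_ll(data, P, R)           # cells x clusters
assignments = lls.argmax(axis=1)  # most likely cluster per cell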
import numpy as np
from scipy import special
from scipy.special import gammaln


def pdf(pX, pR, pP):
    """PDF for a continuous generalization of the NB distribution."""
    gamma_part = gammaln(pR + pX) - gammaln(pX + 1) - gammaln(pR)
    return np.exp(gamma_part + (pR * np.log(pP)) + special.xlog1py(pX, -pP))
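# Check sketch: at integer pX this continuous generalization should reduce
# to scipy.stats.nbinom.pmf (the values below are illustrative).
import numpy as np
from scipy import stats

np.testing.assert_allclose(pdf(4.0, 2.5, 0.3), stats.nbinom.pmf(4, 2.5, 0.3))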
import numpy as np
from scipy.special import xlog1py


def obj_u(u, V, n_pos, n_neg, omega, reg_wt, alpha=1.0, sigma=1.0,
          UV=False, verbose=False):
    U = u.reshape(-1, V.shape[1])
    M = U.dot(V.T)
    m = M[omega]
    # LL = np.sum(n_pos * (m - np.log(1 + np.exp(m)))
    #             - n_neg * np.log(1 + np.exp(m)))
    LL = np.sum(n_pos * m / sigma - xlog1py(n_pos + n_neg, np.exp(m / sigma)))
    if not UV:
        M_sq = np.square(M / alpha)
        if np.any(M_sq > 1.0):
            reg = -np.inf
        else:
            reg = reg_wt * np.sum(np.log1p(-M_sq))
    else:
        U_sq = np.square(U / alpha)
        V_sq = np.square(V / alpha)
        if np.any(U_sq > 1.0) or np.any(V_sq > 1.0):
            reg = -np.inf
        else:
            reg = reg_wt * (np.sum(np.log1p(-U_sq)) + np.sum(np.log1p(-V_sq)))
    if verbose:
        print('LL = {}, reg = {}'.format(LL, reg))
    return -(LL + reg)
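# Hedged usage sketch: evaluating the objective at u = 0 for a small
# synthetic problem; all shapes and values here are illustrative only.
import numpy as np

rng = np.random.default_rng(2)
n_rows, n_cols, k = 8, 6, 2
V = rng.normal(scale=0.1, size=(n_cols, k))
omega = rng.uniform(size=(n_rows, n_cols)) < 0.5  # mask of observed entries
n_pos = rng.poisson(3.0, size=omega.sum())        # positive counts per entry
n_neg = rng.poisson(3.0, size=omega.sum())        # negative counts per entry

val = obj_u(np.zeros(n_rows * k), V, n_pos, n_neg, omega, reg_wt=1.0)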
def gen(x, y, name):
    """Generate fixture data and write it to file.

    # Arguments

    * `x`: first parameter
    * `y`: second parameter
    * `name::str`: output filename

    # Examples

    ``` python
    python> x = random(500) * 5.0
    python> y = random(500) * 5.0
    python> gen(x, y, './data.json')
    ```
    """
    out = xlog1py(x, y)

    # Store data to be written to file as a dictionary:
    data = {"x": x.tolist(), "y": y.tolist(), "expected": out.tolist()}

    # Based on the script directory, create an output filepath:
    filepath = os.path.join(DIR, name)

    # Write the data to the output filepath as JSON:
    with open(filepath, "w") as outfile:
        json.dump(data, outfile)
import numpy as np
from scipy.special import expit, xlog1py


def objective_func(beta, X, Y):
    """
    Definition:
        objective_func trains a logistic regression classifier.
    Params:
        beta: the weights that parametrize the linear function mapping X to Y.
        X: the features used to train the model.
        Y: the binary labels used to train the model.
    Return:
        the cross-entropy loss.
    """
    numRows = X.shape[0]
    cost = 0.0
    for i in range(numRows):
        xi = X[i, :]
        yi = Y[i]
        # p = sigmoid(np.dot(xi, beta))
        # cost += yi * np.log(p) + (1 - yi) * np.log(1 - p)
        # The expit function, also known as the logistic sigmoid function,
        # is defined as expit(x) = 1/(1+exp(-x)). It is the inverse of the
        # logit function, and avoids the overflow that shows up as NaN in
        # the cost.
        p = expit(np.dot(xi, beta))
        # xlog1py(1 - yi, -p) computes (1 - yi) * log(1 - p) safely.
        cost += yi * np.log(p) + xlog1py(1 - yi, -p)
    return -cost
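# Hedged usage sketch: fitting beta on synthetic binary labels with
# scipy.optimize.minimize; all names and values here are illustrative.
import numpy as np
from scipy.optimize import minimize
from scipy.special import expit

rng = np.random.default_rng(1)
X = rng.normal(size=(200, 3))
true_beta = np.array([1.5, -2.0, 0.5])
Y = (rng.uniform(size=200) < expit(X @ true_beta)).astype(float)

res = minimize(objective_func, x0=np.zeros(3), args=(X, Y), method='BFGS')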
import numpy as np
from scipy import special as sp_special


def cdf(x, probs):
    # do not evaluate for probs close to 0.5, where the
    # denominator 2 * probs - 1 vanishes
    tentative_cdf = sp_special.xlogy(x, probs) + sp_special.xlog1py(
        1 - x, -probs)
    tentative_cdf = (np.expm1(tentative_cdf) + probs) / (2 * probs - 1.)
    correct_above = np.where(x > 1., 1., tentative_cdf)
    return np.where(x < 0, 0., correct_above)
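# Check sketch: the endpoints behave like a CDF, hitting 0 at x = 0 and
# 1 at x = 1 (keeping probs away from 0.5; the values are illustrative).
import numpy as np

probs = np.array([0.2, 0.7])
np.testing.assert_allclose(cdf(np.zeros(2), probs), 0.0, atol=1e-12)
np.testing.assert_allclose(cdf(np.ones(2), probs), 1.0)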
import numpy as np
from scipy import special


def beta_logpdf(x, a, b, loc=0, scale=1):
    x = (x - loc) / scale
    z = special.xlog1py(b - 1.0, -x) + special.xlogy(a - 1.0, x)
    z -= special.betaln(a, b)
    z -= np.log(scale)
    z = np.where((x < 0) | (x > 1), -np.inf, z)
    if z.ndim == 0:
        return z[()]
    return z
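# Verification sketch: this should reproduce scipy.stats.beta.logpdf
# (the parameter values below are illustrative).
import numpy as np
from scipy import stats

x = np.linspace(0.01, 0.99, 5)
np.testing.assert_allclose(beta_logpdf(x, 2.0, 3.0),
                           stats.beta.logpdf(x, 2.0, 3.0))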
def _real_pdf(self, arm, theta):
    p = 0
    for pos in range(self.n_positions):
        pos_prob = self.position_probabilities[pos]
        a = self.S_kl[arm, pos]
        b = self.N_kl[arm, pos] - self.S_kl[arm, pos]
        p += sc.xlog1py(b, -theta * pos_prob) + sc.xlogy(a, theta)
        p -= sc.betaln(a, b)
        p += a * np.log(pos_prob)
    return np.exp(p)
def dbin_llf(r, p, n):
    r"""
    binomial distribution

    r ~ binomial(p, n); r = 0, ..., n

    .. math::

        P(r; p, n) = \frac{n!}{r!(n-r)!} p^r (1-p)^{n-r}
    """
    return binomln(n, r) + xlogy(r, p) + xlog1py(n - r, -p)
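# Sanity check sketch: assuming binomln(n, r) is the log binomial
# coefficient (that helper is defined elsewhere in this module),
# dbin_llf agrees with scipy.stats.binom.logpmf.
import numpy as np
from scipy import stats

r, p, n = 3, 0.4, 10
np.testing.assert_allclose(dbin_llf(r, p, n), stats.binom.logpmf(r, n, p))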
def log_pmf(self, *X):
    # check array for numpy structure
    X = check_array(X, reduce_args=True, ensure_1d=True)

    # Floor values of X
    X = np.floor(X)

    # Expand all components of log-pmf
    out = (
        sc.gammaln(self.n_trials + 1)
        - (sc.gammaln(X + 1) + sc.gammaln(self.n_trials - X + 1))
        + sc.xlogy(X, self.bias)
        + sc.xlog1py(self.n_trials - X, -self.bias)
    )
    return out
def VerifyPdfWithNumpy(self, pspherical, atol=1e-4):
    """Verifies log_prob evaluations with numpy/scipy.

    Both uniform random points and sampled points are evaluated.

    Args:
        pspherical: A `tfp.distributions.PowerSpherical` instance.
        atol: Absolute difference tolerable.
    """
    dim = tf.compat.dimension_value(pspherical.event_shape[-1])
    nsamples = 10
    # Sample some random points uniformly over the hypersphere using numpy.
    sample_shape = [nsamples] + tensorshape_util.as_list(
        pspherical.batch_shape) + [dim]
    uniforms = np.random.randn(*sample_shape)
    uniforms /= np.linalg.norm(uniforms, axis=-1, keepdims=True)
    uniforms = uniforms.astype(dtype_util.as_numpy_dtype(pspherical.dtype))
    # Concatenate in some sampled points from the distribution under test.
    pspherical_samples = pspherical.sample(sample_shape=[nsamples],
                                           seed=test_util.test_seed())
    samples = tf.concat([uniforms, pspherical_samples], axis=0)
    samples = tf.debugging.check_numerics(samples, 'samples')
    samples = self.evaluate(samples)
    log_prob = pspherical.log_prob(samples)
    log_prob = self.evaluate(log_prob)
    # Check that the log_prob is not nan or +inf. It can be -inf since
    # if we sample a direction diametrically opposite to the mean direction,
    # we'll get an inner product of -1.
    self.assertFalse(np.any(np.isnan(log_prob)))
    self.assertFalse(np.any(np.isposinf(log_prob)))
    conc = self.evaluate(pspherical.concentration)
    mean_dir = self.evaluate(pspherical.mean_direction)
    alpha = (dim - 1.) / 2. + conc
    beta = (dim - 1.) / 2.
    expected = (
        sp_special.xlog1py(conc, np.sum(samples * mean_dir, axis=-1))
        - (alpha + beta) * np.log(2.)
        - beta * np.log(np.pi)
        - sp_special.gammaln(alpha)
        + sp_special.gammaln(alpha + beta))
    self.assertAllClose(expected, log_prob, atol=atol)
def _logpmf(self, x, n, p):
    k = floor(x)
    combiln = gamln(n + 1) - (gamln(k + 1) + gamln(n - k + 1))
    return combiln + special.xlogy(k, p) + special.xlog1py(n - k, -p)
def _logpmf(self, k, p):
    return special.xlog1py(k - 1, -p) + log(p)
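# Check sketch: the expression above is exactly scipy.stats.geom.logpmf
# for the geometric distribution on k = 1, 2, ... (illustrative values).
import numpy as np
from scipy import special, stats

k, p = 4, 0.3
np.testing.assert_allclose(special.xlog1py(k - 1, -p) + np.log(p),
                           stats.geom.logpmf(k, p))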
def _logpmf(self, x, n, p):
    coeff = gamln(n + x) - gamln(x + 1) - gamln(n)
    return coeff + n * log(p) + special.xlog1py(x, -p)
def test_xlog1py(x, y, jit_fn):
    fn = xlog1py if not jit_fn else jit(xlog1py)
    assert_allclose(fn(x, y), osp_special.xlog1py(x, y))
import numpy as np
from scipy.special import xlogy, xlog1py


def f(t, y):
    return -np.sum(xlogy(t, y) + xlog1py(1 - t, -y)) / t.shape[-1]
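# Usage sketch: f computes the mean binary cross-entropy of predictions y
# against targets t (the values below are illustrative).
import numpy as np

t = np.array([1.0, 0.0, 1.0, 1.0])
y = np.array([0.9, 0.2, 0.7, 0.6])
direct = -np.mean(t * np.log(y) + (1 - t) * np.log(1 - y))
np.testing.assert_allclose(f(t, y), direct)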
import scipy.special as sc


def h(p):
    p = p.cpu().numpy()
    return -(sc.xlogy(p, p) + sc.xlog1py(1 - p, -p))
def dnegbin_llf(x, p, r):
    """negative binomial(x; p, r); x = 0, 1, 2, ..."""
    return binomln(x + r - 1, x) + xlogy(r, p) + xlog1py(x, -p)
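# Sanity check sketch: with binomln as the log binomial coefficient
# (defined elsewhere in this module), dnegbin_llf matches
# scipy.stats.nbinom.logpmf with r successes at success probability p.
import numpy as np
from scipy import stats

x, p, r = 5, 0.4, 3
np.testing.assert_allclose(dnegbin_llf(x, p, r), stats.nbinom.logpmf(x, r, p))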
from scipy import special


def binary_entropy(probs):
    probs = probs.copy()
    probs[probs < 0] = 0
    probs[probs > 1] = 1
    return special.entr(probs) - special.xlog1py(1 - probs, -probs)
def dgeom_llf(x, p):
    """geometric(x; p); x = 1, 2, 3, ..."""
    return xlog1py(x - 1, -p) + log(p)
def binary_entropy(self, x):
    # mutual information support function
    return -(sc.xlogy(x, x) + sc.xlog1py(1 - x, -x)) / np.log(2)
def compute_conservativity(self, q_true):
    """
    Computes the degrees of conservativity for quantifiers (represented
    as binary strings).

    Parameters
    ----------
    q_true : array-like[int]
        The quantifier's bitstring.

    Returns
    -------
    dict of (string : float)
        Mapping sending conservativity type to the corresponding degree
        of conservativity of the quantifier's bitstring.
    """
    rv_preimage, rv_probs = self.precomputed[
        MeasureCalculator.CONSERVATIVITY]
    conservativities = {}
    if all(q_true) or not any(q_true):
        for cons_type in rv_preimage:
            conservativities[cons_type] = 1.0
    else:
        # Probability that the quantifier is true
        p_q_true = sum(q_true) / len(q_true)
        # Entropy of the quantifier's truth-value r.v.
        # xlog1py takes care of zero probabilities. Abs is taken to
        # account for negative zero.
        H_q_true = abs(
            (entr(p_q_true) - xlog1py(1 - p_q_true, -p_q_true)) / np.log(2))
        for cons_type in rv_preimage:
            # Compute the conditional entropy of the truth-value r.v.
            # conditioned on the conservativity r.v.
            H_cond = 0
            # For every value the conservativity r.v. takes:
            for cons_tuple in rv_probs[cons_type]:
                # Probability of the value cons_tuple being output by the
                # conservativity r.v.
                p = rv_probs[cons_type][cons_tuple]
                q_cond_conserv_indices = rv_preimage[cons_type][cons_tuple]
                # Condition the original quantifier string on the output
                # of the r.v.
                q_cond_vals = bitarray(len(q_cond_conserv_indices))
                for new_i, original_i in enumerate(q_cond_conserv_indices):
                    q_cond_vals[new_i] = q_true[original_i]
                # Probability of the quantifier being true conditioned on
                # the given output of the conservativity r.v.
                p_q_cond_vals = sum(q_cond_vals) / len(q_cond_vals)
                H_cond += abs(p * (entr(p_q_cond_vals) - xlog1py(
                    1 - p_q_cond_vals, -p_q_cond_vals)) / np.log(2))
            conservativities[cons_type] = 1 - (H_cond / H_q_true)
    return conservativities
from scipy import special


def logpdf_beta(x, a, b):
    # log of the pdf for the beta distribution (taken from scipy.stats.beta)
    lPx = special.xlog1py(b - 1.0, -x) + special.xlogy(a - 1.0, x)
    lPx -= special.betaln(a, b)
    return lPx
def compute_monotonicity(self, q_true):
    """
    Computes each of the four types of degrees of monotonicity for
    quantifiers (represented as binary strings).

    Parameters
    ----------
    q_true : array-like[int]
        The quantifier's bitstring.

    Returns
    -------
    dict of (string : float)
        Dict contains the four degrees of monotonicity, indexed by the
        monotonicity type name.
    """
    corresp_models = self.precomputed[MeasureCalculator.MONOTONICITY]
    monotonicities = {}
    if all(q_true) or not any(q_true):
        for mon_type in corresp_models:
            monotonicities[mon_type] = 1.0
    else:
        p_q_true = sum(q_true) / len(q_true)
        # Entropy of the quantifier's truth-value r.v.
        # xlog1py takes care of zero probabilities. Abs is taken to
        # account for negative zero.
        H_q_true = abs(
            (entr(p_q_true) - xlog1py(1 - p_q_true, -p_q_true)) / np.log(2))
        for mon_type in corresp_models:
            # Build the truth-value string for the monotonicity r.v.
            # (w.r.t. mon_type)
            q_true_mon = q_true.copy()
            for i in range(len(q_true_mon)):
                if not q_true_mon[i]:
                    corresp_indices = corresp_models[mon_type][i]
                    for j in corresp_indices:
                        if q_true_mon[j]:
                            q_true_mon[i] = 1
                            break
            # Probability that the monotonicity r.v. is true
            p_q_true_mon = sum(q_true_mon) / len(q_true_mon)
            # Condition the original truth-value r.v. on the monotonicity r.v.
            q_true_cond_mon = [
                q_true[i] for i in range(len(q_true)) if q_true_mon[i]
            ]
            if len(q_true_cond_mon) == 0:
                H_q_true_cond_mon = 0
            else:
                q_true_cond_mon = bitarray(q_true_cond_mon)
                # Probability of the original truth-value r.v. being true
                # conditioned on the monotonicity r.v. being true
                p_q_true_cond_mon = sum(q_true_cond_mon) / len(
                    q_true_cond_mon)
                H_q_true_cond_mon = abs(
                    p_q_true_mon *
                    (entr(p_q_true_cond_mon) -
                     xlog1py(1 - p_q_true_cond_mon,
                             -p_q_true_cond_mon)) / np.log(2))
            monotonicities[mon_type] = 1 - (H_q_true_cond_mon / H_q_true)
    return monotonicities
import numpy as np
import scipy.special as sc


def binary_entropy(x):
    return -(sc.xlogy(x, x) + sc.xlog1py(1 - x, -x)) / np.log(2)
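# Usage sketch: entropy in bits — a fair coin carries exactly one bit,
# and the function is symmetric in p and 1 - p.
import numpy as np

p = np.array([0.1, 0.5, 0.9])
np.testing.assert_allclose(binary_entropy(np.array([0.5])), 1.0)
np.testing.assert_allclose(binary_entropy(p), binary_entropy(1 - p))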
def dbeta_llf(x, alpha, beta):
    """beta(x; alpha, beta); x in (0, 1)"""
    return (gammaln(alpha + beta) - gammaln(alpha) - gammaln(beta)
            + xlogy(alpha - 1, x) + xlog1py(beta - 1, -x))
import scipy.special as sc


def h(p):
    return -(sc.xlogy(p, p) + sc.xlog1py(1 - p, -p))