def sw_sums(a, b):
    # `w` is taken from the enclosing scope. Convert the scaled weights
    # (odds) into Bernoulli probabilities p = w / (1 + w), in place; an
    # infinite weight divides to NaN, which corresponds to p = 1.
    abw = apply_scale(w, a, b)
    np.divide(abw, 1 + abw, out=abw)
    abw[np.isnan(abw)] = 1
    swr = abw.sum(1, keepdims=True)
    swc = abw.sum(0, keepdims=True)
    return swr, swc
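# A minimal standalone sketch (illustrative only, not part of this module's
# API) of the transform sw_sums applies: weights are odds, w = p / (1 - p),
# so p = w / (1 + w); an infinite weight divides to NaN, which sw_sums maps
# back to p = 1 before summing rows and columns.
#
#   w_demo = np.array([[1.0, 3.0],
#                      [0.25, np.inf]])
#   p_demo = w_demo / (1 + w_demo)        # inf / (1 + inf) -> NaN
#   p_demo[np.isnan(p_demo)] = 1          # NaN encodes p = 1
#   print p_demo.sum(1, keepdims=True)    # expected row margins
#   print p_demo.sum(0, keepdims=True)    # expected column margins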
def approximate_conditional_nll(A, w, sort_by_wopt_var=True):
    """Return approximate row/column-conditional NLL of binary matrix.

    Return the approximate nll of an observed binary matrix given
    specified Bernoulli weights, conditioned on having the observed
    margins.

    Inputs:
      A: observed data, (m x n) binary matrix
      w: weight matrix, (m x n) matrix with values in (0, +infty)

    Output:
      ncll: negative conditional log-likelihood
    """
    assert A.shape == w.shape

    r = A.sum(1, dtype=np.int)
    c = A.sum(0, dtype=np.int)

    r, c, arrays, _ = _prune(r, c, A, w)
    A, w = arrays

    # Sizing
    m, n = len(r), len(c)
    if (m == 0) or (n == 0):
        return 0.0

    # Sort the row margins (descending)
    rndx = np.argsort(-r)
    rsort = r[rndx]

    # Balance the weights
    a_scale, b_scale = canonical_scalings(w, r, c)
    wopt = apply_scale(w, a_scale, b_scale)
    if np.isnan(wopt).any():
        wopt = w

    # Reorder the columns
    if sort_by_wopt_var:
        cndx = np.lexsort((-wopt.var(0), c))
    else:
        cndx = np.argsort(c)
    csort = c[cndx]
    wopt = wopt[:, cndx]

    # Compute G
    G = _compute_G(r, m, n, wopt)

    return _compute_cnll(A, r, rsort, rndx, csort, cndx, m, n, G)
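# Hypothetical usage sketch (the names A_obs and w_obs are illustrative,
# not part of the module): score an observed binary matrix against
# independent Bernoulli weights, conditioning on its observed margins.
#
#   A_obs = np.random.random((20, 30)) < 0.2
#   w_obs = np.random.gamma(1.0, 1.0, size=(20, 30))
#   print approximate_conditional_nll(A_obs, w_obs)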
def approximate_from_margins_weights(r, c, w, T=None,
                                     sort_by_wopt_var=True):
    """Return approximate samples from row/column-conditional binary matrices.

    Return a binary matrix (or a list of binary matrices) sampled
    approximately according to the specified Bernoulli weights,
    conditioned on having the specified margins.

    Inputs:
      r: row margins, length m
      c: column margins, length n
      w: weight matrix, (m x n) matrix with values in (0, +infty)
      T: number of matrices to sample
      sort_by_wopt_var: when enabled, column ordering depends on w

    Output:
      B_sample_sparse:
        (T = None, the default) sparse representation of (m x n) binary matrix
        (T >= 1) list of (sparse binary matrix, logQ, logP) triples

    More explicitly, consider independent Bernoulli random variables
    B(i,j) arranged as an m x n matrix B given the m-vector of row sums
    r and the n-vector of column sums c of the sample, i.e., given that
    sum(B_sample, 1) = r and sum(B_sample, 0) = c. An error is
    generated if no binary matrix agrees with r and c.

    B(i,j) is Bernoulli(p(i,j)) where p(i,j) = w(i,j)/(1+w(i,j)), i.e.,
    w(i,j) = p(i,j)/(1-p(i,j)). [The case p(i,j) = 1 must be handled by
    the user in a preprocessing step, by converting to p(i,j) = 0 and
    decrementing the row and column sums appropriately.]

    The sparse representation used for output is a matrix giving the
    locations of the ones in the sample. If d = sum(r) = sum(c), then
    B_sample_sparse has dimensions (d x 2). If something goes wrong
    (due to undetected improper input), some of the rows of
    B_sample_sparse may be [-1,-1], indicating no entry of B_sample.

    B_sample can be recovered from B_sample_sparse via:

      B_sample = np.zeros((m,n), dtype=np.bool)
      for i, j in B_sample_sparse:
          if i == -1: break
          B_sample[i,j] = 1
    """
    r_prune, c_prune, arrays_prune, unprune = _prune(r, c, w)
    w_prune = arrays_prune[0]
    _check_margins(r_prune, c_prune)

    ### Preprocessing

    # Sizing (making copies of m and n, as they are mutated during sampling)
    r_init = r_prune.copy()
    m, n = len(r_prune), len(c_prune)
    if (m == 0) or (n == 0):
        if T:
            return [unprune([np.empty((0, 2)), 0, 0]) for t in xrange(T)]
        else:
            return np.empty((0, 0))
    m_init, n_init = m, n
    assert (m, n) == w_prune.shape

    # Sort the row margins (descending)
    rndx_init = np.argsort(-r_prune)
    rsort = r_prune[rndx_init]

    # Balance the weights
    a_scale, b_scale = canonical_scalings(w_prune, r_prune, c_prune)
    wopt = apply_scale(w_prune, a_scale, b_scale)

    # Reorder the columns
    if sort_by_wopt_var:
        cndx = np.lexsort((-wopt.var(0), c_prune))
    else:
        cndx = np.argsort(c_prune)
    csort = c_prune[cndx]
    wopt = wopt[:, cndx]

    # Precompute log weights
    logw = np.log(w_prune)

    # Compute G
    G = _compute_G(r_prune, m, n, wopt)

    # Generate the inverse index for the row orders to facilitate fast
    # sorting during the updating
    irndx_init = np.argsort(rndx_init)

    # Compute the conjugate of c
    cconj_init = conjugate(csort, m)

    # Get the running total of number of ones to assign
    count_init = np.sum(rsort)

    def do_sample():
        sample_prune = _compute_sample(logw, count_init, m_init, n_init,
                                       r_init, rndx_init, irndx_init,
                                       csort, cndx, cconj_init, G)
        return unprune(sample_prune)

    if T:
        return [do_sample() for t in xrange(T)]
    else:
        return do_sample()[0]
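# Hypothetical usage sketch (A_obs and w_obs as in the sketch above): draw
# one approximate margin-conditional sample and densify it from the sparse
# (row, column) representation, following the recipe in the docstring.
#
#   r_obs = A_obs.sum(1, dtype=np.int)
#   c_obs = A_obs.sum(0, dtype=np.int)
#   B_sparse = approximate_from_margins_weights(r_obs, c_obs, w_obs)
#   B = np.zeros(A_obs.shape, dtype=np.bool)
#   for i, j in B_sparse:
#       if i == -1: break
#       B[i, j] = True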
    return logkappa, logcvsq


if __name__ == '__main__':
    # Test of binary matrix generation code
    m = np.random.random(size=(12, 10)) < 0.3
    r, c = np.sum(m, axis=1), np.sum(m, axis=0)
    print r, c
    A = arbitrary_from_margins(r, c)
    print np.sum(A, axis=1), np.sum(A, axis=0)

    # Test of "rc" balancing
    m = np.random.normal(10, 1, size=(6, 5))
    r, c = np.ones(6), np.ones(5)
    c[0] = 2
    a, b = canonical_scalings(m, r, c)
    m_canonical = apply_scale(m, a, b)
    print m_canonical.sum(1)
    print m_canonical.sum(0)

    # Test of conjugate
    print conjugate([1, 1, 1, 1, 2, 8], 10)

    # Test of approximate margins-conditional sampling
    N = 50
    a_out = np.random.normal(0, 1, N)
    a_in = np.random.normal(0, 1, N)
    x = np.random.normal(0, 1, (N, N))
    theta = 0.8
    logit_P = np.zeros((N, N))
    for i, a in enumerate(a_out):
        logit_P[i, :] += a