def sw_sums(a, b):
    """Return row and column sums of the scaled weights mapped by x / (1 + x).

    The weight matrix ``w`` is taken from the enclosing scope and scaled
    with ``apply_scale(w, a, b)``.

    Returns a pair ``(row_sums, col_sums)``; both keep the reduced axis
    (``keepdims=True``).
    """
    scaled = apply_scale(w, a, b)

    # Transform every entry x into x / (1 + x), reusing the same buffer.
    denominator = 1 + scaled
    np.divide(scaled, denominator, out=scaled)

    # NaN results (e.g. inf / inf for an infinite scaled weight) become 1.
    nan_mask = np.isnan(scaled)
    scaled[nan_mask] = 1

    row_sums = scaled.sum(axis=1, keepdims=True)
    col_sums = scaled.sum(axis=0, keepdims=True)
    return row_sums, col_sums
Ejemplo n.º 2
0
 def sw_sums(a, b):
     """Return (row sums, column sums) of x / (1 + x) over the scaled weights.

     ``w`` comes from the enclosing scope; it is scaled via
     ``apply_scale(w, a, b)``. Both returned sums are computed with
     ``keepdims=True``.
     """
     ratio = apply_scale(w, a, b)

     # Overwrite each scaled weight x with x / (1 + x) in place.
     one_plus = 1 + ratio
     np.divide(ratio, one_plus, out=ratio)

     # Entries that came out as NaN (e.g. inf / inf) are set to 1.
     is_nan = np.isnan(ratio)
     ratio[is_nan] = 1

     by_row = ratio.sum(axis=1, keepdims=True)
     by_col = ratio.sum(axis=0, keepdims=True)
     return by_row, by_col
def approximate_conditional_nll(A, w, sort_by_wopt_var=True):
    """Return approximate row/column-conditional NLL of binary matrix.

    Return the approximate negative log-likelihood of an observed binary
    matrix given specified Bernoulli weights, conditioned on having the
    observed margins.

    Inputs:
      A: observed data, (m x n) binary matrix
      w: weight matrix, (m x n) matrix with values in (0, +infty)
      sort_by_wopt_var: when enabled, columns are ordered by margin with
          ties broken by decreasing variance of the balanced weights;
          otherwise columns are ordered by margin alone
    Output:
      ncll: negative conditional log-likelihood (0.0 when no rows or no
          columns remain after pruning)
    """
    assert(A.shape == w.shape)

    # Builtin int instead of np.int: the alias was deprecated in NumPy 1.20
    # and removed in 1.24; it always referred to the builtin int.
    r = A.sum(1, dtype=int)
    c = A.sum(0, dtype=int)

    r, c, arrays, _ = _prune(r, c, A, w)
    A, w = arrays

    # Sizing
    m, n = len(r), len(c)
    if (m == 0) or (n == 0):
        return 0.0

    # Sort the row margins (descending)
    rndx = np.argsort(-r)
    rsort = r[rndx]

    # Balance the weights; fall back to the raw weights if balancing
    # produced any NaN.
    a_scale, b_scale = canonical_scalings(w, r, c)
    wopt = apply_scale(w, a_scale, b_scale)
    if np.isnan(wopt).any():
        wopt = w

    # Reorder the columns. lexsort treats its last key as primary, so the
    # column margin c is the main key and -variance breaks ties.
    if sort_by_wopt_var:
        cndx = np.lexsort((-wopt.var(0), c))
    else:
        cndx = np.argsort(c)
    csort = c[cndx]
    wopt = wopt[:, cndx]

    # Compute G
    G = _compute_G(r, m, n, wopt)

    return _compute_cnll(A, r, rsort, rndx, csort, cndx, m, n, G)
Ejemplo n.º 4
0
def approximate_conditional_nll(A, w, sort_by_wopt_var=True):
    """Return approximate row/column-conditional NLL of binary matrix.

    Return the approximate negative log-likelihood of an observed binary
    matrix given specified Bernoulli weights, conditioned on having the
    observed margins.

    Inputs:
      A: observed data, (m x n) binary matrix
      w: weight matrix, (m x n) matrix with values in (0, +infty)
      sort_by_wopt_var: when enabled, columns are ordered by margin with
          ties broken by decreasing variance of the balanced weights;
          otherwise columns are ordered by margin alone
    Output:
      ncll: negative conditional log-likelihood (0.0 when no rows or no
          columns remain after pruning)
    """
    assert (A.shape == w.shape)

    # Builtin int instead of np.int: the alias was deprecated in NumPy 1.20
    # and removed in 1.24; it always referred to the builtin int.
    r = A.sum(1, dtype=int)
    c = A.sum(0, dtype=int)

    r, c, arrays, _ = _prune(r, c, A, w)
    A, w = arrays

    # Sizing
    m, n = len(r), len(c)
    if (m == 0) or (n == 0):
        return 0.0

    # Sort the row margins (descending)
    rndx = np.argsort(-r)
    rsort = r[rndx]

    # Balance the weights; fall back to the raw weights if balancing
    # produced any NaN.
    a_scale, b_scale = canonical_scalings(w, r, c)
    wopt = apply_scale(w, a_scale, b_scale)
    if np.isnan(wopt).any():
        wopt = w

    # Reorder the columns. lexsort treats its last key as primary, so the
    # column margin c is the main key and -variance breaks ties.
    if sort_by_wopt_var:
        cndx = np.lexsort((-wopt.var(0), c))
    else:
        cndx = np.argsort(c)
    csort = c[cndx]
    wopt = wopt[:, cndx]

    # Compute G
    G = _compute_G(r, m, n, wopt)

    return _compute_cnll(A, r, rsort, rndx, csort, cndx, m, n, G)
def approximate_from_margins_weights(r, c, w, T = None,
                                     sort_by_wopt_var = True):
    """Return approximate samples from row/column-conditional binary matrices.

    Return a binary matrix (or a list of binary matrices) sampled
    approximately according to the specified Bernoulli weights,
    conditioned on having the specified margins.

    Inputs:
      r: row margins, length m
      c: column margins, length n
      w: weight matrix, (m x n) matrix with values in (0, +infty)
      T: number of matrices to sample
      sort_by_wopt_var: when enabled, column ordering depends on w
    Output:
      B_sample_sparse: (T default) sparse representation of (m x n) binary
                       matrix
                       (T >= 1) list of (sparse binary matrices, logQ, logP)

    More explicitly, consider independent Bernoulli random variables
    B(i,j) arranged as an m x n matrix B given the m-vector of row sums
    r and the n-vector of column sums c of the sample, i.e., given that
    sum(B_sample, 1) = r and sum(B_sample, 0) = c.

    An error is generated if no binary matrix agrees with r and c.

    B(i,j) is Bernoulli(p(i,j)) where p(i,j) = w(i,j)/(1+w(i,j)), i.e.,
    w(i,j) = p(i,j)/(1-p(i,j)).  [The case p(i,j) = 1 must be handled by
    the user in a preprocessing step, by converting to p(i,j) = 0 and
    decrementing the row and column sums appropriately.]

    The sparse representation used for output is a matrix giving the
    locations of the ones in the sample. If d = sum(r) = sum(c), then
    B_sample_sparse has dimensions (d x 2). If something goes wrong (due
    to undetected improper input), some of the rows of B_sample_sparse
    may be [-1,-1], indicating no entry of B_sample.

    B_sample can be recovered from B_sample_sparse via:

        B_sample = np.zeros((m,n), dtype=bool)
        for i, j in B_sample_sparse:
            if i == -1: break
            B_sample[i,j] = 1
    """
    r_prune, c_prune, arrays_prune, unprune = _prune(r, c, w)
    w_prune = arrays_prune[0]

    _check_margins(r_prune, c_prune)

    ### Preprocessing

    # Sizing (making copies of m and n, as they are mutated during sampling)
    r_init = r_prune.copy()
    m, n = len(r_prune), len(c_prune)
    if (m == 0) or (n == 0):
        if T:
            return [unprune([np.empty((0,2)), 0, 0]) for _ in range(T)]
        else:
            # NOTE(review): this branch returns a (0, 0) array and bypasses
            # unprune, whereas the T branch uses a (0, 2) array passed
            # through unprune -- confirm this asymmetry is intended.
            return np.empty((0,0))
    m_init, n_init = m, n
    assert((m,n) == w_prune.shape)

    # Sort the row margins (descending)
    rndx_init = np.argsort(-r_prune)
    rsort = r_prune[rndx_init]

    # Balance the weights
    a_scale, b_scale = canonical_scalings(w_prune, r_prune, c_prune)
    wopt = apply_scale(w_prune, a_scale, b_scale)

    # Reorder the columns. lexsort treats its last key as primary, so the
    # column margin is the main key and -variance breaks ties.
    if sort_by_wopt_var:
        cndx = np.lexsort((-wopt.var(0), c_prune))
    else:
        cndx = np.argsort(c_prune)
    csort = c_prune[cndx]
    wopt = wopt[:,cndx]

    # Precompute log weights
    logw = np.log(w_prune)

    # Compute G
    G = _compute_G(r_prune, m, n, wopt)

    # Generate the inverse index for the row orders to facilitate fast
    # sorting during the updating
    irndx_init = np.argsort(rndx_init)

    # Compute the conjugate of c
    cconj_init = conjugate(csort, m)

    # Get the running total of number of ones to assign
    count_init = np.sum(rsort)

    def do_sample():
        # Draw one sample in pruned coordinates, then map it back with
        # the unprune callable returned by _prune.
        sample_prune = _compute_sample(logw,
                                       count_init, m_init, n_init,
                                       r_init, rndx_init, irndx_init,
                                       csort, cndx, cconj_init,
                                       G)
        return unprune(sample_prune)

    if T:
        return [do_sample() for _ in range(T)]
    else:
        # Without T, return only the first element of the sample.
        return do_sample()[0]

# Ad-hoc smoke tests, executed only when the module is run as a script.
# NOTE: the print statements below are Python 2 syntax.
if __name__ == '__main__':
    # Test of binary matrix generation code
    # Random 12 x 10 boolean matrix; each entry is True with probability 0.3.
    m = np.random.random(size=(12,10)) < 0.3
    r, c = np.sum(m, axis = 1), np.sum(m, axis = 0)
    print r, c
    A = arbitrary_from_margins(r, c)
    # Margins of the generated matrix, printed for comparison with r, c
    # (presumably they should match -- confirm against arbitrary_from_margins).
    print np.sum(A, axis = 1), np.sum(A, axis = 0)

    # Test of "rc" balancing
    # 6 x 5 matrix of normal draws with mean 10 and standard deviation 1.
    m = np.random.normal(10, 1, size = (6,5))
    # Target margins: all ones, except the first column margin, which is 2
    # (so both margins sum to 6).
    r, c = np.ones(6), np.ones(5)
    c[0] = 2
    a, b = canonical_scalings(m, r, c)
    m_canonical = apply_scale(m, a, b)
    # Row and column sums of the scaled matrix.
    print m_canonical.sum(1)
    print m_canonical.sum(0)

    # Test of conjugate
    print conjugate([1,1,1,1,2,8], 10)

    # Test of approximate margins-conditional sampling
    N = 50
    a_out = np.random.normal(0, 1, N)
    a_in = np.random.normal(0, 1, N)
    x = np.random.normal(0, 1, (N,N))
    theta = 0.8
    logit_P = np.zeros((N,N))
    # Add a_out[i] to every entry of row i.
    for i, a in enumerate(a_out):
        logit_P[i,:] += a
Ejemplo n.º 7
0
def approximate_from_margins_weights(r, c, w, T=None, sort_by_wopt_var=True):
    """Return approximate samples from row/column-conditional binary matrices.

    Return a binary matrix (or a list of binary matrices) sampled
    approximately according to the specified Bernoulli weights,
    conditioned on having the specified margins.

    Inputs:
      r: row margins, length m
      c: column margins, length n
      w: weight matrix, (m x n) matrix with values in (0, +infty)
      T: number of matrices to sample
      sort_by_wopt_var: when enabled, column ordering depends on w
    Output:
      B_sample_sparse: (T default) sparse representation of (m x n) binary
                       matrix
                       (T >= 1) list of (sparse binary matrices, logQ, logP)

    More explicitly, consider independent Bernoulli random variables
    B(i,j) arranged as an m x n matrix B given the m-vector of row sums
    r and the n-vector of column sums c of the sample, i.e., given that
    sum(B_sample, 1) = r and sum(B_sample, 0) = c.

    An error is generated if no binary matrix agrees with r and c.

    B(i,j) is Bernoulli(p(i,j)) where p(i,j) = w(i,j)/(1+w(i,j)), i.e.,
    w(i,j) = p(i,j)/(1-p(i,j)).  [The case p(i,j) = 1 must be handled by
    the user in a preprocessing step, by converting to p(i,j) = 0 and
    decrementing the row and column sums appropriately.]

    The sparse representation used for output is a matrix giving the
    locations of the ones in the sample. If d = sum(r) = sum(c), then
    B_sample_sparse has dimensions (d x 2). If something goes wrong (due
    to undetected improper input), some of the rows of B_sample_sparse
    may be [-1,-1], indicating no entry of B_sample.

    B_sample can be recovered from B_sample_sparse via:

        B_sample = np.zeros((m,n), dtype=bool)
        for i, j in B_sample_sparse:
            if i == -1: break
            B_sample[i,j] = 1
    """
    r_prune, c_prune, arrays_prune, unprune = _prune(r, c, w)
    w_prune = arrays_prune[0]

    _check_margins(r_prune, c_prune)

    ### Preprocessing

    # Sizing (making copies of m and n, as they are mutated during sampling)
    r_init = r_prune.copy()
    m, n = len(r_prune), len(c_prune)
    if (m == 0) or (n == 0):
        if T:
            return [unprune([np.empty((0, 2)), 0, 0]) for _ in range(T)]
        else:
            # NOTE(review): this branch returns a (0, 0) array and bypasses
            # unprune, whereas the T branch uses a (0, 2) array passed
            # through unprune -- confirm this asymmetry is intended.
            return np.empty((0, 0))
    m_init, n_init = m, n
    assert ((m, n) == w_prune.shape)

    # Sort the row margins (descending)
    rndx_init = np.argsort(-r_prune)
    rsort = r_prune[rndx_init]

    # Balance the weights
    a_scale, b_scale = canonical_scalings(w_prune, r_prune, c_prune)
    wopt = apply_scale(w_prune, a_scale, b_scale)

    # Reorder the columns. lexsort treats its last key as primary, so the
    # column margin is the main key and -variance breaks ties.
    if sort_by_wopt_var:
        cndx = np.lexsort((-wopt.var(0), c_prune))
    else:
        cndx = np.argsort(c_prune)
    csort = c_prune[cndx]
    wopt = wopt[:, cndx]

    # Precompute log weights
    logw = np.log(w_prune)

    # Compute G
    G = _compute_G(r_prune, m, n, wopt)

    # Generate the inverse index for the row orders to facilitate fast
    # sorting during the updating
    irndx_init = np.argsort(rndx_init)

    # Compute the conjugate of c
    cconj_init = conjugate(csort, m)

    # Get the running total of number of ones to assign
    count_init = np.sum(rsort)

    def do_sample():
        # Draw one sample in pruned coordinates, then map it back with
        # the unprune callable returned by _prune.
        sample_prune = _compute_sample(logw, count_init, m_init, n_init,
                                       r_init, rndx_init, irndx_init, csort,
                                       cndx, cconj_init, G)
        return unprune(sample_prune)

    if T:
        return [do_sample() for _ in range(T)]
    else:
        # Without T, return only the first element of the sample.
        return do_sample()[0]
Ejemplo n.º 8
0

# Ad-hoc smoke tests, executed only when the module is run as a script.
# NOTE: the print statements below are Python 2 syntax.
if __name__ == '__main__':
    # Test of binary matrix generation code
    # Random 12 x 10 boolean matrix; each entry is True with probability 0.3.
    m = np.random.random(size=(12, 10)) < 0.3
    r, c = np.sum(m, axis=1), np.sum(m, axis=0)
    print r, c
    A = arbitrary_from_margins(r, c)
    # Margins of the generated matrix, printed for comparison with r, c
    # (presumably they should match -- confirm against arbitrary_from_margins).
    print np.sum(A, axis=1), np.sum(A, axis=0)

    # Test of "rc" balancing
    # 6 x 5 matrix of normal draws with mean 10 and standard deviation 1.
    m = np.random.normal(10, 1, size=(6, 5))
    # Target margins as a (6, 1) column and a (1, 5) row of ones.
    r, c = np.ones((6, 1)), np.ones((1, 5))
    # NOTE(review): c has shape (1, 5), so c[0] = 2 sets ALL five column
    # margins to 2 (sum(c) = 10 while sum(r) = 6). If only the first column
    # margin was meant to be 2, this should be c[0, 0] = 2 -- confirm.
    c[0] = 2
    a, b = canonical_scalings(m, r, c)
    m_canonical = apply_scale(m, a, b)
    # Row and column sums of the scaled matrix.
    print m_canonical.sum(1)
    print m_canonical.sum(0)

    # Test of conjugate
    print conjugate([1, 1, 1, 1, 2, 8], 10)

    # Test of approximate margins-conditional sampling
    N = 50
    a_out = np.random.normal(0, 1, N)
    a_in = np.random.normal(0, 1, N)
    x = np.random.normal(0, 1, (N, N))
    theta = 0.8
    logit_P = np.zeros((N, N))
    # Add a_out[i] to every entry of row i.
    for i, a in enumerate(a_out):
        logit_P[i, :] += a