Ejemplo n.º 1
0
def report_summary(L, neg_B):
    """
    Print a diagnostic summary of the thin SVD of neg_B.

    The matrices L and neg_B, the SVD factors, the product of the factors
    and the rank-one approximation built from the first singular triple
    are written to stdout, each followed by a blank line.
    @param L: a matrix that is only echoed to stdout
    @param neg_B: the matrix that is decomposed
    @raise ValueError: if the first left and first right singular vectors,
    taken together, contain both negative and positive entries
    """
    U, s, Vh = scipy.linalg.svd(neg_B, full_matrices=False, compute_uv=True)
    # first left singular vector and first right singular vector
    x = U.T[0]
    y = Vh[0]
    sections = (
        ('L:', L),
        ('neg_B:', neg_B),
        ('U:', U),
        ('s:', s),
        ('Vh:', Vh),
        ('U S vh:', ndot(U, np.diag(s), Vh)),
        ('approx:', s[0] * np.outer(x, y)),
    )
    # The single-argument parenthesized form prints identically
    # under the Python 2 print statement and the Python 3 print function.
    for title, value in sections:
        print(title)
        print(value)
        print('')
    # Check the signs directly instead of casting through np.int,
    # which has been removed from recent NumPy releases.
    xy = np.concatenate((x, y))
    if (xy < 0).any() and (xy > 0).any():
        raise ValueError('multiple signs in the concatenated xy')
Ejemplo n.º 2
0
 def __init__(self, Q):
     """
     Summarize a rate matrix through a collection of derived quantities.

     When a symmetry check fails, the offending matrix is written
     to stdout before the exception is raised.
     @param Q: rate matrix
     @raise ValueError: if Q / v or the psi-rescaled Q is not symmetric
     """
     # define intermediate variables
     v = mrate.R_to_distn(Q)
     nstates = len(v)
     psi = np.sqrt(v)
     c_low, c_mid, c_high = cheeger.get_cheeger_bounds(Q, v)
     # define member variables to summarize the rate matrix
     self.rate_matrix = Q
     exch = Q / v
     self.exch_matrix = exch
     if not np.allclose(exch, exch.T):
         # parenthesized single-argument print works on Python 2 and 3
         print(exch)
         raise ValueError('expected symmetry')
     # elementwise rescaling of Q by the outer product of psi and 1 / psi
     sim_sym = np.outer(psi, 1 / psi) * Q
     self.sim_sym_matrix = sim_sym
     if not np.allclose(sim_sym, sim_sym.T):
         print(sim_sym)
         raise ValueError('expected symmetry')
     self.distn = v
     self.distn_shannon_entropy = -ndot(np.log(v), v)
     self.distn_logical_entropy = ndot(v, 1 - v)
     self.expected_rate = -ndot(np.diag(Q), v)
     # eigvalsh returns eigenvalues in ascending order,
     # so index -2 is the second largest eigenvalue
     self.spectrum = scipy.linalg.eigvalsh(sim_sym)
     self.spectral_gap = -self.spectrum[-2]
     self.isoperimetric_low = c_low
     self.isoperimetric_constant = c_mid
     self.isoperimetric_high = c_high
     self.trace_bound_high = -sum(np.diag(Q)) / (nstates - 1)
Ejemplo n.º 3
0
 def __init__(self, Q):
     """
     Summarize a rate matrix through a collection of derived quantities.

     When a symmetry check fails, the offending matrix is written
     to stdout before the exception is raised.
     @param Q: rate matrix
     @raise ValueError: if Q / v or the psi-rescaled Q is not symmetric
     """
     # define intermediate variables
     v = mrate.R_to_distn(Q)
     nstates = len(v)
     psi = np.sqrt(v)
     c_low, c_mid, c_high = cheeger.get_cheeger_bounds(Q, v)
     # define member variables to summarize the rate matrix
     self.rate_matrix = Q
     exch = Q / v
     self.exch_matrix = exch
     if not np.allclose(exch, exch.T):
         # parenthesized single-argument print works on Python 2 and 3
         print(exch)
         raise ValueError('expected symmetry')
     # elementwise rescaling of Q by the outer product of psi and 1 / psi
     sim_sym = np.outer(psi, 1 / psi) * Q
     self.sim_sym_matrix = sim_sym
     if not np.allclose(sim_sym, sim_sym.T):
         print(sim_sym)
         raise ValueError('expected symmetry')
     self.distn = v
     self.distn_shannon_entropy = -ndot(np.log(v), v)
     self.distn_logical_entropy = ndot(v, 1 - v)
     self.expected_rate = -ndot(np.diag(Q), v)
     # eigvalsh returns eigenvalues in ascending order,
     # so index -2 is the second largest eigenvalue
     self.spectrum = scipy.linalg.eigvalsh(sim_sym)
     self.spectral_gap = -self.spectrum[-2]
     self.isoperimetric_low = c_low
     self.isoperimetric_constant = c_mid
     self.isoperimetric_high = c_high
     self.trace_bound_high = -sum(np.diag(Q)) / (nstates - 1)
Ejemplo n.º 4
0
def get_p_id_deriv_ratio(R, t):
    """
    Get (second derivative of p_identity) divided by (first derivative of p_id)

    The expected identity is the v-weighted sum of the diagonal
    of the transition matrix at time t.
    @param R: rate matrix
    @param t: amount of time
    @return: ratio of the second to the first time derivative
    """
    # symmetrize the rate matrix
    v = mrate.R_to_distn(R)
    root_v = np.sqrt(v)
    lam = np.diag(root_v)
    rlam = np.diag(np.reciprocal(root_v))
    S = ndot(lam, -R, rlam)
    # eigendecompose the symmetrized rate matrix
    # this should satisfy R = ndot(rlam, V, np.diag(-W), V.T, lam)
    W, V = scipy.linalg.eigh(S)
    # get the two time derivatives of the transition matrix
    decay = np.exp(-W * t)
    P_dt = ndot(rlam, V, np.diag(-W * decay), V.T, lam)
    P_dtt = ndot(rlam, V, np.diag(W * W * decay), V.T, lam)
    # Weight the diagonals by the stationary distribution.
    # The original accumulated each term n times through an unused
    # inner loop; that common factor cancels in the ratio.
    e_dt = np.dot(v, np.diag(P_dt))
    e_dtt = np.dot(v, np.diag(P_dtt))
    return e_dtt / e_dt
Ejemplo n.º 5
0
def report_summary(L, neg_B):
    """
    Print a diagnostic summary of the thin SVD of neg_B.

    The matrices L and neg_B, the SVD factors, the product of the factors
    and the rank-one approximation built from the first singular triple
    are written to stdout, each followed by a blank line.
    @param L: a matrix that is only echoed to stdout
    @param neg_B: the matrix that is decomposed
    @raise ValueError: if the first left and first right singular vectors,
    taken together, contain both negative and positive entries
    """
    U, s, Vh = scipy.linalg.svd(neg_B, full_matrices=False, compute_uv=True)
    # first left singular vector and first right singular vector
    x = U.T[0]
    y = Vh[0]
    sections = (
        ('L:', L),
        ('neg_B:', neg_B),
        ('U:', U),
        ('s:', s),
        ('Vh:', Vh),
        ('U S vh:', ndot(U, np.diag(s), Vh)),
        ('approx:', s[0] * np.outer(x, y)),
    )
    # The single-argument parenthesized form prints identically
    # under the Python 2 print statement and the Python 3 print function.
    for title, value in sections:
        print(title)
        print(value)
        print('')
    # Check the signs directly instead of casting through np.int,
    # which has been removed from recent NumPy releases.
    xy = np.concatenate((x, y))
    if (xy < 0).any() and (xy > 0).any():
        raise ValueError('multiple signs in the concatenated xy')
Ejemplo n.º 6
0
def get_p_id_deriv_ratio(R, t):
    """
    Get (second derivative of p_identity) divided by (first derivative of p_id)

    The expected identity is the v-weighted sum of the diagonal
    of the transition matrix at time t.
    @param R: rate matrix
    @param t: amount of time
    @return: ratio of the second to the first time derivative
    """
    # symmetrize the rate matrix
    v = mrate.R_to_distn(R)
    root_v = np.sqrt(v)
    lam = np.diag(root_v)
    rlam = np.diag(np.reciprocal(root_v))
    S = ndot(lam, -R, rlam)
    # eigendecompose the symmetrized rate matrix
    # this should satisfy R = ndot(rlam, V, np.diag(-W), V.T, lam)
    W, V = scipy.linalg.eigh(S)
    # get the two time derivatives of the transition matrix
    decay = np.exp(-W * t)
    P_dt = ndot(rlam, V, np.diag(-W * decay), V.T, lam)
    P_dtt = ndot(rlam, V, np.diag(W * W * decay), V.T, lam)
    # Weight the diagonals by the stationary distribution.
    # The original accumulated each term n times through an unused
    # inner loop; that common factor cancels in the ratio.
    e_dt = np.dot(v, np.diag(P_dt))
    e_dtt = np.dot(v, np.diag(P_dtt))
    return e_dtt / e_dt
Ejemplo n.º 7
0
def get_rhs_old(M, nkeep, ndelete):
    """
    Evaluate the right hand side of a putative identity.

    With P = H R H, where R is the deletion projection and H is the
    centering projection, this returns P pinv(P.T M P) P.T.
    @param M: a matrix
    @param nkeep: passed to the deletion projection helper
    @param ndelete: passed to the deletion projection helper
    @return: the matrix P pinv(P.T M P) P.T
    """
    deletion = get_deletion_projection(nkeep, ndelete)
    centering = get_centering_projection(nkeep + ndelete)
    proj = ndot(centering, deletion, centering)
    core_pinv = np.linalg.pinv(ndot(proj.T, M, proj))
    return ndot(proj, core_pinv, proj.T)
Ejemplo n.º 8
0
def get_rhs_also_old(M, nkeep, ndelete):
    """
    Evaluate the right hand side of a putative identity.

    This returns R H pinv(R H M H R) H R, where R is the deletion
    projection and H is the centering projection.
    @param M: a matrix
    @param nkeep: passed to the deletion projection helper
    @param ndelete: passed to the deletion projection helper
    @return: the matrix R H pinv(R H M H R) H R
    """
    deletion = get_deletion_projection(nkeep, ndelete)
    centering = get_centering_projection(nkeep + ndelete)
    # sandwich M, pseudoinvert, then sandwich again
    sandwiched = ndot(deletion, centering, M, centering, deletion)
    inner_pinv = np.linalg.pinv(sandwiched)
    return ndot(deletion, centering, inner_pinv, centering, deletion)
Ejemplo n.º 9
0
def get_rhs(M, nkeep, ndelete):
    """
    Return pinv(D R M R.T D) built from the projection helpers.

    Two alternative formulations of the target value are also evaluated,
    but they do not contribute to the returned matrix.
    @param M: a matrix
    @param nkeep: passed to the projection helpers
    @param ndelete: passed to the projection helpers
    @return: the pseudoinverse of D R M R.T D
    """
    total = nkeep + ndelete
    proj_centering_partial_del = get_p_centering_partial_del(nkeep, ndelete)
    proj_centering_partial = get_p_centering_partial(nkeep, ndelete)
    proj_centering = get_p_centering(total)
    proj_del = get_p_del(nkeep, ndelete)
    # define the target value in a few different ways
    target_a = schur_del(
        np.linalg.pinv(ndot(proj_centering, M, proj_centering)),
        nkeep, ndelete)
    target_b = np.linalg.pinv(
        ndot(proj_centering_partial_del, M, proj_centering_partial_del))
    target_c = np.linalg.pinv(ndot(
        proj_del, proj_centering_partial, M,
        proj_centering_partial.T, proj_del))
    # Try to find another way to get the target value using projections,
    # hopefully a way that uses projections in a way that can be shown
    # to be equivalent to taking a schur complement in a pseudoinverse.
    # For now the third formulation is the one that is returned.
    return target_c
Ejemplo n.º 10
0
def get_identicality_params(R):
    """
    This returns the parameters for an identicality function.
    If the rate matrix has n states
    then the identicality function is
    f(t) = a1*exp(b1*t) + a2*exp(b2*t) + ... + a{n-1}*exp(b{n-1}*t) + c
    The symmetrized matrix and its asymmetry are echoed to stdout
    as a diagnostic.
    @param R: time reversible rate matrix
    @return: a array, b array, c
    """
    n = len(R)
    pi_arr = R_to_distn(R)
    # symmetrize
    root_pi = np.sqrt(pi_arr)
    S = ndot(np.diag(root_pi), R, np.diag(np.reciprocal(root_pi)))
    # parenthesized single-argument print works on Python 2 and 3
    print('S should be symmetric:')
    print(S)
    print(S - S.T)
    # eigendecompose the symmetric matrix
    W, V = scipy.linalg.eigh(S)
    # keep only the eigenpairs whose eigenvalue is not numerically zero
    eps = 1e-12
    kept = [i for i in range(n) if abs(W[i]) > eps]
    # get the exponential coefficients
    identicality_coeffs = [np.dot(pi_arr, V[:, i] * V[:, i]) for i in kept]
    # get the exponential rate constants
    identicality_rates = [W[i] for i in kept]
    # get the one dimensional constant
    identicality_const = np.inner(pi_arr, pi_arr)
    # return the identicality parameters
    return (identicality_coeffs, identicality_rates, identicality_const)
Ejemplo n.º 11
0
def get_identicality_params(R):
    """
    This returns the parameters for an identicality function.
    If the rate matrix has n states
    then the identicality function is
    f(t) = a1*exp(b1*t) + a2*exp(b2*t) + ... + a{n-1}*exp(b{n-1}*t) + c
    The symmetrized matrix and its asymmetry are echoed to stdout
    as a diagnostic.
    @param R: time reversible rate matrix
    @return: a array, b array, c
    """
    n = len(R)
    pi_arr = R_to_distn(R)
    # symmetrize
    root_pi = np.sqrt(pi_arr)
    S = ndot(np.diag(root_pi), R, np.diag(np.reciprocal(root_pi)))
    # parenthesized single-argument print works on Python 2 and 3
    print('S should be symmetric:')
    print(S)
    print(S - S.T)
    # eigendecompose the symmetric matrix
    W, V = scipy.linalg.eigh(S)
    # keep only the eigenpairs whose eigenvalue is not numerically zero
    eps = 1e-12
    kept = [i for i in range(n) if abs(W[i]) > eps]
    # get the exponential coefficients
    identicality_coeffs = [np.dot(pi_arr, V[:, i] * V[:, i]) for i in kept]
    # get the exponential rate constants
    identicality_rates = [W[i] for i in kept]
    # get the one dimensional constant
    identicality_const = np.inner(pi_arr, pi_arr)
    # return the identicality parameters
    return (identicality_coeffs, identicality_rates, identicality_const)
Ejemplo n.º 12
0
def get_response_content(fs):
    """
    Build a text report about Schur complements in a fixed Laplacian matrix.

    The last two vertices of a hard-coded 8x8 Laplacian are removed by
    Schur complementation, a trailing block of the result is extracted,
    and the inverse of its leading principal submatrix is reported
    together with its trace.
    @param fs: unused
    @return: the multi-line report as a string
    """
    # set up print options
    np.set_printoptions(
        linewidth=1000000,
        threshold=1000000,
    )

    # define the Laplacian matrix
    L = np.array([
        [ 1,  0,  0,  0,  0, -1,  0,  0],
        [ 0,  2,  0,  0,  0, -2,  0,  0],
        [ 0,  0,  3,  0,  0,  0, -3,  0],
        [ 0,  0,  0,  2,  0,  0, -2,  0],
        [ 0,  0,  0,  0,  1,  0,  0, -1],
        [-1, -2,  0,  0,  0,  4,  0, -1],
        [ 0,  0, -3, -2,  0,  0,  6, -1],
        [ 0,  0,  0,  0, -1, -1, -1,  3],
        ], dtype=float)

    # remove the last two columns by schur complementation
    L_schur = L[:-2, :-2] - ndot(
        L[:-2, -2:],
        scipy.linalg.inv(L[-2:, -2:]),
        L[-2:, :-2])

    # get the trailing block of the matrix
    L_schur_component = L_schur[-4:, -4:]

    # get the part corresponding to the inverse of a rooted covariance matrix
    L_schur_rooted = L_schur_component[:-1, :-1]

    # get the corresponding covariance matrix
    cov = scipy.linalg.inv(L_schur_rooted)

    # Assemble the report as title / value / blank-line triples.
    # Building the text directly avoids the Python-2-only
    # "print >> out" statement while producing the same characters.
    sections = (
        ('L:', L),
        ('schur complement of two internal vertices (7, 8) in L:', L_schur),
        ('a component (3, 4, 5, 6) of the schur complement:',
            L_schur_component),
        ('a piece (3, 4, 5) of the component:', L_schur_rooted),
        ('the corresponding rooted covariance matrix:', cov),
        ('trace of covariance matrix:', np.trace(cov)),
    )
    lines = []
    for title, value in sections:
        lines.extend([title, str(value), ''])

    # show the result
    return '\n'.join(lines) + '\n'
Ejemplo n.º 13
0
def schur(M, nsmall):
    """
    Get the Schur complement of the trailing block of M.

    The transpose of the upper right block is used in place of the
    lower left block, so M is effectively treated as symmetric.
    @param M: a square matrix
    @param nsmall: order of the leading block that is kept
    @return: the nsmall x nsmall Schur complement
    @raise ValueError: if the determinant of the trailing block is small
    """
    upper_left = M[:nsmall, :nsmall]
    upper_right = M[:nsmall, nsmall:]
    lower_right = M[nsmall:, nsmall:]
    if abs(np.linalg.det(lower_right)) < 1e-5:
        raise ValueError('small determinant for schur complement')
    correction = ndot(upper_right, np.linalg.inv(lower_right), upper_right.T)
    return upper_left - correction
Ejemplo n.º 14
0
def schur(M, nsmall):
    """
    Get the Schur complement of the trailing block of M.

    The transpose of the upper right block is used in place of the
    lower left block, so M is effectively treated as symmetric.
    @param M: a square matrix
    @param nsmall: order of the leading block that is kept
    @return: the nsmall x nsmall Schur complement
    @raise ValueError: if the determinant of the trailing block is small
    """
    upper_left = M[:nsmall, :nsmall]
    upper_right = M[:nsmall, nsmall:]
    lower_right = M[nsmall:, nsmall:]
    if abs(np.linalg.det(lower_right)) < 1e-5:
        raise ValueError('small determinant for schur complement')
    correction = ndot(upper_right, np.linalg.inv(lower_right), upper_right.T)
    return upper_left - correction
Ejemplo n.º 15
0
def symmetrized(R):
    """
    Get the symmetrized matrix.
    The distribution v is derived from R, and the result is
    diag(sqrt(v)) R diag(1 / sqrt(v)).
    This returns a symmetric matrix that is not a rate matrix
    because rows do not sum to zero.
    @param R: rate matrix
    @return: the rescaled matrix
    """
    root = np.sqrt(R_to_distn(R))
    left = np.diag(root)
    right = np.diag(np.reciprocal(root))
    return ndot(left, R, right)
Ejemplo n.º 16
0
def symmetrized(R):
    """
    Get the symmetrized matrix.
    The distribution v is derived from R, and the result is
    diag(sqrt(v)) R diag(1 / sqrt(v)).
    This returns a symmetric matrix that is not a rate matrix
    because rows do not sum to zero.
    @param R: rate matrix
    @return: the rescaled matrix
    """
    root = np.sqrt(R_to_distn(R))
    left = np.diag(root)
    right = np.diag(np.reciprocal(root))
    return ndot(left, R, right)
Ejemplo n.º 17
0
def _get_expectation(R, t):
    """
    Get the v-weighted sum of the entries of (P P'' - P' P') / P.

    Here P is the transition matrix at time t, primes denote elementwise
    time derivatives, and row i of the elementwise quotient is weighted
    by v[i], where v is the distribution derived from R.
    @param R: rate matrix
    @param t: amount of time
    @return: the weighted sum
    """
    # symmetrize the rate matrix
    v = mrate.R_to_distn(R)
    root_v = np.sqrt(v)
    lam = np.diag(root_v)
    rlam = np.diag(np.reciprocal(root_v))
    S = ndot(lam, -R, rlam)
    # eigendecompose the symmetrized rate matrix
    # this should satisfy R = ndot(rlam, V, np.diag(-W), V.T, lam)
    W, V = scipy.linalg.eigh(S)
    # get P and its two derivatives
    decay = np.exp(-W * t)
    P = ndot(rlam, V, np.diag(decay), V.T, lam)
    P_dt = ndot(rlam, V, np.diag(-W * decay), V.T, lam)
    P_dtt = ndot(rlam, V, np.diag(W * W * decay), V.T, lam)
    M = (P * P_dtt - P_dt * P_dt) / P
    # sum over all (i, j) of v[i] * M[i, j], as a single reduction
    # instead of a Python-level double loop
    return np.dot(v, M.sum(axis=1))
Ejemplo n.º 18
0
def symmetrized_known_distn(R, v):
    """
    Get the symmetrized matrix of a reversible markov process.
    The result is diag(sqrt(v)) R diag(1 / sqrt(v)).
    This returns a symmetric matrix that is not a rate matrix
    because rows do not sum to zero.
    The returned matrix should be similar to R
    in the sense of linear algebra matrix similarity.
    @param R: rate matrix
    @param v: stationary distribution
    @return: the rescaled matrix
    """
    root = np.sqrt(v)
    left = np.diag(root)
    right = np.diag(np.reciprocal(root))
    return ndot(left, R, right)
Ejemplo n.º 19
0
def symmetrized_known_distn(R, v):
    """
    Get the symmetrized matrix of a reversible markov process.
    The result is diag(sqrt(v)) R diag(1 / sqrt(v)).
    This returns a symmetric matrix that is not a rate matrix
    because rows do not sum to zero.
    The returned matrix should be similar to R
    in the sense of linear algebra matrix similarity.
    @param R: rate matrix
    @param v: stationary distribution
    @return: the rescaled matrix
    """
    root = np.sqrt(v)
    left = np.diag(root)
    right = np.diag(np.reciprocal(root))
    return ndot(left, R, right)
Ejemplo n.º 20
0
def _get_expectation(R, t):
    """
    Get the v-weighted sum of the entries of (P P'' - P' P') / P.

    Here P is the transition matrix at time t, primes denote elementwise
    time derivatives, and row i of the elementwise quotient is weighted
    by v[i], where v is the distribution derived from R.
    @param R: rate matrix
    @param t: amount of time
    @return: the weighted sum
    """
    # symmetrize the rate matrix
    v = mrate.R_to_distn(R)
    root_v = np.sqrt(v)
    lam = np.diag(root_v)
    rlam = np.diag(np.reciprocal(root_v))
    S = ndot(lam, -R, rlam)
    # eigendecompose the symmetrized rate matrix
    # this should satisfy R = ndot(rlam, V, np.diag(-W), V.T, lam)
    W, V = scipy.linalg.eigh(S)
    # get P and its two derivatives
    decay = np.exp(-W * t)
    P = ndot(rlam, V, np.diag(decay), V.T, lam)
    P_dt = ndot(rlam, V, np.diag(-W * decay), V.T, lam)
    P_dtt = ndot(rlam, V, np.diag(W * W * decay), V.T, lam)
    M = (P * P_dtt - P_dt * P_dt) / P
    # sum over all (i, j) of v[i] * M[i, j], as a single reduction
    # instead of a Python-level double loop
    return np.dot(v, M.sum(axis=1))
Ejemplo n.º 21
0
def bott_duffin(M):
    """
    Compute H inv(M H + P) for the centering matrix H = I - P.
    Here P is the constant matrix whose entries are all 1 / n.
    We pretend that P_L is H.
    @param M: a square matrix
    @return: the matrix H inv(M H + P)
    @raise ValueError: if M is not square
    """
    nrows, ncols = M.shape
    if nrows != ncols:
        raise ValueError('expected a square matrix')
    ones = np.ones(nrows)
    averaging = np.outer(ones, ones) / np.inner(ones, ones)
    centering = np.eye(nrows) - averaging
    inverse = np.linalg.inv(np.dot(M, centering) + averaging)
    return ndot(centering, inverse)
Ejemplo n.º 22
0
def bott_duffin(M):
    """
    Compute H inv(M H + P) for the centering matrix H = I - P.
    Here P is the constant matrix whose entries are all 1 / n.
    We pretend that P_L is H.
    @param M: a square matrix
    @return: the matrix H inv(M H + P)
    @raise ValueError: if M is not square
    """
    nrows, ncols = M.shape
    if nrows != ncols:
        raise ValueError('expected a square matrix')
    ones = np.ones(nrows)
    averaging = np.outer(ones, ones) / np.inner(ones, ones)
    centering = np.eye(nrows) - averaging
    inverse = np.linalg.inv(np.dot(M, centering) + averaging)
    return ndot(centering, inverse)
Ejemplo n.º 23
0
def get_response_content(fs):
    """
    Build a text report that compares a Schur complement in a random
    singular matrix with a candidate expression based on pseudoinverses.
    @param fs: unused
    @return: the multi-line report as a string
    """
    # define some dimensions
    nleading = 5
    ntrailing = 3
    nullity = 2
    n = nleading + ntrailing
    # get a random matrix assumed to be nonsingular
    M_nonsingular = sample_asymmetric_matrix(n)
    # get a random nullspace and its associated projections
    N = 10.0 * np.random.rand(n, nullity) - 5.0
    P_to_N = ndot(N, np.linalg.inv(ndot(N.T, N)), N.T)
    P_to_N_complement = np.eye(n) - P_to_N
    # get the truncated nullspace and its associated projections
    T = N[:nleading]
    P_to_T = ndot(T, np.linalg.inv(ndot(T.T, T)), T.T)
    P_to_T_complement = np.eye(nleading) - P_to_T
    # get the singular M with assumed nonsingular principal submatrix blocks
    M_singular = ndot(P_to_N_complement, M_nonsingular, P_to_N_complement)
    # get the schur complement in M and its eigendecomposition
    S = schur(M_singular, nleading)
    S_w, S_vt = np.linalg.eigh(S)
    # Get the double sided projection of the schur complement
    # onto the orthogonal complement of the truncated nullspace.
    Sp = ndot(P_to_T_complement, S, P_to_T_complement)
    Sp_w, Sp_vt = np.linalg.eigh(Sp)
    # Make a thing that is supposed to be the same as the schur complement.
    M_singular_pinv = np.linalg.pinv(M_singular)
    mystery_pinv = ndot(
        P_to_T_complement,
        M_singular_pinv[:nleading, :nleading],
        P_to_T_complement)
    mystery = np.linalg.pinv(mystery_pinv)
    # begin the output
    np.set_printoptions(linewidth=200)
    # Each item becomes one printed line (or one printed array).
    # Formatting the text directly avoids the Python-2-only
    # "print >> out" statement while producing the same characters.
    items = [
        'null space (N):',
        N,
        'schur complement (S):',
        S,
        'eigenvalues of S:',
        S_w,
        'eigenvectors of S:',
        S_vt,
        'double sided projection of the schur complement',
        'onto the complement of the truncated nullspace of M (Sp)',
        Sp,
        'eigenvalues of Sp:',
        Sp_w,
        'eigenvectors of Sp:',
        Sp_vt,
        'this thing that is supposed to be the schur complement:',
        mystery,
        'difference from the schur complement:',
        mystery - S,
    ]
    return ''.join('%s\n' % (item,) for item in items)
Ejemplo n.º 24
0
def pinvproj(M):
    """
    Compute inv(H M H + P) - P for the centering matrix H = I - P.
    Here P is the constant matrix whose entries are all 1 / n.
    This could be made more efficient using double centering.
    @param M: a square matrix
    @return: the matrix inv(H M H + P) - P
    @raise ValueError: if M is not square
    """
    nrows, ncols = M.shape
    if nrows != ncols:
        raise ValueError('expected a square matrix')
    ones = np.ones(nrows)
    averaging = np.outer(ones, ones) / np.inner(ones, ones)
    centering = np.eye(nrows) - averaging
    centered = ndot(centering, M, centering)
    return np.linalg.inv(centered + averaging) - averaging
Ejemplo n.º 25
0
def pinvproj(M):
    """
    Compute inv(H M H + P) - P for the centering matrix H = I - P.
    Here P is the constant matrix whose entries are all 1 / n.
    This could be made more efficient using double centering.
    @param M: a square matrix
    @return: the matrix inv(H M H + P) - P
    @raise ValueError: if M is not square
    """
    nrows, ncols = M.shape
    if nrows != ncols:
        raise ValueError('expected a square matrix')
    ones = np.ones(nrows)
    averaging = np.outer(ones, ones) / np.inner(ones, ones)
    centering = np.eye(nrows) - averaging
    centered = ndot(centering, M, centering)
    return np.linalg.inv(centered + averaging) - averaging
Ejemplo n.º 26
0
def get_response_content(fs):
    """
    Build a text report about M J M and R J R for a random 5x5 matrix M,
    where J is the all-ones matrix and R is the mean-removed M.
    @param fs: unused
    @return: the multi-line report as a string
    """
    n = 5
    e = np.ones(n)
    J = np.outer(e, e)
    M = sample_asymmetric_matrix(n)
    MJM = ndot(M, J, M)
    R = mean_removed(M)
    RJR = ndot(R, J, R)
    # begin the output
    np.set_printoptions(linewidth=200)
    # Each item becomes one printed line (or one printed array).
    # Formatting the text directly avoids the Python-2-only
    # "print >> out" statement while producing the same characters.
    items = [
        'original matrix (M):',
        M,
        'MJM:',
        MJM,
        'mean-removed matrix (R):',
        R,
        'grand mean of R:',
        np.mean(R),
        'RJR:',
        RJR,
    ]
    return ''.join('%s\n' % (item,) for item in items)
Ejemplo n.º 27
0
def get_barbell_rate_matrix(p_mid):
    """
    Build a three-state rate matrix on a path with a prescribed distribution.
    @param p_mid: probability assigned to the middle state
    @return: the pair (Q, p) of rate matrix and distribution
    """
    # define the distribution; the two end states share the remainder
    p_end = (1 - p_mid) / 2
    p = np.array([p_end, p_mid, p_end])
    # define a hollow exchangeability-like matrix on the path 0 - 1 - 2
    Z = np.array([
        [0.0, 1.0, 0.0],
        [1.0, 0.0, 1.0],
        [0.0, 1.0, 0.0],
    ])
    # rescale by elementwise square roots of the diagonal matrices
    scale_right = np.diag(p) ** 0.5
    scale_left = np.diag(np.reciprocal(p)) ** 0.5
    Q = np.copy(ndot(scale_left, Z, scale_right))
    # set each diagonal entry to the negated sum of its row
    for i, row in enumerate(Q):
        Q[i, i] = -np.sum(row)
    return Q, p
Ejemplo n.º 28
0
def get_internal_vertex_to_leaf_distn_cov(T, B):
    """
    This is a possibly equivalent formulation.
    It is based on Schur complementation in the unrooted covariance matrix.
    Return a map from an internal vertex to a leaf distribution.
    @param T: tree topology, passed to the Ftree helpers
    @param B: branch information, passed to the Ftree Laplacian helper
    @return: a dictionary that maps an internal vertex to a leaf distribution
    """
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # order the vertices so that leaves precede internal vertices
    ordered = leaves + internal
    nleaves = len(leaves)
    # The pseudoinverse of the full tree Laplacian
    # is treated as the unrooted covariance matrix.
    cov = np.linalg.pinv(Ftree.TB_to_L_principal(T, B, ordered))
    # Use the multivariate normal distribution wikipedia page
    # for conditional distributions.
    #
    # Author's notes on alternatives:
    # The Laplacian-based interpolator
    #   -ndot(np.linalg.pinv(L[nleaves:, nleaves:]), L[nleaves:, :nleaves])
    # works.
    # The covariance-based interpolator used below
    # seems like it should work but it does not.
    # A hack that adds the all-ones matrix to the covariance
    # before taking the blocks was also tried.
    cov_leaf_leaf = cov[:nleaves, :nleaves]
    cov_internal_leaf = cov[nleaves:, :nleaves]
    interpolator = ndot(cov_internal_leaf, np.linalg.pinv(cov_leaf_leaf))
    # row i of the interpolator belongs to internal vertex i,
    # and column j belongs to leaf j
    return dict(
        (v, dict(
            (leaf, interpolator[i, j]) for j, leaf in enumerate(leaves)))
        for i, v in enumerate(internal))
Ejemplo n.º 29
0
def check_generic_cut(valuator, extendor, A):
    """
    Look for a counterexample to a single-crossing property of a tree cut.

    The input matrix is expected to have a certain block structure.
    In particular, the leaf vertices are expected to
    precede the points of articulation.
    Because the tree is expected to be an unrooted binary tree,
    the relative number of leaves and points of articulation
    is determined by the size of the adjacency matrix.
    When a counterexample is found, the number of crossings
    and the matrix A are written to stdout.
    @param valuator: callable that maps the tip adjacency matrix
    to the tip valuations
    @param extendor: callable that maps (A, tip valuations)
    to the valuations of the points of articulation
    @param A: adjacency matrix of an unrooted edge-weighted binary tree
    @return: True if a counterexample is found, otherwise None
    """
    MatrixUtil.assert_symmetric(A)
    MatrixUtil.assert_nonnegative(A)
    MatrixUtil.assert_hollow(A)
    nverts = A.shape[0]
    if nverts < 4:
        raise Exception('expected at least four vertices')
    if nverts % 2 != 0:
        raise Exception('expected an even number of vertices')
    # Floor division keeps the counts integral on Python 3 as well,
    # which matters because they are used as slice bounds.
    half = nverts // 2
    ntips = half + 1
    narts = half - 1
    # get the schur complement laplacian and its associated adjacency matrix
    L = np.diag(np.sum(A, axis=1)) - A
    L_tips = L[:ntips, :ntips] - ndot(
        L[:ntips, -narts:],
        scipy.linalg.inv(L[-narts:, -narts:]),
        L[-narts:, :ntips],
    )
    A_tips = np.diag(np.diag(L_tips)) - L_tips
    # The tip valuations are used as returned by the valuator;
    # centering and normalizing them is deliberately disabled.
    tip_valuations = valuator(A_tips)
    art_valuations = extendor(A, tip_valuations)
    valuations = np.concatenate((tip_valuations, art_valuations))
    # count the edges whose endpoints have valuations of opposite sign
    ncrossings = sum(
        1
        for i in range(nverts)
        for j in range(i + 1, nverts)
        if valuations[i] * valuations[j] * A[i, j] < 0)
    if ncrossings != 1:
        # found a counterexample!
        print(ncrossings)
        print(A)
        return True
    return None
Ejemplo n.º 30
0
def check_generic_cut(valuator, extendor, A):
    """
    Look for a counterexample to a single-crossing property of a tree cut.

    The input matrix is expected to have a certain block structure.
    In particular, the leaf vertices are expected to
    precede the points of articulation.
    Because the tree is expected to be an unrooted binary tree,
    the relative number of leaves and points of articulation
    is determined by the size of the adjacency matrix.
    When a counterexample is found, the number of crossings
    and the matrix A are written to stdout.
    @param valuator: callable that maps the tip adjacency matrix
    to the tip valuations
    @param extendor: callable that maps (A, tip valuations)
    to the valuations of the points of articulation
    @param A: adjacency matrix of an unrooted edge-weighted binary tree
    @return: True if a counterexample is found, otherwise None
    """
    MatrixUtil.assert_symmetric(A)
    MatrixUtil.assert_nonnegative(A)
    MatrixUtil.assert_hollow(A)
    nverts = A.shape[0]
    if nverts < 4:
        raise Exception('expected at least four vertices')
    if nverts % 2 != 0:
        raise Exception('expected an even number of vertices')
    # Floor division keeps the counts integral on Python 3 as well,
    # which matters because they are used as slice bounds.
    half = nverts // 2
    ntips = half + 1
    narts = half - 1
    # get the schur complement laplacian and its associated adjacency matrix
    L = np.diag(np.sum(A, axis=1)) - A
    L_tips = L[:ntips, :ntips] - ndot(
        L[:ntips, -narts:],
        scipy.linalg.inv(L[-narts:, -narts:]),
        L[-narts:, :ntips],
    )
    A_tips = np.diag(np.diag(L_tips)) - L_tips
    # The tip valuations are used as returned by the valuator;
    # centering and normalizing them is deliberately disabled.
    tip_valuations = valuator(A_tips)
    art_valuations = extendor(A, tip_valuations)
    valuations = np.concatenate((tip_valuations, art_valuations))
    # count the edges whose endpoints have valuations of opposite sign
    ncrossings = sum(
        1
        for i in range(nverts)
        for j in range(i + 1, nverts)
        if valuations[i] * valuations[j] * A[i, j] < 0)
    if ncrossings != 1:
        # found a counterexample!
        print(ncrossings)
        print(A)
        return True
    return None
Ejemplo n.º 31
0
def get_pi_mi_t2_diag_approx(Q, v, t):
    """
    Second order taylor expansion for only some contributions.
    Contributions of off-diagonal entries are computed exactly.
    The result is the plain second order approximation plus an adjustment.
    @param Q: parent independent rate matrix
    @param v: stationary distribution
    @param t: amount of time
    @return: the adjusted approximation
    """
    nstates = len(v)
    # get the randomization rate
    rand_rate = -np.trace(Q) / (nstates-1)
    x = math.exp(-rand_rate*t)
    h = ndot(v, 1-v)
    log_term = math.log(1-x)
    adjustment = (x*(1 - 0.5*x - log_term) + log_term)*h
    return get_pi_mi_t2_approx(Q, v, t) + adjustment
Ejemplo n.º 32
0
def get_barbell_rate_matrix(p_mid):
    """
    Build a three-state rate matrix on a path with a prescribed distribution.
    @param p_mid: probability assigned to the middle state
    @return: the pair (Q, p) of rate matrix and distribution
    """
    # define the distribution; the two end states share the remainder
    p_end = (1 - p_mid)/2
    p = np.array([p_end, p_mid, p_end])
    # define a hollow exchangeability-like matrix on the path 0 - 1 - 2
    Z = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [0.0, 1.0, 0.0]])
    # rescale by elementwise square roots of the diagonal matrices
    scale_right = np.diag(p)**0.5
    scale_left = np.diag(np.reciprocal(p))**0.5
    Q = np.copy(ndot(scale_left, Z, scale_right))
    # set each diagonal entry to the negated sum of its row
    for i, row in enumerate(Q):
        Q[i, i] = -np.sum(row)
    return Q, p
Ejemplo n.º 33
0
def _holmes_rubin_2002_summation(U, a, b, i, K):
    """
    Evaluate a double summation as a single bilinear form.
    @param U: an orthonormal matrix
    @param a: integer initial state index
    @param b: integer final state index
    @param i: integer query state index
    @param K: a symmetric matrix with eigenvalue and time information
    @return: the sum over k and l of U[a,k]*U[i,k]*U[i,l]*U[b,l]*K[k,l]
    """
    # The explicit double loop
    #   total = 0
    #   for k in range(len(U)):
    #       for l in range(len(U)):
    #           total += U[a,k]*U[i,k]*U[i,l]*U[b,l]*K[k,l]
    # is written here as a product of two elementwise-scaled rows with K.
    query_row = U[i]
    left = U[a] * query_row
    right = U[b] * query_row
    return ndot(left, K, right)
Ejemplo n.º 34
0
def _holmes_rubin_2002_summation(U, a, b, i, K):
    """
    Evaluate a double summation as a single bilinear form.
    @param U: an orthonormal matrix
    @param a: integer initial state index
    @param b: integer final state index
    @param i: integer query state index
    @param K: a symmetric matrix with eigenvalue and time information
    @return: the sum over k and l of U[a,k]*U[i,k]*U[i,l]*U[b,l]*K[k,l]
    """
    # The explicit double loop
    #   total = 0
    #   for k in range(len(U)):
    #       for l in range(len(U)):
    #           total += U[a,k]*U[i,k]*U[i,l]*U[b,l]*K[k,l]
    # is written here as a product of two elementwise-scaled rows with K.
    query_row = U[i]
    left = U[a] * query_row
    right = U[b] * query_row
    return ndot(left, K, right)
Ejemplo n.º 35
0
def pinvproj(M):
    """
    Compute inv(H M H + P) - P for the centering matrix H = I - P.
    Here P is the constant matrix whose entries are all 1 / n.
    This could be made more efficient using double centering.
    @param M: a square matrix
    @return: the matrix inv(H M H + P) - P
    @raise ValueError: if M is not square
    """
    nrows, ncols = M.shape
    if nrows != ncols:
        raise ValueError('expected a square matrix')
    e = np.ones(nrows)
    P = np.outer(e, e) / np.inner(e, e)
    H = np.eye(nrows) - P
    # An alternative that was considered is
    # np.linalg.pinv(MatrixUtil.double_centered(M)).
    W = ndot(H, M, H) + P
    # A small determinant of W is deliberately tolerated.
    # The original computed the determinant and then ignored it,
    # so that dead computation has been removed.
    return np.linalg.inv(W) - P
Ejemplo n.º 36
0
def pinvproj(M):
    """
    Compute inv(H M H + P) - P for the centering matrix H = I - P.
    Here P is the constant matrix whose entries are all 1 / n.
    This could be made more efficient using double centering.
    @param M: a square matrix
    @return: the matrix inv(H M H + P) - P
    @raise ValueError: if M is not square
    """
    nrows, ncols = M.shape
    if nrows != ncols:
        raise ValueError('expected a square matrix')
    e = np.ones(nrows)
    P = np.outer(e, e) / np.inner(e, e)
    H = np.eye(nrows) - P
    # An alternative that was considered is
    # np.linalg.pinv(MatrixUtil.double_centered(M)).
    W = ndot(H, M, H) + P
    # A small determinant of W is deliberately tolerated.
    # The original computed the determinant and then ignored it,
    # so that dead computation has been removed.
    return np.linalg.inv(W) - P
Ejemplo n.º 37
0
def get_lhs(M, nkeep, ndelete):
    """
    Return pinv(P M P) for the centering-partial-deletion projection P.
    @param M: a matrix
    @param nkeep: passed to the projection helper
    @param ndelete: passed to the projection helper
    @return: the pseudoinverse of P M P
    """
    proj = get_p_centering_partial_del(nkeep, ndelete)
    sandwiched = ndot(proj, M, proj)
    return np.linalg.pinv(sandwiched)
Ejemplo n.º 38
0
def get_response_content(fs):
    """
    Build a text report about decompositions of a structured rate matrix.

    The report shows the left and right eigendecompositions of Q,
    a block diagonalization of Q through the solution of a Sylvester
    equation, and a reconstruction of expm(Q) from that block form.
    @param fs: object whose plain and rand attributes select the rate matrix
    @return: the multi-line report as a string
    @raise Exception: if neither fs.plain nor fs.rand is set
    """
    if fs.plain:
        Q, v_trans, v_recur = get_plain_rate_matrix()
    elif fs.rand:
        Q, v_trans, v_recur = get_random_structured_rate_matrix()
    else:
        raise Exception('expected either fs.plain or fs.rand to be set')
    nstates = Q.shape[0]
    w, vl, vr = scipy.linalg.eig(Q, left=True, right=True)
    vl_inv = scipy.linalg.inv(vl)
    vr_inv = scipy.linalg.inv(vr)
    #
    # do weird things with the sylvester equation
    # (floor division keeps the block size usable as a slice bound
    # and as an array dimension on Python 3 as well)
    n = nstates // 2
    A_syl = Q[:n, :n]
    B_syl = -Q[n:, n:]
    Q_syl = -Q[:n, n:]
    X = scipy.linalg.solve_sylvester(A_syl, B_syl, Q_syl)
    T = np.array(np.bmat([
        [np.eye(n), X],
        [np.zeros((n, n)), np.eye(n)],
    ]))
    T_inv = scipy.linalg.inv(T)
    #
    # do stuff with the stationary distributions of the separate processes
    v_trans_recur = np.hstack((v_trans, v_recur))
    D_sqrt = np.diag(np.sqrt(v_trans_recur))
    D_sqrt_recip = np.diag(np.reciprocal(np.sqrt(v_trans_recur)))
    block_diag = ndot(D_sqrt, T_inv, Q, T, D_sqrt_recip)
    # Only the eigenvectors are needed here.  The original rebound w
    # to these eigenvalues, so the eig-based reconstructions of Q below
    # were printed with the wrong eigenvalues.
    U = scipy.linalg.eigh(block_diag)[1]
    w_full = np.diag(ndot(U.T, D_sqrt, T_inv, Q, T, D_sqrt_recip, U))
    #
    np.set_printoptions(
        linewidth=1000000,
        threshold=1000000,
    )
    # Each item becomes one printed line (or one printed array),
    # and an empty string becomes a blank line.
    # Formatting the text directly avoids the Python-2-only
    # "print >> out" statement while producing the same layout.
    items = []

    def add_section(title, value):
        items.extend([title, value, ''])

    add_section('Q:', Q)
    add_section('w:', w)
    add_section('vl:', vl)
    add_section('vl.T:', vl.T)
    add_section('inv(vl):', vl_inv)
    add_section('vr:', vr)
    add_section('vr.T:', vr.T)
    add_section('inv(vr):', vr_inv)
    add_section(
        'inv(vl).T w vl.T:',
        np.dot(vl_inv.T, np.dot(np.diag(w), vl.T)))
    add_section(
        'vr w inv(vr):',
        np.dot(vr, np.dot(np.diag(w), vr_inv)))
    items.append('')
    items.extend(['sylvester equation stuff...', ''])
    add_section('X:', X)
    add_section('T:', T)
    add_section('inv(T):', T_inv)
    add_section('inv(T) Q T:', np.dot(T_inv, np.dot(Q, T)))
    add_section(
        'U.T D^(1/2) inv(T) Q T D^(-1/2) U:',
        ndot(U.T, D_sqrt, T_inv, Q, T, D_sqrt_recip, U))
    add_section('expm(Q):', scipy.linalg.expm(Q))
    add_section(
        'T D^-1/2 U exp(w) U.T D^1/2 T^-1',
        ndot(T, D_sqrt_recip, U, np.diag(np.exp(w_full)), U.T,
             D_sqrt, T_inv))
    return ''.join('%s\n' % (item,) for item in items)
Ejemplo n.º 39
0
def process(fs):
    """
    Sample a rate matrix and report spectral and distance-matrix summaries.
    @param fs: form data object; only fs.nstates is read here
    @return: the multi-line report text with trailing whitespace stripped
    """
    nstates = fs.nstates
    np.set_printoptions(linewidth=200)
    out = StringIO()

    def emit(*items):
        # One report line: items separated by single spaces.
        # Calling with no items writes a blank line.
        print >> out, ' '.join(str(item) for item in items)

    # Draw a symmetric matrix and a distribution, combine them into R,
    # then adjust the diagonal so that each row of R sums to zero.
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    root_v = np.sqrt(v)
    R = (S.T / root_v).T * root_v
    R -= np.diag(np.sum(R, axis=1))
    R_eigvals, R_eigvecs = scipy.linalg.eig(R)
    # Rescale R by the square roots of v; the result is required to be
    # symmetric before the symmetric eigensolver is applied to it.
    R_sym = (R.T * root_v).T / root_v
    if not np.allclose(R_sym, R_sym.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    sym_eigvals, sym_eigvecs = scipy.linalg.eigh(R_sym)
    gap = -sym_eigvals[-2]
    second_vec = sym_eigvecs.T[-2]
    second_vec_sq = second_vec**2
    # Rescale the eigenvectors of the symmetric matrix back towards R.
    rebuilt_eigvecs = (sym_eigvecs.T / root_v).T
    # Build a distance-like matrix from the pseudoinverse of the symmetric
    # matrix and cross-check it against the library commute distances.
    sym_pinv = scipy.linalg.pinv(R_sym)
    diag_rows = np.outer(np.ones(nstates), np.diag(sym_pinv))
    edm = -(diag_rows + diag_rows.T - 2 * sym_pinv)
    if not np.allclose(edm, mrate.get_commute_distance_matrix(R, v)):
        raise ValueError('error computing commute distances')
    centered = MatrixUtil.double_centered(edm)
    centered_eigvals, centered_eigvecs = scipy.linalg.eigh(centered)
    # Recompute squared pairwise distances directly from point coordinates.
    points = sym_eigvecs.T[:-1].T / np.sqrt(-sym_eigvals[:-1])
    brute_rows = []
    for b in points:
        row = []
        for a in points:
            delta = b - a
            row.append(np.dot(delta, delta))
        brute_rows.append(row)
    edm_brute = np.array(brute_rows)
    # The rescaled matrix is what the max(D) lines of the report refer to.
    edm_scaled = (edm.T / root_v).T / root_v
    # distance matrix section
    emit('reconstructed EDM:')
    emit(edm)
    emit()
    emit('divide by square roots of stationary probabilities:')
    emit(edm_scaled)
    emit()
    emit('eigh of centered EDM:')
    emit('eigenvalues:')
    emit(centered_eigvals)
    emit('reciprocal nonzero eigenvalues:')
    emit(1 / centered_eigvals)
    emit('eigenvectors:')
    emit(centered_eigvecs)
    emit()
    emit('squared distances computed brutely:')
    emit(edm_brute)
    emit()
    emit('1 / (h * max(D)):', 1 / (np.dot(v, 1 - v) * np.max(edm_scaled)))
    emit('1 / max(D):', 1 / np.max(edm_scaled))
    emit()
    # rate matrix section
    emit('sampled rate matrix R:')
    emit(R)
    emit('stationary distn:', v)
    emit('1/R01 + 1/R10:', 1 / R[0, 1] + 1 / R[1, 0])
    emit()
    emit('scipy.linagl.eig(R):')
    emit(R_eigvals)
    emit(R_eigvecs)
    emit()
    emit('symmetric matrix similar to R:')
    emit(R_sym)
    emit()
    emit('eigh of the symmetric similar matrix to R:')
    emit(sym_eigvals)
    emit(sym_eigvecs)
    emit('spectral gap:', gap)
    emit('entrywise squares of eigenvectors:')
    emit(sym_eigvecs**2)
    emit('a bilinear form involving a fiedler-like eigenvector:')
    emit(ndot(second_vec, R_sym, second_vec))
    emit('expected rate:', -np.dot(v, np.diag(R)))
    emit('second order expected rate:', -np.dot(second_vec_sq, np.diag(R)))
    emit()
    emit('eigenvectors of R from eigenvectors of the similar matrix:')
    emit(sym_eigvals)
    emit(rebuilt_eigvecs)
    emit()
    return out.getvalue().rstrip()
Ejemplo n.º 40
0
def schur(M, nsmall):
    """
    Compute a Schur complement that eliminates the trailing block of M.
    @param M: square matrix; its upper right block is reused, transposed,
    in place of the lower left block
    @param nsmall: order of the leading block that is kept
    @return: the leading block minus B inv(C) B.T
    """
    keep = slice(None, nsmall)
    drop = slice(nsmall, None)
    upper_right = M[keep, drop]
    trailing_inv = np.linalg.inv(M[drop, drop])
    correction = ndot(upper_right, trailing_inv, upper_right.T)
    return M[keep, keep] - correction
Ejemplo n.º 41
0
def schur(M, nsmall):
    """
    Compute a Schur complement that eliminates the trailing block of M.
    @param M: square matrix; its upper right block is reused, transposed,
    in place of the lower left block
    @param nsmall: order of the leading block that is kept
    @return: the leading block minus B inv(C) B.T
    """
    keep = slice(None, nsmall)
    drop = slice(nsmall, None)
    upper_right = M[keep, drop]
    trailing_inv = np.linalg.inv(M[drop, drop])
    correction = ndot(upper_right, trailing_inv, upper_right.T)
    return M[keep, keep] - correction
Ejemplo n.º 42
0
def get_response_content(fs):
    """
    Report eigendecompositions and a Sylvester-based block form of Q.
    @param fs: form data; fs.plain or fs.rand selects how Q is built
    @return: the multi-line report text
    @raise Exception: if neither fs.plain nor fs.rand is set
    """
    if fs.plain:
        Q, v_trans, v_recur = get_plain_rate_matrix()
    elif fs.rand:
        Q, v_trans, v_recur = get_random_structured_rate_matrix()
    else:
        raise Exception('expected either fs.plain or fs.rand to be set')
    nstates = Q.shape[0]
    # w is aligned column by column with vl and vr;
    # it must not be rebound before the reconstructions printed below.
    w, vl, vr = scipy.linalg.eig(Q, left=True, right=True)
    vl_inv = scipy.linalg.inv(vl)
    vr_inv = scipy.linalg.inv(vr)
    #
    # do weird things with the sylvester equation
    # Floor division keeps the block size an integer under true division too.
    n = nstates // 2
    A_syl = Q[:n, :n]
    B_syl = -Q[n:, n:]
    Q_syl = -Q[:n, n:]
    X = scipy.linalg.solve_sylvester(A_syl, B_syl, Q_syl)
    # T is the block matrix [[I, X], [0, I]].
    T = np.array(np.bmat([[np.eye(n), X], [np.zeros((n, n)), np.eye(n)]]))
    T_inv = scipy.linalg.inv(T)
    #
    # do stuff with the stationary distributions of the separate processes
    v_trans_recur = np.hstack((v_trans, v_recur))
    D_sqrt = np.diag(np.sqrt(v_trans_recur))
    D_sqrt_recip = np.diag(np.reciprocal(np.sqrt(v_trans_recur)))
    block_diag = ndot(D_sqrt, T_inv, Q, T, D_sqrt_recip)
    # Only the eigenvectors are kept here.  The eigenvalues from eigh come
    # back in ascending order, which need not match the order of w from eig,
    # so binding them to w would corrupt the eig-based reconstructions.
    U = scipy.linalg.eigh(block_diag)[1]
    w_full = np.diag(ndot(U.T, D_sqrt, T_inv, Q, T, D_sqrt_recip, U))
    #
    np.set_printoptions(linewidth=1000000, threshold=1000000)
    out = StringIO()
    print >> out, "Q:"
    print >> out, Q
    print >> out
    print >> out, "w:"
    print >> out, w
    print >> out
    print >> out, "vl:"
    print >> out, vl
    print >> out
    print >> out, "vl.T:"
    print >> out, vl.T
    print >> out
    print >> out, "inv(vl):"
    print >> out, vl_inv
    print >> out
    print >> out, "vr:"
    print >> out, vr
    print >> out
    print >> out, "vr.T:"
    print >> out, vr.T
    print >> out
    print >> out, "inv(vr):"
    print >> out, vr_inv
    print >> out
    print >> out, "inv(vl).T w vl.T:"
    print >> out, np.dot(vl_inv.T, np.dot(np.diag(w), vl.T))
    print >> out
    print >> out, "vr w inv(vr):"
    print >> out, np.dot(vr, np.dot(np.diag(w), vr_inv))
    print >> out
    print >> out
    print >> out, "sylvester equation stuff..."
    print >> out
    print >> out, "X:"
    print >> out, X
    print >> out
    print >> out, "T:"
    print >> out, T
    print >> out
    print >> out, "inv(T):"
    print >> out, T_inv
    print >> out
    print >> out, "inv(T) Q T:"
    print >> out, np.dot(T_inv, np.dot(Q, T))
    print >> out
    print >> out, "U.T D^(1/2) inv(T) Q T D^(-1/2) U:"
    print >> out, ndot(U.T, D_sqrt, T_inv, Q, T, D_sqrt_recip, U)
    print >> out
    print >> out, "expm(Q):"
    print >> out, scipy.linalg.expm(Q)
    print >> out
    print >> out, "T D^-1/2 U exp(w) U.T D^1/2 T^-1"
    print >> out, ndot(T, D_sqrt_recip, U, np.diag(np.exp(w_full)), U.T, D_sqrt, T_inv)
    print >> out
    return out.getvalue()
Ejemplo n.º 43
0
def get_response_content(fs):
    """
    Report rate matrices built from two sampled distributions and a kernel.
    @param fs: form data object; only fs.nstates is read here
    @return: the multi-line report text
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    nstates = fs.nstates
    # The two distributions are sampled in this order: mutation, selection.
    pi_m = sample_distribution(nstates)
    pi_q = sample_distribution(nstates)
    v = np.log(np.sqrt(pi_m / pi_q))
    # Kernel entries are x / sinh(x) for x = v[j] - v[i];
    # entries with x equal to zero keep the initial value 1.0.
    K = np.ones((nstates, nstates))
    for i in range(nstates):
        for j in range(nstates):
            x = v[j] - v[i]
            if not x:
                continue
            K[i, j] = x / math.sinh(x)
    K_eigvals, K_eigvecs = scipy.linalg.eigh(K)
    # make a gtr mutation matrix and check its stationary distribution
    S_precursor = sample_symmetric_rate_matrix(nstates)
    M = to_gtr_c(S_precursor, pi_m)
    if not np.allclose(R_to_distn(M), pi_m):
        raise ValueError('stationary distribution error')
    # resymmetrize with the square roots of the mutation distribution
    root_m = np.diag(np.sqrt(pi_m))
    recip_root_m = np.diag(np.reciprocal(np.sqrt(pi_m)))
    S = ndot(root_m, M, recip_root_m)
    R = S * K
    # de-symmetrize with the square roots of the selection distribution,
    # resetting each diagonal so that the rows sum to zero
    root_q = np.diag(np.sqrt(pi_q))
    recip_root_q = np.diag(np.reciprocal(np.sqrt(pi_q)))
    Q_from_R = ndot(recip_root_q, R, root_q)
    Q_from_R -= np.diag(np.sum(Q_from_R, axis=1))
    Q_from_S = ndot(recip_root_q, S, root_q)
    Q_from_S -= np.diag(np.sum(Q_from_S, axis=1))
    Q_from_precursor = to_gtr_c(S_precursor, pi_q)
    # write the report: (heading, value, number of trailing blank lines)
    sections = [
        ('mutation process stationary distribution:', pi_m, 1),
        ('selection process stationary distribution:', pi_q, 1),
        ('vector to which the kernel function is applied:', v, 1),
        ('kernel matrix K:', K, 1),
        ('eigenvalues of K:', K_eigvals, 1),
        ('eigenvectors of K:', K_eigvecs, 1),
        ('symmetric precursor matrix:', S_precursor, 1),
        ('rate matrix M:', M, 1),
        ('symmetrization S of rate matrix M:', S, 2),
        ('symmetrization R = S o K', R, 1),
        ('de-symmetrized rate matrix derived from R:', Q_from_R, 2),
        ('de-symmetrized rate matrix derived from S:', Q_from_S, 2),
        ('rate matrix derived from precursor rate matrix:',
            Q_from_precursor, 1),
    ]
    for heading, value, nblank in sections:
        print >> out, heading
        print >> out, value
        for _ in range(nblank):
            print >> out
    return out.getvalue()
Ejemplo n.º 44
0
def get_response_content(fs):
    """
    Report matrices derived from a sampled rate matrix at two time points.
    @param fs: form data object; only fs.nstates is read here
    @return: the multi-line report text
    """
    out = StringIO()
    np.set_printoptions(linewidth=200)

    def section(heading, value, nblank=1):
        # A heading line, the value, then the requested number of blank lines.
        print >> out, heading
        print >> out, value
        for _ in range(nblank):
            print >> out

    # get the user defined variables
    nstates = fs.nstates
    # sample a random rate matrix
    v = divtime.sample_distribution(nstates)
    S_sampled = divtime.sample_symmetric_rate_matrix(nstates)
    R = mrate.to_gtr_halpern_bruno(S_sampled, v)
    # get some properties of the rate matrix and its re-symmetrization
    S = mrate.symmetrized(R)
    # The result of this call is not used anywhere below.
    mrate.R_to_distn(R)
    # Only the last eigenvector column returned by eigh is needed.
    last_vec = np.linalg.eigh(S)[1].T[-1]
    D = np.diag(last_vec)**2
    D_inv = np.diag(np.reciprocal(last_vec))**2
    D_half = D**.5
    D_inv_half = D_inv**.5
    for t in (1.0, 2.0):
        P = scipy.linalg.expm(R * t)
        expm_St = scipy.linalg.expm(S * t)
        M = ndot(D_half, expm_St, D_half)
        M_star = ndot(D_inv_half, expm_St, D_inv_half)
        log_M_star = np.log(M_star)
        log_eigvals, log_eigvecs = np.linalg.eigh(log_M_star)
        E = M * log_M_star
        E_eigvals, E_eigvecs = np.linalg.eigh(E)
        section('t:', t)
        section('randomly sampled rate matrix R', R)
        section('symmetrized matrix S', S)
        section('stationary distribution diagonal D', D)
        section('R = D^-1/2 S D^1/2', ndot(D_inv_half, S, D_half))
        section('probability matrix e^(R*t) = P', P)
        section('P = D^-1/2 e^(S*t) D^1/2',
                ndot(D_inv_half, expm_St, D_half))
        print >> out, 'pairwise distribution matrix M'
        section('M = D^1/2 e^(S*t) D^1/2', M)
        section('sum of entries of M', np.sum(M))
        section('M_star = D^-1/2 e^(S*t) D^-1/2', M_star)
        section('entrywise logarithm logij(M_star)', log_M_star)
        section('Hadamard product M o logij(M_star) = E', E)
        section('spectrum of M:', np.linalg.eigvalsh(M))
        section('spectrum of logij(M_star):', log_eigvals)
        section('corresponding eigenvectors of logij(M_star) as columns:',
                log_eigvecs)
        section('spectrum of E:', E_eigvals)
        section('corresponding eigenvectors of E as columns:', E_eigvecs)
        section('entrywise square roots of stationary distribution:',
                np.sqrt(v))
        section('sum of entries of E:', np.sum(E))
        section('mutual information:',
                ctmcmi.get_mutual_information(R, t), 2)
    return out.getvalue()
Ejemplo n.º 45
0
def process(fs):
    """
    Sample a rate matrix and report spectral and distance-matrix summaries.
    @param fs: form data object; only fs.nstates is read here
    @return: the multi-line report text with trailing whitespace stripped
    """
    nstates = fs.nstates
    np.set_printoptions(linewidth=200)
    out = StringIO()

    def emit(*items):
        # One report line: items separated by single spaces.
        # Calling with no items writes a blank line.
        print >> out, ' '.join(str(item) for item in items)

    # Draw a symmetric matrix and a distribution, combine them into R,
    # then adjust the diagonal so that each row of R sums to zero.
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    root_v = np.sqrt(v)
    R = (S.T / root_v).T * root_v
    R -= np.diag(np.sum(R, axis=1))
    R_eigvals, R_eigvecs = scipy.linalg.eig(R)
    # Rescale R by the square roots of v; the result is required to be
    # symmetric before the symmetric eigensolver is applied to it.
    R_sym = (R.T * root_v).T / root_v
    if not np.allclose(R_sym, R_sym.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    sym_eigvals, sym_eigvecs = scipy.linalg.eigh(R_sym)
    gap = -sym_eigvals[-2]
    second_vec = sym_eigvecs.T[-2]
    second_vec_sq = second_vec**2
    # Rescale the eigenvectors of the symmetric matrix back towards R.
    rebuilt_eigvecs = (sym_eigvecs.T / root_v).T
    # Build a distance-like matrix from the pseudoinverse of the symmetric
    # matrix and cross-check it against the library commute distances.
    sym_pinv = scipy.linalg.pinv(R_sym)
    diag_rows = np.outer(np.ones(nstates), np.diag(sym_pinv))
    edm = -(diag_rows + diag_rows.T - 2*sym_pinv)
    if not np.allclose(edm, mrate.get_commute_distance_matrix(R, v)):
        raise ValueError('error computing commute distances')
    centered = MatrixUtil.double_centered(edm)
    centered_eigvals, centered_eigvecs = scipy.linalg.eigh(centered)
    # Recompute squared pairwise distances directly from point coordinates.
    points = sym_eigvecs.T[:-1].T / np.sqrt(-sym_eigvals[:-1])
    brute_rows = []
    for b in points:
        row = []
        for a in points:
            delta = b - a
            row.append(np.dot(delta, delta))
        brute_rows.append(row)
    edm_brute = np.array(brute_rows)
    # The rescaled matrix is what the max(D) lines of the report refer to.
    edm_scaled = (edm.T / root_v).T / root_v
    # distance matrix section
    emit('reconstructed EDM:')
    emit(edm)
    emit()
    emit('divide by square roots of stationary probabilities:')
    emit(edm_scaled)
    emit()
    emit('eigh of centered EDM:')
    emit('eigenvalues:')
    emit(centered_eigvals)
    emit('reciprocal nonzero eigenvalues:')
    emit(1 / centered_eigvals)
    emit('eigenvectors:')
    emit(centered_eigvecs)
    emit()
    emit('squared distances computed brutely:')
    emit(edm_brute)
    emit()
    emit('1 / (h * max(D)):', 1 / (np.dot(v, 1-v) * np.max(edm_scaled)))
    emit('1 / max(D):', 1 / np.max(edm_scaled))
    emit()
    # rate matrix section
    emit('sampled rate matrix R:')
    emit(R)
    emit('stationary distn:', v)
    emit('1/R01 + 1/R10:', 1/R[0,1] + 1/R[1,0])
    emit()
    emit('scipy.linagl.eig(R):')
    emit(R_eigvals)
    emit(R_eigvecs)
    emit()
    emit('symmetric matrix similar to R:')
    emit(R_sym)
    emit()
    emit('eigh of the symmetric similar matrix to R:')
    emit(sym_eigvals)
    emit(sym_eigvecs)
    emit('spectral gap:', gap)
    emit('entrywise squares of eigenvectors:')
    emit(sym_eigvecs ** 2)
    emit('a bilinear form involving a fiedler-like eigenvector:')
    emit(ndot(second_vec, R_sym, second_vec))
    emit('expected rate:', -np.dot(v, np.diag(R)))
    emit('second order expected rate:', -np.dot(second_vec_sq, np.diag(R)))
    emit()
    emit('eigenvectors of R from eigenvectors of the similar matrix:')
    emit(sym_eigvals)
    emit(rebuilt_eigvecs)
    emit()
    return out.getvalue().rstrip()
Ejemplo n.º 46
0
def process(fs):
    """
    Compare a sampled rate matrix R with a simplified rate matrix Q.
    @param fs: form data; reads nstates, t, and the mode flags
    parent_indep, child_indep, and bipartitioned
    @return: the multi-line report text with trailing whitespace stripped
    @raise ValueError: if none of the three mode flags is set
    """
    nstates = fs.nstates
    np.set_printoptions(linewidth=200)
    t = fs.t
    # Q is only defined by one of the mode branches below.
    # Fail early with a clear message instead of hitting an
    # unbound name halfway through the report.
    if not (fs.parent_indep or fs.child_indep or fs.bipartitioned):
        raise ValueError(
                'expected one of parent_indep, child_indep, '
                'or bipartitioned to be selected')
    ### sample a random time
    ##time_mu = 0.01
    ##t = random.expovariate(1 / time_mu)
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    R = (v**-0.5)[:,np.newaxis] * S * (v**0.5)
    R -= np.diag(np.sum(R, axis=1))
    # Construct a parent-independent process
    # with the same max rate and stationary distribution
    # as the sampled process.
    if fs.parent_indep:
        Q = np.outer(np.ones(nstates), v)
        Q -= np.diag(np.sum(Q, axis=1))
        pi_rescaling_factor = max(np.diag(R) / np.diag(Q))
        Q *= pi_rescaling_factor
        Z = msimpl.get_fast_meta_f81_autobarrier(Q)
    # Construct a child-independent process
    # with the same expected rate
    # as the sampled process
    if fs.child_indep:
        C = np.outer(1/v, np.ones(nstates))
        C -= np.diag(np.sum(C, axis=1))
        ci_rescaling_factor = np.max(R / C)
        #expected_rate = -ndot(np.diag(R), v)
        #ci_rescaling_factor = expected_rate / (nstates*(nstates-1))
        #ci_rescaling_factor = expected_rate / (nstates*nstates)
        C *= ci_rescaling_factor
        Q = C
    # The branches are not exclusive: a later one overrides Q.
    if fs.bipartitioned:
        Q = msimpl.get_fast_meta_f81_autobarrier(R)
    # Check that the mutual information of the
    # parent independent process is smaller.
    out = StringIO()
    print >> out, 'sampled symmetric part of the rate matrix S:'
    print >> out, S
    print >> out
    print >> out, 'sampled stationary distribution v:'
    print >> out, v
    print >> out
    print >> out, 'shannon entropy of stationary distribution v:'
    print >> out, -np.dot(np.log(v), v)
    print >> out
    print >> out, 'sqrt stationary distribution:'
    print >> out, np.sqrt(v)
    print >> out
    print >> out, 'implied rate matrix R:'
    print >> out, R
    print >> out
    print >> out, 'eigenvalues of R:', scipy.linalg.eigvals(R)
    print >> out
    # The trailing comma keeps the value on the same line as its label.
    print >> out, 'relaxation rate of R:',
    print >> out, sorted(np.abs(scipy.linalg.eigvals(R)))[1]
    print >> out
    print >> out, 'expected rate of R:', mrate.Q_to_expected_rate(R)
    print >> out
    print >> out, 'cheeger bounds of R:', get_cheeger_bounds(R, v)
    print >> out
    print >> out, 'randomization rate of R:', get_randomization_rate(R, v)
    print >> out
    candidates = [get_randomization_candidate(R, v, i) for i in range(nstates)]
    if np.allclose(get_randomization_rate(R, v), candidates):
        print >> out, 'all candidates are equal to this rate'
    else:
        print >> out, 'not all candidates are equal to this rate'
    print >> out
    print >> out, 'simplified rate matrix Q:'
    print >> out, Q
    print >> out
    qv = mrate.R_to_distn(Q)
    print >> out, 'stationary distribution of Q:'
    print >> out, qv
    print >> out
    print >> out, 'ratio qv/v:'
    print >> out, qv / v
    print >> out
    print >> out, 'shannon entropy of stationary distribution of Q:'
    print >> out, -np.dot(np.log(qv), qv)
    print >> out
    if fs.parent_indep:
        print >> out, 'parent independent rescaling factor:'
        print >> out, pi_rescaling_factor
        print >> out
    if fs.child_indep:
        print >> out, 'child independent rescaling factor:'
        print >> out, ci_rescaling_factor
        print >> out
    print >> out, 'eigenvalues of Q:', scipy.linalg.eigvals(Q)
    print >> out
    print >> out, 'relaxation rate of Q:',
    print >> out, sorted(np.abs(scipy.linalg.eigvals(Q)))[1]
    print >> out
    print >> out, 'expected rate of Q:', mrate.Q_to_expected_rate(Q)
    print >> out
    print >> out, 'cheeger bounds of Q:', get_cheeger_bounds(Q, v)
    print >> out
    print >> out, 'randomization rate of Q:', get_randomization_rate(Q, v)
    print >> out
    candidates = [get_randomization_candidate(Q, v, i) for i in range(nstates)]
    if np.allclose(get_randomization_rate(Q, v), candidates):
        print >> out, 'all candidates are equal to this rate'
    else:
        print >> out, 'warning: not all candidates are equal to this rate'
    print >> out
    print >> out, 'E(rate) of Q divided by logical entropy:',
    print >> out, mrate.Q_to_expected_rate(Q) / ndot(v, 1-v)
    print >> out
    print >> out, 'symmetric matrix similar to Q:'
    # From here on S refers to this rescaling of Q, not to the sampled matrix.
    S = ndot(np.diag(np.sqrt(v)), Q, np.diag(1/np.sqrt(v)))
    print >> out, S
    print >> out
    print >> out, 'eigendecomposition of the similar matrix:'
    W, V = scipy.linalg.eigh(S)
    print >> out, V
    print >> out, np.diag(W)
    print >> out, V.T
    print >> out
    #
    print >> out, 'time:', t
    print >> out
    print >> out, 'stationary distn logical entropy:', ndot(v, 1-v)
    print >> out
    #
    P_by_hand = get_pi_transition_matrix(Q, v, t)
    print >> out, 'simplified-process transition matrix computed by hand:'
    print >> out, P_by_hand
    print >> out
    print >> out, 'simplified-process transition matrix computed by expm:'
    print >> out, scipy.linalg.expm(Q*t)
    print >> out
    #
    print >> out, 'simplified-process m.i. by hand:'
    print >> out, get_pi_mi(Q, v, t)
    print >> out
    print >> out, 'simplified-process m.i. by expm:'
    print >> out, ctmcmi.get_expected_ll_ratio(Q, t)
    print >> out
    #
    print >> out, 'original process m.i. by expm:'
    print >> out, ctmcmi.get_expected_ll_ratio(R, t)
    print >> out
    #
    print >> out, 'stationary distn Shannon entropy:'
    print >> out, -ndot(v, np.log(v))
    print >> out
    #
    if fs.parent_indep:
        print >> out, 'approximate simplified process m.i. 2nd order approx:'
        print >> out, get_pi_mi_t2_approx(Q, v, t)
        print >> out
        print >> out, 'approximate simplified process m.i. "better" approx:'
        print >> out, get_pi_mi_t2_diag_approx(Q, v, t)
        print >> out
        print >> out, '"f81-ization plus barrier" of pure f81-ization:'
        print >> out, Z
        print >> out
    #
    return out.getvalue().rstrip()
Ejemplo n.º 47
0
def get_response_content(fs):
    """
    Build a rate matrix from user supplied energies and report on it.
    @param fs: form data; fs.energies is text with one energy per line
    @return: the multi-line report text with trailing whitespace stripped
    @raise ValueError: if an energy is not a number or there are over 100
    """
    # read the energies from the form data
    energies = []
    for line in iterutils.stripped_lines(fs.energies.splitlines()):
        try:
            energy = float(line)
        except ValueError:
            raise ValueError('invalid energy: %s' % line)
        energies.append(energy)
    n = len(energies)
    if n > 100:
        raise ValueError('too many energies')
    # compute the rate matrix
    R = np.zeros((n, n))
    for row in range(n):
        for col in range(n):
            rate = math.exp(-(energies[col] - energies[row]))
            R[row, col] = rate
    # Each diagonal entry was set to exp(0) = 1 above, so adding 1 back
    # leaves the diagonal equal to minus the sum of the off-diagonal rates.
    for i, r in enumerate(R):
        R[i, i] = -np.sum(r) + 1
    # get the transition matrix at large finite time
    large_t = 1000.0
    T = scipy.linalg.expm(R * large_t)
    # eigendecompose
    Wr, Vr = scipy.linalg.eig(R, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(R, left=True, right=False)
    # Get the left eigenvector whose eigenvalue is smallest in magnitude.
    # Selecting by index avoids comparing eigenvector arrays against each
    # other, which raises when two eigenvalue magnitudes are tied.
    pi_index = min(range(n), key=lambda i: abs(Wl[i]))
    pi_eigenvector = Vl[:, pi_index]
    # get the stationary distribution itself by rescaling to unit sum
    pi_arr = pi_eigenvector / np.sum(pi_eigenvector)
    # print things
    np.set_printoptions(linewidth=300)
    out = StringIO()
    print >> out, 'rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'rate matrix row sums:'
    print >> out, np.sum(R, axis=1)
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wr
    print >> out
    print >> out, 'corresponding orthonormal right eigenvectors (columns):'
    print >> out, Vr
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wl
    print >> out
    print >> out, 'corresponding orthonormal left eigenvectors (columns):'
    print >> out, Vl
    print >> out
    print >> out, 'L2 normalized eigenvector associated with stationary distn:'
    print >> out, pi_eigenvector
    print >> out
    print >> out, 'L1 renormalized vector (the stationary distribution):'
    print >> out, pi_arr
    print >> out
    print >> out
    # eigendecompose the transition matrix
    Wr, Vr = scipy.linalg.eig(T, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(T, left=True, right=False)
    print >> out, 'transition matrix for t=%f:' % large_t
    print >> out, T
    print >> out
    print >> out, 'transition matrix row sums:'
    print >> out, np.sum(T, axis=1)
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wr
    print >> out
    print >> out, 'corresponding orthonormal right eigenvectors (columns):'
    print >> out, Vr
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wl
    print >> out
    print >> out, 'corresponding orthonormal left eigenvectors (columns):'
    print >> out, Vl
    print >> out
    print >> out, 'incorrect reconstitution of the transition matrix:'
    print >> out, ndot(Vr, np.diag(Wr), Vl.T)
    print >> out
    print >> out
    # Use the known properties of reversibility to symmetrize the matrix.
    t = 3
    coeffs, rates, c = get_identicality_params(R)
    print >> out, 'brute identicality computation for t=%f:' % t
    print >> out, get_numerical_identicality(R, t)
    print >> out
    print >> out, 'sophisticated identicality computation for t=%f:' % t
    print >> out, get_symbolic_identicality(coeffs, rates, c, t)
    print >> out
    print >> out
    # Try another couple rate matrices.
    e2 = math.exp(2)
    en2 = math.exp(-2)
    rate_matrices = [
        np.array([[-2.0, 2.0], [2.0, -2.0]]),
        np.array([[-1.0, 1.0], [3.0, -3.0]]),
        np.array([[-1, 1, 0], [1, -2, 1], [0, 1, -1]]),
        #np.array([[-4.0, 4.0, 0], [1, -2, 1], [0, 4, -4]])]
        #np.array([[-1, 1, 0], [7, -14, 7], [0, 1, -1]])]
        np.array([[-en2, en2, 0], [e2, -2 * e2, e2], [0, en2, -en2]])
    ]
    t = 3.0
    # From here on R is rebound to each of the hard-coded test matrices.
    for R in rate_matrices:
        coeffs, rates, c = get_identicality_params(R)
        print >> out, 'test rate matrix:'
        print >> out, R
        print >> out
        print >> out, 'eigenvalues:'
        print >> out, scipy.linalg.eigvals(R)
        print >> out
        print >> out, 'stationary distribution:'
        print >> out, R_to_distn(R)
        print >> out
        print >> out, 'brute identicality computation for t=%f:' % t
        print >> out, get_numerical_identicality(R, t)
        print >> out
        print >> out, 'sophisticated identicality computation for t=%f:' % t
        print >> out, get_symbolic_identicality(coeffs, rates, c, t)
        print >> out
        print >> out, 'identicality derivative for t=%f:' % t
        print >> out, get_identicality_derivative(coeffs, rates, t)
        print >> out
        print >> out
    # return the message
    return out.getvalue().rstrip()
Ejemplo n.º 48
0
def get_response_content(fs):
    """
    Report matrices derived from a sampled rate matrix at two time points.
    @param fs: form data object; only fs.nstates is read here
    @return: the multi-line report text
    """
    out = StringIO()
    np.set_printoptions(linewidth=200)

    def section(heading, value, nblank=1):
        # A heading line, the value, then the requested number of blank lines.
        print >> out, heading
        print >> out, value
        for _ in range(nblank):
            print >> out

    # get the user defined variables
    nstates = fs.nstates
    # sample a random rate matrix
    v = divtime.sample_distribution(nstates)
    S_sampled = divtime.sample_symmetric_rate_matrix(nstates)
    R = mrate.to_gtr_halpern_bruno(S_sampled, v)
    # get some properties of the rate matrix and its re-symmetrization
    S = mrate.symmetrized(R)
    # The result of this call is not used anywhere below.
    mrate.R_to_distn(R)
    # Only the last eigenvector column returned by eigh is needed.
    last_vec = np.linalg.eigh(S)[1].T[-1]
    D = np.diag(last_vec)**2
    D_inv = np.diag(np.reciprocal(last_vec))**2
    D_half = D**.5
    D_inv_half = D_inv**.5
    for t in (1.0, 2.0):
        P = scipy.linalg.expm(R*t)
        expm_St = scipy.linalg.expm(S*t)
        M = ndot(D_half, expm_St, D_half)
        M_star = ndot(D_inv_half, expm_St, D_inv_half)
        log_M_star = np.log(M_star)
        log_eigvals, log_eigvecs = np.linalg.eigh(log_M_star)
        E = M * log_M_star
        E_eigvals, E_eigvecs = np.linalg.eigh(E)
        section('t:', t)
        section('randomly sampled rate matrix R', R)
        section('symmetrized matrix S', S)
        section('stationary distribution diagonal D', D)
        section('R = D^-1/2 S D^1/2', ndot(D_inv_half, S, D_half))
        section('probability matrix e^(R*t) = P', P)
        section('P = D^-1/2 e^(S*t) D^1/2',
                ndot(D_inv_half, expm_St, D_half))
        print >> out, 'pairwise distribution matrix M'
        section('M = D^1/2 e^(S*t) D^1/2', M)
        section('sum of entries of M', np.sum(M))
        section('M_star = D^-1/2 e^(S*t) D^-1/2', M_star)
        section('entrywise logarithm logij(M_star)', log_M_star)
        section('Hadamard product M o logij(M_star) = E', E)
        section('spectrum of M:', np.linalg.eigvalsh(M))
        section('spectrum of logij(M_star):', log_eigvals)
        section('corresponding eigenvectors of logij(M_star) as columns:',
                log_eigvecs)
        section('spectrum of E:', E_eigvals)
        section('corresponding eigenvectors of E as columns:', E_eigvecs)
        section('entrywise square roots of stationary distribution:',
                np.sqrt(v))
        section('sum of entries of E:', np.sum(E))
        section('mutual information:',
                ctmcmi.get_mutual_information(R, t), 2)
    return out.getvalue()
Ejemplo n.º 49
0
def process(fs):
    """
    Report how perturbing the stationary distribution changes a rate matrix.
    A rate matrix R is sampled, its stationary distribution is shifted by
    a small random vector, and a second rate matrix Q is built for the
    shifted distribution using the selected model.
    @param fs: form data; reads nstates, eps, and the flags knudsen and sella
    @return: the multi-line report text with trailing whitespace stripped
    @raise ValueError: if the shifted distribution leaves [0, 1],
    or if neither fs.knudsen nor fs.sella is set
    """
    n = fs.nstates
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(n)
    v = mrate.sample_distn(n)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    R_W, R_V = scipy.linalg.eig(R)
    # construct the symmetric matrix that is similar to R
    R_sim = (R.T * psi).T / psi
    if not np.allclose(R_sim, R_sim.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    R_sim_W, R_sim_V = scipy.linalg.eigh(R_sim)
    R_gap = -R_sim_W[-2]
    v2 = R_sim_V.T[-2]**2
    # reconstruct the eigenvectors of R
    R_V_rebuilt = (R_sim_V.T / psi).T
    # Sample some numbers then subtract mean then normalize.
    dv = np.random.exponential(1, n)
    dv -= np.mean(dv)
    dv *= fs.eps / np.dot(dv, dv)
    qv = v + dv
    if any(qv < 0) or any(1 < qv):
        raise ValueError(
            'the stationary distribution change was too large '
            'for the randomly sampled process')
    qpsi = np.sqrt(qv)
    # define the rate matrix
    if fs.knudsen:
        Q = (S.T / qpsi).T * qpsi
    elif fs.sella:
        Q = R.copy()
        for a in range(n):
            for b in range(n):
                if a != b:
                    tau = (qv[b] / v[b]) / (qv[a] / v[a])
                    # The factor log(tau) / (1 - 1/tau) tends to 1 as
                    # tau tends to 1, so an exact 1 leaves the rate as is
                    # instead of evaluating 0 / 0 (e.g. when eps is zero).
                    if tau == 1:
                        continue
                    Q[a, b] *= math.log(tau) / (1 - 1/tau)
    else:
        # Without a model there is no Q to report on.
        raise ValueError('expected either knudsen or sella to be selected')
    # Reset the diagonal so that each row of Q sums to zero.
    Q -= np.diag(np.sum(Q, axis=1))
    # construct the symmetric matrix that is similar to Q
    Q_sim = (Q.T * qpsi).T / qpsi
    Q_sim_W, Q_sim_V = scipy.linalg.eigh(Q_sim)
    Q_gap = -Q_sim_W[-2]
    # report some stuff
    print >> out, 'sampled rate matrix R:'
    print >> out, R
    print >> out, 'stationary distn:', v
    print >> out
    print >> out, 'scipy.linagl.eig(R):'
    print >> out, R_W
    print >> out, R_V
    print >> out
    print >> out, 'symmetric matrix similar to R:'
    print >> out, R_sim
    print >> out
    print >> out, 'eigh of the symmetric similar matrix to R:'
    print >> out, R_sim_W
    print >> out, R_sim_V
    print >> out, 'spectral gap:', R_gap
    print >> out, 'entrywise squares of eigenvectors:'
    print >> out, R_sim_V ** 2
    print >> out, 'a bilinear form involving a fiedler-like eigenvector:'
    print >> out, ndot(R_sim_V.T[-2], R_sim, R_sim_V.T[-2])
    print >> out, 'expected rate:', -np.dot(v, np.diag(R))
    print >> out, 'second order expected rate:', -np.dot(v2, np.diag(R))
    print >> out
    print >> out, 'eigenvectors of R from eigenvectors of the similar matrix:'
    print >> out, R_sim_W
    print >> out, R_V_rebuilt
    print >> out
    print >> out, 'mutation-selection balance matrix Q:'
    print >> out, Q
    print >> out, 'stationary distn:', qv
    print >> out, 'spectral gap:', Q_gap
    print >> out
    print >> out, 'symmetric matrix similar to Q:'
    print >> out, Q_sim
    print >> out
    print >> out, 'pi(Q) - pi(R):', dv
    print >> out, 'gap(Q) - gap(R):', Q_gap - R_gap
    print >> out, 'diag(Q) - diag(R):', np.diag(Q) - np.diag(R)
    print >> out, 'trace(Q) - trace(R):', np.trace(Q) - np.trace(R)
    print >> out
    print >> out, 'rate away estimate of spectral gap change:'
    print >> out, np.dot(np.diag(Q) - np.diag(R), R_sim_V.T[-2]**2)
    print >> out
    return out.getvalue().rstrip()
Ejemplo n.º 50
0
def process(fs):
    """
    Sample a rate matrix R, perturb its stationary distribution, and report.
    A positive symmetric matrix S and a distribution v are sampled,
    and R is built from them so that it has a symmetric similar matrix.
    The distribution v is then perturbed to qv, and a second rate matrix Q
    is built either from S (knudsen) or by rescaling the rates of R (sella).
    @param fs: provides nstates, eps, and the flags knudsen and sella
    @return: a multi-line text report with trailing whitespace removed
    @raise ValueError: if the similar matrix is not symmetric,
    if an entry of the perturbed distribution leaves [0, 1],
    or if neither the knudsen nor the sella flag is set
    """
    n = fs.nstates
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(n)
    v = mrate.sample_distn(n)
    psi = np.sqrt(v)
    # off-diagonal entries are R[i, j] = S[i, j] * psi[j] / psi[i]
    R = (S.T / psi).T * psi
    # reset the diagonal so that each row of R sums to zero
    R -= np.diag(np.sum(R, axis=1))
    R_W, R_V = scipy.linalg.eig(R)
    # construct the symmetric matrix that is similar to R
    R_sim = (R.T * psi).T / psi
    if not np.allclose(R_sim, R_sim.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    # eigh returns eigenvalues in ascending order,
    # so index -2 is the second largest eigenvalue
    R_sim_W, R_sim_V = scipy.linalg.eigh(R_sim)
    R_gap = -R_sim_W[-2]
    v2 = R_sim_V.T[-2]**2
    # reconstruct the eigenvectors of R
    # by dividing row i of the eigenvector matrix by psi[i]
    R_V_rebuilt = (R_sim_V.T / psi).T
    # Sample some numbers then subtract mean then normalize.
    dv = np.random.exponential(1, n)
    dv -= np.mean(dv)
    # NOTE(review): this divides by the squared norm of dv, so the resulting
    # Euclidean norm is eps / |dv| rather than eps -- confirm this is intended
    dv *= fs.eps / np.dot(dv, dv)
    qv = v + dv
    if any(qv < 0) or any(1 < qv):
        raise ValueError('the stationary distribution change was too large '
                         'for the randomly sampled process')
    qpsi = np.sqrt(qv)
    # define the rate matrix
    if fs.knudsen:
        Q = (S.T / qpsi).T * qpsi
    elif fs.sella:
        Q = R.copy()
        for a in range(n):
            for b in range(n):
                if a != b:
                    tau = (qv[b] / v[b]) / (qv[a] / v[a])
                    # The multiplier log(tau) / (1 - 1/tau) is 0/0 at
                    # tau == 1 (for example when eps is zero) but its limit
                    # there is 1, so the rate is left unchanged in that case.
                    if tau != 1:
                        Q[a, b] *= math.log(tau) / (1 - 1 / tau)
    else:
        # without this branch Q would be unbound below
        raise ValueError('expected either the knudsen or the sella option')
    Q -= np.diag(np.sum(Q, axis=1))
    # construct the symmetric matrix that is similar to Q
    Q_sim = (Q.T * qpsi).T / qpsi
    Q_sim_W, Q_sim_V = scipy.linalg.eigh(Q_sim)
    Q_gap = -Q_sim_W[-2]
    # report some stuff
    print >> out, 'sampled rate matrix R:'
    print >> out, R
    print >> out, 'stationary distn:', v
    print >> out
    print >> out, 'scipy.linagl.eig(R):'
    print >> out, R_W
    print >> out, R_V
    print >> out
    print >> out, 'symmetric matrix similar to R:'
    print >> out, R_sim
    print >> out
    print >> out, 'eigh of the symmetric similar matrix to R:'
    print >> out, R_sim_W
    print >> out, R_sim_V
    print >> out, 'spectral gap:', R_gap
    print >> out, 'entrywise squares of eigenvectors:'
    print >> out, R_sim_V**2
    print >> out, 'a bilinear form involving a fiedler-like eigenvector:'
    print >> out, ndot(R_sim_V.T[-2], R_sim, R_sim_V.T[-2])
    print >> out, 'expected rate:', -np.dot(v, np.diag(R))
    print >> out, 'second order expected rate:', -np.dot(v2, np.diag(R))
    print >> out
    print >> out, 'eigenvectors of R from eigenvectors of the similar matrix:'
    print >> out, R_sim_W
    print >> out, R_V_rebuilt
    print >> out
    print >> out, 'mutation-selection balance matrix Q:'
    print >> out, Q
    print >> out, 'stationary distn:', qv
    print >> out, 'spectral gap:', Q_gap
    print >> out
    print >> out, 'symmetric matrix similar to Q:'
    print >> out, Q_sim
    print >> out
    print >> out, 'pi(Q) - pi(R):', dv
    print >> out, 'gap(Q) - gap(R):', Q_gap - R_gap
    print >> out, 'diag(Q) - diag(R):', np.diag(Q) - np.diag(R)
    print >> out, 'trace(Q) - trace(R):', np.trace(Q) - np.trace(R)
    print >> out
    print >> out, 'rate away estimate of spectral gap change:'
    print >> out, np.dot(np.diag(Q) - np.diag(R), R_sim_V.T[-2]**2)
    print >> out
    return out.getvalue().rstrip()
Ejemplo n.º 51
0
def get_response_content(fs):
    """
    Build a text report about a rate matrix defined by state energies.
    The off-diagonal rate from state row to state col is
    exp(-(energy[col] - energy[row])), and each diagonal entry is set
    so that its row sums to zero.
    The report covers the eigendecomposition of this rate matrix,
    of its transition matrix at a large time, and identicality
    computations for it and for a few hard-coded test rate matrices.
    @param fs: provides energies, a string in which each line yielded by
    iterutils.stripped_lines is parsed as a float
    @return: a multi-line text report with trailing whitespace removed
    @raise ValueError: if a line is not a valid float,
    if there are no energies, or if there are more than 100 energies
    """
    # read the energies from the form data
    energies = []
    for line in iterutils.stripped_lines(fs.energies.splitlines()):
        try:
            energy = float(line)
        except ValueError:
            raise ValueError('invalid energy: %s' % line)
        energies.append(energy)
    n = len(energies)
    if not n:
        # fail early with a clear message instead of inside the linear algebra
        raise ValueError('no energies')
    if n > 100:
        raise ValueError('too many energies')
    # compute the rate matrix
    R = np.zeros((n, n))
    for row in range(n):
        for col in range(n):
            rate = math.exp(-(energies[col] - energies[row]))
            R[row, col] = rate
    # Each diagonal entry is exp(0) = 1 at this point, so adding 1 cancels
    # its contribution to the row sum and makes the row sum to zero.
    for i, r in enumerate(R):
        R[i, i] = -np.sum(r) + 1
    # get the transition matrix at large finite time
    large_t = 1000.0
    T = scipy.linalg.expm(R*large_t)
    # eigendecompose
    Wr, Vr = scipy.linalg.eig(R, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(R, left=True, right=False)
    # Get the left eigenvector associated with the stationary distribution,
    # which is the one whose eigenvalue has the smallest magnitude.
    # An index search is used because taking the min of (value, vector)
    # pairs would try to compare the vectors whenever two values tie.
    pi_index = np.argmin(np.abs(Wl))
    pi_eigenvector = Vl[:, pi_index]
    # get the stationary distribution itself
    total = np.sum(pi_eigenvector)
    pi_arr = pi_eigenvector / total
    # print things
    np.set_printoptions(linewidth=300)
    out = StringIO()
    print >> out, 'rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'rate matrix row sums:'
    print >> out, np.sum(R, axis=1)
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wr
    print >> out
    print >> out, 'corresponding orthonormal right eigenvectors (columns):'
    print >> out, Vr
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wl
    print >> out
    print >> out, 'corresponding orthonormal left eigenvectors (columns):'
    print >> out, Vl
    print >> out
    print >> out, 'L2 normalized eigenvector associated with stationary distn:'
    print >> out, pi_eigenvector
    print >> out
    print >> out, 'L1 renormalized vector (the stationary distribution):'
    print >> out, pi_arr
    print >> out
    print >> out
    # eigendecompose the transition matrix
    Wr, Vr = scipy.linalg.eig(T, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(T, left=True, right=False)
    print >> out, 'transition matrix for t=%f:' % large_t
    print >> out, T
    print >> out
    print >> out, 'transition matrix row sums:'
    print >> out, np.sum(T, axis=1)
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wr
    print >> out
    print >> out, 'corresponding orthonormal right eigenvectors (columns):'
    print >> out, Vr
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wl
    print >> out
    print >> out, 'corresponding orthonormal left eigenvectors (columns):'
    print >> out, Vl
    print >> out
    # the label below marks this product as a deliberately shown non-identity
    print >> out, 'incorrect reconstitution of the transition matrix:'
    print >> out, ndot(Vr, np.diag(Wr), Vl.T)
    print >> out
    print >> out
    # Use the known properties of reversibility to symmetrize the matrix.
    t = 3
    coeffs, rates, c = get_identicality_params(R)
    print >> out, 'brute identicality computation for t=%f:' % t
    print >> out, get_numerical_identicality(R, t)
    print >> out
    print >> out, 'sophisticated identicality computation for t=%f:' % t
    print >> out, get_symbolic_identicality(coeffs, rates, c, t)
    print >> out
    print >> out
    # Try another couple rate matrices.
    e2 = math.exp(2)
    en2 = math.exp(-2)
    rate_matrices = [
            np.array([[-2.0, 2.0], [2.0, -2.0]]),
            np.array([[-1.0, 1.0], [3.0, -3.0]]),
            np.array([[-1, 1, 0], [1, -2, 1], [0, 1, -1]]),
            #np.array([[-4.0, 4.0, 0], [1, -2, 1], [0, 4, -4]])]
            #np.array([[-1, 1, 0], [7, -14, 7], [0, 1, -1]])]
            np.array([[-en2, en2, 0], [e2, -2*e2, e2], [0, en2, -en2]])]
    t = 3.0
    # a separate loop name keeps the energy-derived matrix R intact
    for test_matrix in rate_matrices:
        coeffs, rates, c = get_identicality_params(test_matrix)
        print >> out, 'test rate matrix:'
        print >> out, test_matrix
        print >> out
        print >> out, 'eigenvalues:'
        print >> out, scipy.linalg.eigvals(test_matrix)
        print >> out
        print >> out, 'stationary distribution:'
        print >> out, R_to_distn(test_matrix)
        print >> out
        print >> out, 'brute identicality computation for t=%f:' % t
        print >> out, get_numerical_identicality(test_matrix, t)
        print >> out
        print >> out, 'sophisticated identicality computation for t=%f:' % t
        print >> out, get_symbolic_identicality(coeffs, rates, c, t)
        print >> out
        print >> out, 'identicality derivative for t=%f:' % t
        print >> out, get_identicality_derivative(coeffs, rates, t)
        print >> out
        print >> out
    # return the message
    return out.getvalue().rstrip()