Example #1
def qr_house_basic(A):
    """ computes QR decomposition using Householder relections
    qr_house_basic and build_Q have the same effect as qr_house
    
    Parameters
    ----------
    A: array_like
       shape(A) = (M, N), M >= N
       overwritten on exit
    
    Returns
    -------
    A: array_like
        strict lower triangular part contains the Householder vectors v
        upper triangular part contains the matrix R
    
    betas: array_like
        scaling factors beta = 2/dot(v.T, v) of the Householder vectors
    """
    
    M,N = A.shape
    beta_list = []
    for n in range(N):
        v,beta = house(A[n:,n:n+1])
        A[n:,n:] -= beta * algopy.dot(v, algopy.dot(v.T,A[n:,n:]))
        
        beta_list.append(beta)
        if n < M:
            A[n+1:,n] = v[1:,0]
    return A, numpy.asarray(beta_list)
def centered_tree_covariance(B, nleaves, v):
    """
    @param B: rows of this unweighted incidence matrix are edges
    @param nleaves: number of leaves
    @param v: vector of edge variances
    """
    #TODO: track the block multiplication through the schur complement
    W = diag(reciprocal(v))
    L = dot(B.T, dot(W, B))
    nvertices = v.shape[0]
    ninternal = nvertices - nleaves
    Laa = L[:nleaves, :nleaves]
    Lab = L[:nleaves, nleaves:]
    Lba = L[nleaves:, :nleaves]
    Lbb = L[nleaves:, nleaves:]
    L_schur = Laa - dot(Lab, dot(inv(Lbb), Lba))
    L_schur_pinv = restored(inv(augmented(L_schur)))
    return L_schur_pinv
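The Schur complement step above relies on the standard block-inverse identity. As a quick sanity check, here is a minimal numpy sketch (not part of the original code) on a generic symmetric positive definite matrix:

import numpy

n, k = 6, 4
M0 = numpy.random.rand(n, n)
L = numpy.dot(M0, M0.T) + n * numpy.eye(n)  # a generic SPD matrix
Laa, Lab = L[:k, :k], L[:k, k:]
Lba, Lbb = L[k:, :k], L[k:, k:]
L_schur = Laa - numpy.dot(Lab, numpy.dot(numpy.linalg.inv(Lbb), Lba))
# inverse of the Schur complement == leading block of the full inverse
assert numpy.allclose(numpy.linalg.inv(L_schur), numpy.linalg.inv(L)[:k, :k])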
def get_Q_slsqp(
        ts, tv, syn, nonsyn, compo, asym_compo,
        h,
        log_counts, v,
        theta):
    #FIXME: hardcoded for selection without recessivity parameters
    #
    # unpack theta
    branch_length = theta[0]
    kappa = theta[1]
    omega = theta[2]
    """
    nt_probs = algopy.zeros(4, dtype=theta)
    nt_probs[0] = theta[3]
    nt_probs[1] = theta[4]
    nt_probs[2] = theta[5]
    nt_probs[3] = 1.0 - algopy.sum(nt_probs)
    print(nt_probs)
    log_nt_weights = algopy.log(nt_probs)
    """
    log_nt_weights = theta[-4:]
    #
    F = get_selection_F(log_counts, compo, log_nt_weights)
    S = get_selection_S(F)
    pre_Q_exch = (kappa * ts + tv) * (omega * nonsyn + syn)
    pre_Q = pre_Q_exch * algopy.exp(
            algopy.dot(asym_compo, log_nt_weights)) * h(S)
    rates = algopy.sum(pre_Q, axis=1)
    Q = pre_Q - algopy.diag(rates)
    Q *= branch_length / algopy.dot(rates, v)
    return Q
Example #4
def build_Q(A, betas):
    """ computes orthogonal matrix from output of qr_house_basic
    
    Parameters
    ----------
    A: array_like
        shape(A) = (M,N)
        upper triangular part contains R
        strict lower triangular part contains v[1:]; v[0] = 1 is implicit
    betas: array_like
        list of beta
        
    Returns
    -------
    Q: array_like
        shape(Q) = (M,M)

    """
    
    M,N = A.shape
    Q = algopy.zeros((M,M),dtype=A)
    Q += numpy.eye(M)
    H = algopy.zeros((M,M),dtype=A)
    for n in range(N):
        v = A[n:,n:n+1].copy()
        v[0] = 1
        H[...] = numpy.eye(M)
        H[n:,n:] -= betas[n] * algopy.dot(v,v.T)
        Q = algopy.dot(Q,H)
    return Q
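A minimal smoke test for the qr_house_basic/build_Q pair might look like the sketch below; it assumes house (defined in a later example) is in scope and that the algopy helpers fall back to numpy when given plain ndarrays:

import numpy

M, N = 5, 3
A = numpy.random.rand(M, N)
A_work = A.copy()                  # qr_house_basic overwrites its input
_, betas = qr_house_basic(A_work)
Q = build_Q(A_work, betas)
R = numpy.triu(A_work)
assert numpy.allclose(numpy.dot(Q, R), A)                # Q R reproduces A
assert numpy.allclose(numpy.dot(Q.T, Q), numpy.eye(M))   # Q is orthogonal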
Example #6
def get_Q(gtr, syn, nonsyn, compo, asym_compo, h, log_counts, log_mu, log_g,
          log_omega, log_nt_weights):
    """
    Most of the notation is from Yang and Nielsen 2008.
    The first group of args consists of precomputed ndarrays.
    The second group is only the fixation function.
    The third group consists of empirically (non-free) estimated parameters.
    The fourth group depends only on free parameters.
    @param gtr: ndim-3 ndarray indicating the nucleotide exchange type
    @param syn: indicator for synonymous codons
    @param nonsyn: indicator for nonsynonymous codons
    @param compo: site independent nucleotide composition per codon
    @param asym_compo: tensor from get_asym_compo function
    @param h: fixation function
    @param log_counts: empirically counted codons in the data set
    @param log_mu: free param for scaling
    @param log_g: logs of six exchangeabilities
    @param log_omega: free param for syn nonsyn rate distinction
    @param log_nt_weights: mostly free param array for mutation equilibrium
    @return: rate matrix
    """
    mu = algopy.exp(log_mu)
    g = algopy.exp(log_g)
    omega = algopy.exp(log_omega)
    F = get_selection_F(log_counts, compo, log_nt_weights)
    S = get_selection_S(F)
    pre_Q = mu * algopy.dot(gtr, g) * (omega * nonsyn + syn) * algopy.exp(
        algopy.dot(asym_compo, log_nt_weights)) * h(S)
    Q = pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
    return Q
Example #9
def fels(ov, v_to_children, pattern, de_to_P, root_prior):
    """
    The P matrices and the root prior may be algopy objects.
    @param ov: ordered vertices with child vertices before parent vertices
    @param v_to_children: map from a vertex to a sequence of child vertices
    @param pattern: an array that maps vertex to state, or to -1 if internal
    @param de_to_P: map from a directed edge to a transition matrix
    @param root_prior: equilibrium distribution at the root
    @return: log likelihood
    """
    nvertices = len(ov)
    nstates = len(root_prior)
    states = range(nstates)
    root = ov[-1]

    # Initialize the map from vertices to subtree likelihoods.
    likelihoods = algopy.ones(
            (nvertices, nstates),
            dtype=list(de_to_P.values())[0],
            )

    # Compute the subtree likelihoods using dynamic programming.
    for v in ov:
        for pstate in range(nstates):
            for c in v_to_children.get(v, []):
                P = de_to_P[v, c]
                likelihoods[v, pstate] *= algopy.dot(P[pstate], likelihoods[c])
        state = pattern[v]
        if state >= 0:
            for s in range(nstates):
                if s != state:
                    likelihoods[v, s] = 0

    # Get the log likelihood by summing over equilibrium states at the root.
    return algopy.log(algopy.dot(root_prior, likelihoods[root]))
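As a hedged usage sketch, here is a hypothetical two-leaf tree with one internal root vertex; the labels, transition matrix, and probabilities below are made up for illustration, and plain numpy arrays stand in for algopy objects:

import numpy

ov = [0, 1, 2]                       # leaves first, root last
v_to_children = {2: [0, 1]}
pattern = numpy.array([0, 1, -1])    # observed leaf states; -1 marks the root
P = numpy.array([[0.9, 0.1],
                 [0.2, 0.8]])        # a made-up transition matrix
de_to_P = {(2, 0): P, (2, 1): P}
root_prior = numpy.array([0.5, 0.5])
ll = fels(ov, v_to_children, pattern, de_to_P, root_prior)
# likelihood = 0.5 * (0.9 * 0.1) + 0.5 * (0.2 * 0.8) = 0.125
assert numpy.allclose(ll, numpy.log(0.125))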
    def test_hessian(self):
        N = 5
        A = numpy.random.rand(N, N)
        A = numpy.dot(A.T, A)
        x = algopy.UTPM.init_hessian(numpy.arange(N, dtype=float))
        H = algopy.UTPM.extract_hessian(N, algopy.dot(x, algopy.dot(A, x)))
        assert_array_almost_equal(A, 0.5 * H)
Example #13
def f(x):
    A = zeros((2, 2), dtype=x)
    A[0, 0] = numpy.log(x[0] * x[1])
    A[0, 1] = numpy.log(x[1]) + exp(x[0])
    A[1, 0] = sin(x[0])**2 + abs(cos(x[0]))**3.1
    A[1, 1] = x[0]**cos(x[1])
    return log(dot(x.T, dot(inv(A), x)))
def _algopy_pade7(A, ident):
    b = (17297280., 8648640., 1995840., 277200., 25200., 1512., 56., 1.)
    A2 = algopy.dot(A, A)
    A4 = algopy.dot(A2, A2)
    A6 = algopy.dot(A2, A4)
    U = algopy.dot(A, b[7] * A6 + b[5] * A4 + b[3] * A2 + b[1] * ident)
    V = b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * ident
    return U, V
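For context, the U and V halves of a degree-7 Padé approximant are conventionally combined as solve(V - U, V + U); a small check against scipy (a sketch assuming a matrix of modest norm, where Padé-7 is accurate, and relying on algopy.dot falling back to numpy.dot for plain ndarrays):

import numpy
import scipy.linalg

A = 0.1 * numpy.random.rand(4, 4)
ident = numpy.eye(4)
U, V = _algopy_pade7(A, ident)
P = numpy.linalg.solve(V - U, V + U)   # the rational Pade approximant
assert numpy.allclose(P, scipy.linalg.expm(A))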
    def test_tensor_for_hessian_computation(self):
        N = 3
        A = numpy.random.rand(N, N)
        A = numpy.dot(A.T, A)
        x = algopy.UTPM.init_tensor(2, numpy.arange(N))
        y = 0.5 * algopy.dot(x, algopy.dot(A, x))
        H = algopy.UTPM.extract_tensor(N, algopy.dot(x, algopy.dot(A, x)))
        assert_array_almost_equal(A, 0.5 * H)
def eval_covariance_matrix_qr(J1, J2):
    M, N = J1.shape
    K, N = J2.shape
    Q, R = qr_full(J2.T)
    Q2 = Q[:, K:].T
    J1_tilde = dot(J1, Q2.T)
    Q, R = qr(J1_tilde)
    V = solve(R.T, Q2)
    return dot(V.T, V)
Example #20
def get_Q_unconstrained(gtr, syn, nonsyn, compo, asym_compo, h, log_counts,
                        log_mu, log_g, log_omega, d, log_nt_weights):
    mu = algopy.exp(log_mu)
    g = algopy.exp(log_g)
    omega = algopy.exp(log_omega)
    F = get_selection_F(log_counts, compo, log_nt_weights)
    S = get_selection_S(F)
    pre_Q = mu * algopy.dot(gtr, g) * (omega * nonsyn + syn) * algopy.exp(
        algopy.dot(asym_compo, log_nt_weights)) * h(S, d)
    Q = pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
    return Q
def log_likelihoods(A, xs):
    """
    @param A: doubly centered symmetric nxn matrix of rank n-1
    @param xs: vectors of data
    """
    #NOTE: this formula is wrong on wikipedia
    assert_symmetric(A)
    n = A.shape[0]
    A_pinv = restored(inv(augmented(A)))
    a = (n-1) * LOG2PI + log_pdet(A)
    bs = np.array([dot(x, dot(A_pinv, x)) for x in xs])
    return -0.5 * (a + bs)
def get_Q_unconstrained_kb(
        gtr, syn, nonsyn, compo, asym_compo,
        h,
        log_counts,
        log_mu, log_g, log_omega, d, log_kb, log_nt_weights):
    mu = algopy.exp(log_mu)
    g = algopy.exp(log_g)
    omega = algopy.exp(log_omega)
    F = get_selection_F(log_counts, compo, log_nt_weights)
    S = get_selection_S(F)
    pre_Q = mu * algopy.dot(gtr, g) * (omega * nonsyn + syn) * algopy.exp(
            algopy.dot(asym_compo, log_nt_weights)) * h(S, d, log_kb)
    Q = pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
    return Q
Example #24
def objective(distn, dwell, trans, log_params):
    """
    Get the expected negative log likelihood.

    This is a helper function for the EM.

    """
    params = algopy.exp(log_params)
    ll_distn = algopy.dot(distn, algopy.log(get_distn(params)))
    ll_dwell = -algopy.dot(get_rates_out(params), dwell)
    ll_trans_01 = trans[0, 1] * log_params[0]
    ll_trans_10 = trans[1, 0] * log_params[1]
    ll = ll_distn + ll_dwell + ll_trans_01 + ll_trans_10
    return -ll
Example #26
def unconstrained_recessivity_fixation(
        adjacency,
        kimura_d,
        S,
        ):
    """
    This should be compatible with algopy.
    But it may be very slow.
    @param adjacency: a binary design matrix to reduce unnecessary computation
    @param kimura_d: a parameter that might carry Taylor information
    @param S: an ndarray of selection differences with Taylor information
    @return: an ndarray of fixation probabilities with Taylor information
    """
    x = g_quad_x
    w = g_quad_w
    nstates = S.shape[0]
    D = algopy.sign(S) * kimura_d
    H = algopy.zeros_like(S)
    for i in range(nstates):
        for j in range(nstates):
            if not adjacency[i, j]:
                continue
            tmp_a = - S[i, j] * x
            tmp_b = algopy.exp(tmp_a * (D[i, j] * (1-x) + 1))
            tmp_c = algopy.dot(tmp_b, w)
            H[i, j] = algopy.reciprocal(tmp_c)
    return H
Example #27
def get_Q_unconstrained_kb(
        ts, tv, syn, nonsyn, compo, asym_compo,
        h,
        log_counts,
        log_mu, log_kappa, log_omega, d, log_kb, log_nt_weights):
    """
    This adds yet another parameter.
    """
    #FIXME: constructing this each time seems wasteful
    codon_neighbor_mask = ts + tv
    #FIXME: this is being hacked to use fixed-order quadrature
    #FIXME: and to disregard the h parameter
    mu = algopy.exp(log_mu)
    kappa = algopy.exp(log_kappa)
    omega = algopy.exp(log_omega)
    F = get_selection_F(log_counts, compo, log_nt_weights)
    S = get_selection_S(F)
    H = get_fixation_unconstrained_kb_fquad(
            S, d, log_kb, g_quad_x, g_quad_w, codon_neighbor_mask)
    #H = get_fixation_unconstrained_kb_fquad_cython(
            #S, d, log_kb, codon_neighbor_mask)
    pre_Q = mu * (kappa * ts + tv) * (omega * nonsyn + syn) * algopy.exp(
            algopy.dot(asym_compo, log_nt_weights)) * H
    Q = pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
    return Q
Example #28
def get_Q(ts, tv, syn, nonsyn, compo, asym_compo, h, log_counts, log_mu,
          log_kappa, log_omega, log_nt_weights):
    """
    Notation is from Yang and Nielsen 2008.
    The first group of args consists of precomputed ndarrays.
    The second group is only the fixation function.
    The third group consists of empirically (non-free) estimated parameters.
    The fourth group depends only on free parameters.
    Speed matters.
    @param ts: indicator for transition
    @param tv: indicator for transversion
    @param syn: indicator for synonymous codons
    @param nonsyn: indicator for nonsynonymous codons
    @param compo: site independent nucleotide composition per codon
    @param asym_compo: tensor from get_asym_compo function
    @param h: fixation function
    @param log_counts: empirically counted codons in the data set
    @param log_mu: free param for scaling
    @param log_kappa: free param for transition transversion rate distinction
    @param log_omega: free param for syn nonsyn rate distinction
    @param log_nt_weights: mostly free param array for mutation equilibrium
    @return: rate matrix
    """
    mu = algopy.exp(log_mu)
    kappa = algopy.exp(log_kappa)
    omega = algopy.exp(log_omega)
    F = get_selection_F(log_counts, compo, log_nt_weights)
    S = get_selection_S(F)
    pre_Q = mu * (kappa * ts + tv) * (omega * nonsyn + syn) * algopy.exp(
        algopy.dot(asym_compo, log_nt_weights)) * h(S)
    Q = pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
    return Q
Example #29
def eval_f(Y):
    """ some reformulations to make eval_f_orig
        compatible with algopy

        replaced scipy.linalg.expm by algopy.expm

        this function **can** be differentiated with algopy

    """

    a, b, v = transform_params(Y)

    Q = algopy.zeros((4, 4), dtype=Y)
    Q[0, 0] = 0
    Q[0, 1] = a
    Q[0, 2] = b
    Q[0, 3] = b
    Q[1, 0] = a
    Q[1, 1] = 0
    Q[1, 2] = b
    Q[1, 3] = b
    Q[2, 0] = b
    Q[2, 1] = b
    Q[2, 2] = 0
    Q[2, 3] = a
    Q[3, 0] = b
    Q[3, 1] = b
    Q[3, 2] = a
    Q[3, 3] = 0

    Q = Q * v
    Q -= algopy.diag(algopy.sum(Q, axis=1))
    P = algopy.expm(Q)
    S = algopy.log(algopy.dot(algopy.diag(v), P))
    return -algopy.sum(S * g_data)
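transform_params is not shown in these snippets. A plausible, purely hypothetical reconstruction, inferred only from the fact that it must return two exchange rates and a length-4 distribution, might be:

import algopy

def transform_params(Y):
    # hypothetical reconstruction, not the original helper
    X = algopy.exp(Y)
    a = X[0]                  # e.g. a transition-type rate
    b = X[1]                  # e.g. a transversion-type rate
    v_unnorm = algopy.zeros(4, dtype=Y)
    v_unnorm[0:3] = X[2:5]
    v_unnorm[3] = 1.0
    v = v_unnorm / algopy.sum(v_unnorm)   # normalized distribution
    return a, b, v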
Example #30
        def Cfcn(F1p_list, out=None, work=None):
            from algopy import inv, dot, zeros

            # check input arguments
            Nex = len(F1p_list)
            Np = F1p_list[0].shape[1]

            # create temporary matrix M if not provided
            # to accumulate M = sum_i J_i^T J_i
            if work is None:
                work = zeros((Np, Np), dtype=F1p_list[0])
            M = work

            # Step 1:   compute M = J_1^T J_1
            for nex in range(Nex):
                M += dot(F1p_list[nex].T, F1p_list[nex])

            # Step 2: invert M and prepare output

            if out is None:
                out = inv(M)
            else:
                out[...] = inv(M)
            return out
def get_log_likelihood(pre_Q_prefix, pre_Q_suffix, v, subs_counts):
    """
    The stationary distribution of P is empirically derived.
    It is proportional to the codon counts by construction.
    @param pre_Q_prefix: component of the Hadamard decomposition of pre_Q
    @param pre_Q_suffix: component of the Hadamard decomposition of pre_Q
    @param v: stationary distribution proportional to observed codon counts
    @param subs_counts: observed substitution counts
    """
    Q = get_Q(pre_Q_prefix, pre_Q_suffix)
    #
    P = algopy.expm(Q)
    #
    # This untested eigh approach is way too slow because of the algopy eigh.
    """
    Da = numpy.diag(numpy.sqrt(v))
    Db = numpy.diag(numpy.reciprocal(numpy.sqrt(v)))
    Q_symmetrized = algopy.dot(Da, algopy.dot(Q, Db))
    w, V = algopy.eigh(Q_symmetrized)
    W_exp = algopy.diag(algopy.exp(w))
    P_symmetrized = algopy.dot(V, algopy.dot(W_exp, V.T))
    P = algopy.dot(Db, algopy.dot(P_symmetrized, Da))
    """
    #
    log_score_matrix = algopy.log(algopy.dot(algopy.diag(v), P))
    log_likelihood = algopy.sum(log_score_matrix * subs_counts)
    return log_likelihood
def eval_f(Y):
    """ some reformulations to make eval_f_orig
        compatible with algopy

        missing: support for scipy.linalg.expm

        i.e., this function can't be differentiated with algopy

    """

    a, b, v = transform_params(Y)

    Q = algopy.zeros((4,4), dtype=Y)
    Q[0,0] = 0;    Q[0,1] = a;    Q[0,2] = b;    Q[0,3] = b;
    Q[1,0] = a;    Q[1,1] = 0;    Q[1,2] = b;    Q[1,3] = b;
    Q[2,0] = b;    Q[2,1] = b;    Q[2,2] = 0;    Q[2,3] = a;
    Q[3,0] = b;    Q[3,1] = b;    Q[3,2] = a;    Q[3,3] = 0;

    Q = Q * v
    Q -= algopy.diag(algopy.sum(Q, axis=1))
    #P = linalg.expm(Q)
    # XXX can I get rid of the 4 on the following line?
    P = algopy_expm(Q, 4)
    S = algopy.log(algopy.dot(algopy.diag(v), P))
    return -algopy.sum(S * g_data)
def eval_covariance_matrix_naive(J1, J2):
    M, N = J1.shape
    K, N = J2.shape
    tmp = zeros((N + K, N + K), dtype=J1)
    tmp[:N, :N] = dot(J1.T, J1)
    tmp[:N, N:] = J2.T
    tmp[N:, :N] = J2
    return inv(tmp)[:N, :N]
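The QR-based and naive covariance evaluations should agree. A quick consistency sketch, assuming dot, inv, zeros, qr, qr_full and solve come from algopy (as the bare names suggest) and accept plain ndarrays:

import numpy

M, N, K = 10, 4, 2
J1 = numpy.random.rand(M, N)
J2 = numpy.random.rand(K, N)
C_naive = eval_covariance_matrix_naive(J1, J2)
C_qr = eval_covariance_matrix_qr(J1, J2)
assert numpy.allclose(C_naive, C_qr)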
Example #38
def pre_Q_to_Q(pre_Q, stationary_distn, target_expected_rate):
    """
    Return a matrix with a different diagonal and a different scaling.
    """
    unscaled_Q = pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
    r = -algopy.dot(algopy.diag(unscaled_Q), stationary_distn)
    Q = (target_expected_rate / r) * unscaled_Q
    return Q
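A small sanity check of the rescaling (plain numpy inputs, again assuming the algopy helpers fall back to numpy): after the call, the expected rate under the stationary distribution equals target_expected_rate and the rows still sum to zero:

import numpy

pre_Q = numpy.random.rand(4, 4)
distn = numpy.ones(4) / 4.0
Q = pre_Q_to_Q(pre_Q, distn, 1.0)
assert numpy.allclose(-numpy.dot(numpy.diag(Q), distn), 1.0)
assert numpy.allclose(Q.sum(axis=1), 0.0)   # rate matrix rows sum to zero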
Example #39
    def test_broadcasting1(self):

        Q = numpy.random.rand(4, 4)
        w = numpy.random.rand(4)
        t = numpy.random.rand(1)

        x = t
        v = x * w
        M_broadcast = Q * v
        M_dot_diag = dot(Q, diag(v))
        assert_array_almost_equal(M_broadcast, M_dot_diag)

        x = UTPM.init_jacobian(t)
        v = x * w
        M_broadcast = Q * v
        M_dot_diag = dot(Q, diag(v))
        assert_array_almost_equal(M_broadcast.data, M_dot_diag.data)
Example #41
    def lag(self, x, z, **kwargs):
        """Evaluate Lagrangian at (x, z).

        The constraints and bounds are assumed to be ordered as in
        :meth:`cons_pos` and :meth:`bounds`.
        """
        m = self.m
        nrC = self.nrangeC

        l = self.obj(x)
        # The following ifs are necessary because np.dot returns None
        # when passed empty arrays of objects (i.e., dtype = np.object).
        # This causes AD tools to error out.
        if self.m > 0:
            l -= algopy.dot(z[:m + nrC], self.cons(x))
        if self.nbounds > 0:
            l -= algopy.dot(z[m + nrC:], self.bounds(x))
        return l
def get_errors(M, v, approx_1a, approx_2a, X_side, X_diag):
    # The dot products with X_side and X_diag accumulate, for each state i
    # with probability p = v[i] and counts AB, Ab, aB, ab = M[i],
    # p into the bins AB + Ab and Ab + aB respectively.
    observed_1a_1 = algopy.dot(v, X_side)
    observed_2a = algopy.dot(v, X_diag)
    errors_1a_1 = observed_1a_1 - approx_1a
    errors_2a = observed_2a - approx_2a
    #FIXME: Use algopy.hstack when it becomes available.
    #FIXME: using the workaround http://projects.scipy.org/scipy/ticket/1454
    #FIXME: but this padding of the errors with zeros should not be necessary
    errors = algopy.zeros(
            errors_1a_1.shape[0] +
            errors_2a.shape[0],
            dtype=v,
            )
    index = 0
    errors[index:index+errors_1a_1.shape[0]] = errors_1a_1
    index += errors_1a_1.shape[0]
    errors[index:index+errors_2a.shape[0]] = errors_2a
    index += errors_2a.shape[0]
    return errors
Example #43
def get_neg_ll(vY, mX, vBeta):
    """
    @param vY: predefined numpy array
    @param mX: predefined numpy array
    @param vBeta: parameters of the likelihood function
    """
    #FIXME: algopy could benefit from the addition of a logsumexp function...
    alpha = algopy.dot(mX, vBeta)
    return algopy.sum(vY * algopy.log1p(algopy.exp(-alpha)) +
                      (1 - vY) * algopy.log1p(algopy.exp(alpha)))
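Since log1p(exp(t)) overflows for large positive t, a numerically safer plain-numpy variant (a sketch; unlike the algopy version it is not differentiable through algopy) can use logaddexp, which computes the same quantity:

import numpy

def get_neg_ll_stable(vY, mX, vBeta):
    # log1p(exp(t)) == logaddexp(0, t), but without overflow for large t
    alpha = numpy.dot(mX, vBeta)
    return numpy.sum(vY * numpy.logaddexp(0, -alpha) +
                     (1 - vY) * numpy.logaddexp(0, alpha))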
Example #44
def get_MG_pre_Q(ts, tv, syn, nonsyn, asym_compo, nt_distn, kappa, omega):
    """
    This model is nested in FMutSel-F, from which this code was copy-pasted.
    """
    if nt_distn.shape != (4,):
        raise Exception(nt_distn.shape)
    A = (omega * nonsyn + syn) * (kappa * ts + tv)
    B = algopy.dot(asym_compo, nt_distn)
    pre_Q = A * B
    return pre_Q
Example #45
def get_neg_ll(y, X, theta):
    alpha = theta[-1]
    beta = theta[:-1]
    a = alpha * algopy.exp(algopy.dot(X, beta))
    ll = algopy.sum(-y * algopy.log1p(1 / a) + -algopy.log1p(a) / alpha +
                    algopy.special.gammaln(y + 1 / alpha) +
                    -algopy.special.gammaln(y + 1) +
                    -algopy.special.gammaln(1 / alpha))
    neg_ll = -ll
    return neg_ll
def clever_cross_entropy_trees(B, nleaves, va, vb):
    """
    Try being a little more clever.

    @param B: augmented incidence matrix
    @param nleaves: number of leaves
    @param va: augmented reference point edge variances
    @param vb: augmented test point edge variances
    """

    # deduce some quantities assuming an unrooted bifurcating tree
    ninternal = nleaves - 2
    nvertices = nleaves + ninternal
    nedges = nvertices - 1

    # define an index for taking schur complements
    n = nvertices
    k = nleaves + 1

    # Construct the full Laplacian matrix plus J/n.
    # Take a block of the diagonal, corresponding to the inverse
    # of a schur complement.
    Wa = diag(reciprocal(va))
    La_plus = dot(B.T, dot(Wa, B))
    print(La_plus)
    print(scipy.linalg.eigh(La_plus))
    Laa = La_plus[:k, :k]
    Lab = La_plus[:k, k:]
    Lba = La_plus[k:, :k]
    Lbb = La_plus[k:, k:]
    L_schur_plus = Laa - dot(Lab, dot(inv(Lbb), Lba))
    assert_allclose(inv(L_schur_plus), inv(La_plus)[:k, :k])
    A = inv(La_plus)[:k, :k]
    print(scipy.linalg.eigh(A))

    # Construct the Schur complement of the test point matrix.
    Wb = diag(reciprocal(vb))
    L_plus = dot(B.T, dot(Wb, B))
    Laa = L_plus[:k, :k]
    Lab = L_plus[:k, k:]
    Lba = L_plus[k:, :k]
    Lbb = L_plus[k:, k:]
    L_schur_plus = Laa - dot(Lab, dot(inv(Lbb), Lba))
    B_inv = L_schur_plus
    #return 0.5 * ((n-1) * LOG2PI + trace(dot(B_inv, A)) - log(det(B_inv)))
    return 0.5 * (n * LOG2PI + trace(dot(B_inv, A) - 1) - log(det(B_inv)))
Example #48
def denom_fixed_quad(c, d, x, w):
    """
    This function is compatible with algopy.
    The x and w params should be precomputed with a=0, b=1.
    @param c: large positive means mutant is more fit
    @param d: large positive means mutant is dominant as opposed to recessive
    @param x: quadrature points in the interval [0, 1]
    @param w: corresponding nonneg quadrature weights summing to 1
    """
    neg_two_c_x = -2*c*x
    y = algopy.exp(neg_two_c_x*(d*(1-x) + 1))
    return algopy.dot(y, w)
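The x and w arguments can be precomputed once from Gauss-Legendre nodes rescaled from [-1, 1] to [0, 1]; a sketch (the order 10 is an arbitrary choice):

import numpy

def fixed_quad_xw(order=10):
    # Gauss-Legendre nodes and weights on [-1, 1], rescaled to [0, 1]
    x, w = numpy.polynomial.legendre.leggauss(order)
    return (x + 1.0) / 2.0, w / 2.0   # rescaled weights sum to 1

x, w = fixed_quad_xw()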
Example #49
def house(x):
    """ computes the Householder vector v and twice its norm beta
    
    (v,beta) = house(x)
    
    Parameters
    ----------
    x: array_like
        len(x) = N
    
    Returns
    -------
    v: array_like
        len(v) = N
    
    beta: float
        the scalar 2/dot(v.T, v)
        
    Description
    -----------
    computes beta and v to be used in the Householder reflector
    H(v) = eye(N) - beta * dot(v, v.T)
    where v[0] = 1
    such that H(v)x = alpha * e_1
    i.e., H(v)x is a multiple of the first Cartesian basis vector
    """
    
    sigma = algopy.sqrt(algopy.dot(x.T,x))[0,0]
    
    v = x.copy()
    if x[0] <= 0:
        v[0] -= sigma
    else:
        v[0] += sigma
    
    v = v/v[0]
    beta = 2./algopy.dot(v.T,v)[0,0]
    
    return v, beta
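A quick reflector check with a plain numpy input (assuming the algopy helpers used inside house fall back to numpy for ndarrays): H(v) x should be a multiple of the first basis vector with the same norm as x:

import numpy

x = numpy.random.rand(5, 1)
v, beta = house(x)
H = numpy.eye(5) - beta * numpy.dot(v, v.T)
y = numpy.dot(H, x)
assert numpy.allclose(y[1:], 0.0)                        # multiple of e_1
assert numpy.allclose(abs(y[0]), numpy.linalg.norm(x))   # norm is preserved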
Example #50
def qr_house(A):
    """ computes QR decomposition using Householder relections
    
    (Q,R) = qr_house(A)
    
    such that 
    0 = Q R - A
    0 = dot(Q.T,Q) - eye(M)
    R upper triangular
    
    Parameters
    ----------
    A: array_like
       shape(A) = (M, N), M >= N
       overwritten on exit
    
    Returns
    -------
    Q: array_like
        orthogonal matrix
    
    R: array_like
        upper triangular matrix

    """
    
    M,N = A.shape
    Q = algopy.zeros((M,M),dtype=A)
    Q += numpy.eye(M)
    H = algopy.zeros((M,M),dtype=A)
    for n in range(N):
        v,beta = house(A[n:,n:n+1])
        A[n:,n:] -= beta * algopy.dot(v, algopy.dot(v.T,A[n:,n:]))
        H[...] = numpy.eye(M)
        H[n:,n:] -= beta * algopy.dot(v,v.T)
        Q = algopy.dot(Q,H)
        
    return Q, algopy.triu(A)
Example #51
def get_selection_F(log_counts, compo, log_nt_weights):
    """
    The F and S notation is from Yang and Nielsen 2008.
    Note that three of the four log nt weights are free parameters.
    One of the four log weights is zero and the other three
    are free parameters to be estimated jointly in the
    maximum likelihood search,
    so this function is inside the optimization loop.
    @param log_counts: logs of empirical codon counts
    @param compo: codon composition as defined in the get_compo function
    @param log_nt_weights: un-normalized log mutation process probabilities
    @return: a log selection for each codon, up to an additive constant
    """
    return log_counts - algopy.dot(compo, log_nt_weights)
Example #52
def eval_f_explicit(subs_counts, v, Y):
    """
    Note that Y is last for compatibility with functools.partial.
    It is convenient for usage with numdifftools, although this parameter
    ordering is the opposite of the convention of scipy.optimize.
    @return: negative log likelihood
    @param Y: parameters to jointly estimate
    @param subs_counts: observed data
    @param v: fixed equilibrium probabilities for states
    """
    P = create_transition_matrix_explicit(Y, v)
    vdiag = algopy.diag(v)
    J = algopy.dot(vdiag, P)
    S = algopy.log(J)
    return -algopy.sum(S * subs_counts)
Example #53
    def eval_f_eigh(self, Y):
        """
        reformulation of eval_f(Y) to use eigh instead of expm
        """

        a, b, v = self.transform_params(Y)

        g_data = numpy.array([
            [2954, 141, 17, 16],
            [165, 1110, 5, 2],
            [18, 4, 3163, 374],
            [15, 2, 310, 2411],
            ], dtype=float)

        Q = zeros((4, 4), dtype=Y)
        Q[0, 0] = 0
        Q[0, 1] = a
        Q[0, 2] = b
        Q[0, 3] = b
        Q[1, 0] = a
        Q[1, 1] = 0
        Q[1, 2] = b
        Q[1, 3] = b
        Q[2, 0] = b
        Q[2, 1] = b
        Q[2, 2] = 0
        Q[2, 3] = a
        Q[3, 0] = b
        Q[3, 1] = b
        Q[3, 2] = a
        Q[3, 3] = 0

        Q = dot(Q, diag(v))
        Q -= diag(sum(Q, axis=1))
        va = diag(sqrt(v))
        vb = diag(1. / sqrt(v))
        W, U = eigh(dot(dot(va, Q), vb))
        M = dot(U, dot(diag(exp(W)), U.T))
        P = dot(vb, dot(M, va))
        S = log(dot(diag(v), P))
        return -sum(S * g_data)
Example #54
def get_Q_prefix_gtr(gtr, syn, nonsyn, log_mu, log_gtr_exch, log_omega):
    """
    Compute a chunk of a Hadamard decomposition of the pre-Q matrix.
    By Hadamard decomposition I mean the factoring of a matrix
    into the entrywise product of two matrices.
    By pre-Q matrix I mean the rate matrix before the row sums
    have been subtracted from the diagonal.
    Notation is from Yang and Nielsen 2008.
    The first group of args consists of precomputed ndarrays.
    The second group depends only on free parameters.
    Note that this function does not depend on mutation process
    stationary distribution parameters,
    and it does not depend on recessivity parameters.
    """
    mu = algopy.exp(log_mu)
    gtr_exch = algopy.exp(log_gtr_exch)
    omega = algopy.exp(log_omega)
    return mu * algopy.dot(gtr, gtr_exch) * (omega * nonsyn + syn)
Example #55
def eval_f_eigh(Y):
    """ some reformulations to make eval_f_orig
        compatible with algopy

        replaced scipy.linalg.expm by a symmetric eigenvalue decomposition

        this function **can** be differentiated with algopy

    """
    a, b, v = transform_params(Y)

    Q = algopy.zeros((4, 4), dtype=Y)
    Q[0, 0] = 0
    Q[0, 1] = a
    Q[0, 2] = b
    Q[0, 3] = b
    Q[1, 0] = a
    Q[1, 1] = 0
    Q[1, 2] = b
    Q[1, 3] = b
    Q[2, 0] = b
    Q[2, 1] = b
    Q[2, 2] = 0
    Q[2, 3] = a
    Q[3, 0] = b
    Q[3, 1] = b
    Q[3, 2] = a
    Q[3, 3] = 0

    Q = algopy.dot(Q, algopy.diag(v))
    Q -= algopy.diag(algopy.sum(Q, axis=1))
    va = algopy.diag(algopy.sqrt(v))
    vb = algopy.diag(1. / algopy.sqrt(v))
    W, U = algopy.eigh(algopy.dot(algopy.dot(va, Q), vb))
    M = algopy.dot(U, algopy.dot(algopy.diag(algopy.exp(W)), U.T))
    P = algopy.dot(vb, algopy.dot(M, va))
    S = algopy.log(algopy.dot(algopy.diag(v), P))
    return -algopy.sum(S * g_data)
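The symmetrization trick used above can be verified in isolation with plain numpy and scipy: for a reversible Q built from a symmetric exchangeability matrix and a stationary distribution v, the eigendecomposition route reproduces expm(Q):

import numpy
import scipy.linalg

v = numpy.array([0.1, 0.2, 0.3, 0.4])
S = numpy.random.rand(4, 4)
S = S + S.T                                   # symmetric exchangeabilities
Q = S * v                                     # Q[i, j] = S[i, j] * v[j]
Q -= numpy.diag(Q.sum(axis=1))
va = numpy.diag(numpy.sqrt(v))
vb = numpy.diag(1.0 / numpy.sqrt(v))
W, U = numpy.linalg.eigh(va.dot(Q).dot(vb))   # symmetric similar matrix
P = vb.dot(U).dot(numpy.diag(numpy.exp(W))).dot(U.T).dot(va)
assert numpy.allclose(P, scipy.linalg.expm(Q))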
Example #56
    def eval_f(self, Y):
        """
        using algopy.expm
        """

        a, b, v = self.transform_params(Y)

        g_data = numpy.array([
            [2954, 141, 17, 16],
            [165, 1110, 5, 2],
            [18, 4, 3163, 374],
            [15, 2, 310, 2411],
            ], dtype=float)

        Q = zeros((4, 4), dtype=Y)
        Q[0, 0] = 0
        Q[0, 1] = a
        Q[0, 2] = b
        Q[0, 3] = b
        Q[1, 0] = a
        Q[1, 1] = 0
        Q[1, 2] = b
        Q[1, 3] = b
        Q[2, 0] = b
        Q[2, 1] = b
        Q[2, 2] = 0
        Q[2, 3] = a
        Q[3, 0] = b
        Q[3, 1] = b
        Q[3, 2] = a
        Q[3, 3] = 0

        Q = Q * v
        Q -= diag(sum(Q, axis=1))
        P = expm(Q)
        S = log(dot(diag(v), P))
        return -sum(S * g_data)