def eval_f(Y):
    """
    Negative log likelihood computed through the algopy_expm helper.

    This is a reformulation of eval_f_orig meant to be compatible
    with algopy; scipy.linalg.expm is not used here because algopy
    does not support it.

    @param Y: parameter vector handed to transform_params
    @return: negative sum of the log joint matrix weighted by g_data
    """
    a, b, v = transform_params(Y)
    # Layout of the unscaled rates, written row by row.
    layout = (
        (0, a, b, b),
        (a, 0, b, b),
        (b, b, 0, a),
        (b, b, a, 0),
    )
    Q = algopy.zeros((4, 4), dtype=Y)
    for i, row in enumerate(layout):
        for j, rate in enumerate(row):
            Q[i, j] = rate
    # Scale the columns by v, then fix the diagonal so rows sum to zero.
    Q = Q * v
    Q -= algopy.diag(algopy.sum(Q, axis=1))
    #P = linalg.expm(Q)
    # XXX can I get rid of the 4 on the following line?
    P = algopy_expm(Q, 4)
    joint = algopy.dot(algopy.diag(v), P)
    return -algopy.sum(algopy.log(joint) * g_data)
def eval_f_eigh(Y):
    """
    Negative log likelihood computed through a symmetric eigendecomposition.

    This is a reformulation of eval_f_orig in which scipy.linalg.expm is
    replaced by algopy.eigh applied to the rate matrix conjugated by
    diag(sqrt(v)) and its inverse, so that the function can be
    differentiated with algopy.

    @param Y: parameter vector handed to transform_params
    @return: negative sum of the log joint matrix weighted by g_data
    """
    a, b, v = transform_params(Y)
    # Layout of the unscaled rates, written row by row.
    layout = (
        (0, a, b, b),
        (a, 0, b, b),
        (b, b, 0, a),
        (b, b, a, 0),
    )
    Q = algopy.zeros((4, 4), dtype=Y)
    for i, row in enumerate(layout):
        for j, rate in enumerate(row):
            Q[i, j] = rate
    # Scale the columns by v, then fix the diagonal so rows sum to zero.
    Q = algopy.dot(Q, algopy.diag(v))
    Q -= algopy.diag(algopy.sum(Q, axis=1))
    root_v = algopy.sqrt(v)
    va = algopy.diag(root_v)
    vb = algopy.diag(1. / root_v)
    # Decompose the conjugated matrix and exponentiate its eigenvalues.
    W, U = algopy.eigh(algopy.dot(algopy.dot(va, Q), vb))
    exp_W = algopy.diag(algopy.exp(W))
    M = algopy.dot(U, algopy.dot(exp_W, U.T))
    # Undo the conjugation to recover the transition matrix.
    P = algopy.dot(vb, algopy.dot(M, va))
    joint = algopy.dot(algopy.diag(v), P)
    return -algopy.sum(algopy.log(joint) * g_data)
def guess_branch_length(subs_counts):
    """
    Crudely guess the expected number of changes along a branch.

    The guess is the fraction of all counts that lie off the diagonal.

    @param subs_counts: an (nstates, nstates) ndarray of observed substitutions
    @return: crude guess of expected number of changes along the branch
    """
    n_total = algopy.sum(subs_counts)
    n_on_diagonal = algopy.sum(algopy.diag(subs_counts))
    n_off_diagonal = n_total - n_on_diagonal
    # float() forces true division even when the counts are integers.
    return n_off_diagonal / float(n_total)
def get_f1x4_codon_distn(compo, nt_distn):
    """
    Compute the f1x4 codon distribution.

    The f1x4 notation is from e.g. Table (1) of Yang and Nielsen 1998.
    Each codon weight is the exponential of the composition-weighted sum
    of log nucleotide probabilities; the weights are then normalized.

    @param compo: a (ncodons, 4) design matrix defining codon compositions
    @param nt_distn: empirical or free nucleotide distribution
    @return: codon distribution
    """
    weighted_logs = algopy.log(nt_distn) * compo
    kernel = algopy.exp(algopy.sum(weighted_logs, axis=-1))
    return kernel / algopy.sum(kernel)
def get_f3x4_codon_distn(full_compo, nt_distns):
    """
    Compute the f3x4 codon distribution.

    The f3x4 notation is from e.g. Table (1) of Yang and Nielsen 1998.
    Although algopy implements most of the functions of numpy,
    it seems to lack tensordot, so the contraction is written as an
    elementwise product followed by two sums over the last axis.

    @param full_compo: a (ncodons, 3, 4) binary matrix of codon compositions
    @param nt_distns: empirical or free nucleotide distributions
    @return: codon distribution
    """
    weighted_logs = algopy.log(nt_distns) * full_compo
    per_position = algopy.sum(weighted_logs, axis=-1)
    kernel = algopy.exp(algopy.sum(per_position, axis=-1))
    return kernel / algopy.sum(kernel)
def create_transition_matrix_numeric(mu, d, v):
    """
    Build a transition matrix using numerical integration.

    This is not so compatible with algopy because it goes through fortran.
    Note that d = 2*h - 1 following Kimura 1957.
    The rate mu is a catch-all scaling factor.
    The finite distribution v is assumed to be a stochastic vector.

    @param mu: scales the rate matrix
    @param d: dominance (as opposed to recessiveness) of preferred states.
    @param v: numpy array defining a distribution over states
    @return: transition matrix
    """
    # Pure numpy: S[i, j] = log(v[j]) - log(v[i]).
    log_v = numpy.log(v)
    unit = numpy.ones_like(log_v)
    S = numpy.outer(unit, log_v) - numpy.outer(log_v, unit)
    # From here on the values may carry truncated Taylor information
    # if mu and d are algopy objects.
    D = d * numpy.sign(S)
    fixation = numpy.vectorize(numeric_fixation)
    pre_Q = mu * fixation(0.5 * S, D)
    # Set the diagonal so each row sums to zero, then exponentiate.
    row_totals = algopy.sum(pre_Q, axis=1)
    return algopy.expm(pre_Q - algopy.diag(row_totals))
def get_neg_ll(
        cls,
        patterns, pattern_weights,
        ts, tv, syn, nonsyn, full_compo,
        theta,
        ):
    """
    Negative log likelihood with an f3x4 codon distribution taken from theta.

    The last nine entries of theta are read as a (3, 3) block of log
    nucleotide weights; a fourth column of zeros is appended, the rows
    are exponentiated and normalized, and the implied codon distribution
    is forwarded to A.get_neg_ll with the remaining parameters.

    @param patterns: forwarded unchanged to A.get_neg_ll
    @param pattern_weights: forwarded unchanged to A.get_neg_ll
    @param full_compo: codon compositions for get_f3x4_codon_distn
    @param theta: parameter vector ending in nine nt distn parameters
    @return: whatever A.get_neg_ll returns
    """
    # split theta into the model part and the trailing nt distn part
    nt_block = algopy.reshape(theta[-9:], (3, 3))
    model_theta = theta[:-9]
    # the last column keeps its initial zero, i.e. weight exp(0) = 1
    log_nt_distns = algopy.zeros((3, 4), dtype=theta)
    log_nt_distns[:, :-1] = nt_block
    # normalize each of the three nucleotide distributions
    nt_weights = algopy.exp(log_nt_distns)
    nt_totals = algopy.sum(nt_weights, axis=1)
    nt_distns = (nt_weights.T / nt_totals).T
    # get the implied codon distribution
    codon_distn = codon1994.get_f3x4_codon_distn(full_compo, nt_distns)
    return A.get_neg_ll(
            patterns, pattern_weights,
            codon_distn,
            ts, tv, syn, nonsyn,
            model_theta,
            )
def get_Q_unconstrained_kb(
        ts, tv, syn, nonsyn, compo, asym_compo,
        h,
        log_counts,
        log_mu, log_kappa, log_omega, d, log_kb, log_nt_weights):
    """
    Build a rate matrix whose fixation factor takes both d and log_kb.

    This adds yet another parameter (log_kb).
    The fixation factor is computed with fixed-order quadrature using the
    module-level g_quad_x and g_quad_w; the h argument is not used.

    @return: rate matrix whose rows sum to zero
    """
    #FIXME: constructing this each time seems wasteful
    neighbor_mask = ts + tv
    #FIXME: this is being hacked to use fixed-order quadrature
    #FIXME: and to disregard the h parameter
    mu = algopy.exp(log_mu)
    kappa = algopy.exp(log_kappa)
    omega = algopy.exp(log_omega)
    S = get_selection_S(get_selection_F(log_counts, compo, log_nt_weights))
    H = get_fixation_unconstrained_kb_fquad(
            S, d, log_kb, g_quad_x, g_quad_w, neighbor_mask)
    #H = get_fixation_unconstrained_kb_fquad_cython(
            #S, d, log_kb, codon_neighbor_mask)
    ts_tv_factor = kappa * ts + tv
    syn_factor = omega * nonsyn + syn
    nt_factor = algopy.exp(algopy.dot(asym_compo, log_nt_weights))
    pre_Q = mu * ts_tv_factor * syn_factor * nt_factor * H
    return pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
def get_branch_mix_ll(subs_counts, probs, pre_Qs, distn, branch_length):
    """
    Log likelihood for a two-component mixture of rate matrices.

    This calculation is compatible with algopy.
    The word 'mix' in the name does not refer to a mix of branch lengths,
    but rather to a mixture of unscaled parameterized rate matrices.

    @param subs_counts: substitution counts
    @param probs: discrete distribution of mixture probabilities
    @param pre_Qs: rates with arbitrary scaling and arbitrary diagonals
    @param distn: initial distribution common to both component processes
    @param branch_length: expected number of changes
    @return: log likelihood
    """
    # Both components share the same initial distribution.
    shared_distns = [distn, distn]
    Ps = get_branch_mix(probs, pre_Qs, shared_distns, branch_length)
    # Convex combination of the per-component transition probabilities.
    mixed = probs[0] * Ps[0] + probs[1] * Ps[1]
    # Scale each row by the initial distribution;
    # the result is symmetric if the process is reversible.
    joint = (mixed.T * distn).T
    return algopy.sum(algopy.log(joint) * subs_counts)
def f_eqcons(
        theta,
        subs_counts, log_counts, v,
        h,
        ts, tv, syn, nonsyn, compo, asym_compo,
        ):
    """
    Evaluate the equality constraint residuals.

    Only theta is read by the active code; the other arguments are
    accepted but unused.

    @param theta: parameter vector whose last four entries are summed
    @return: length-2 array; entry 0 is sum(theta[-4:]), entry 1 stays zero
    """
    #FIXME: obsolete
    #FIXME: hardcoded for selection without recessivity parameters
    #
    # Each entry should be zero when the equality
    # constraints are satisfied.
    violations = algopy.zeros(2, dtype=theta)
    #
    # Disabled: equality constraint for the expected rate,
    # which is easily computed through the rate matrix.
    #   Q = get_Q_slsqp(
    #           ts, tv, syn, nonsyn, compo, asym_compo,
    #           h,
    #           log_counts, v,
    #           theta)
    #   expected_rate = -algopy.dot(algopy.diag(Q), v)
    #   equality_violations[0] = theta[0] - expected_rate
    #
    # Equality constraint for the mutational process
    # nucleotide equilibrium distribution.
    violations[0] = algopy.sum(theta[-4:])
    return violations
def get_Q(ts, tv, syn, nonsyn, compo, asym_compo,
          h,
          log_counts,
          log_mu, log_kappa, log_omega, log_nt_weights):
    """
    Assemble a codon rate matrix; notation is from Yang and Nielsen 2008.

    The arguments come in four groups: precomputed ndarrays, the fixation
    function, empirically (non-free) estimated parameters, and
    quantities that depend only on free parameters.
    Speed matters.

    @param ts: indicator for transition
    @param tv: indicator for transversion
    @param syn: indicator for synonymous codons
    @param nonsyn: indicator for nonsynonymous codons
    @param compo: site independent nucleotide composition per codon
    @param asym_compo: tensor from get_asym_compo function
    @param h: fixation function, called as h(S)
    @param log_counts: empirically counted codons in the data set
    @param log_mu: free param for scaling
    @param log_kappa: free param for transition transversion rate distinction
    @param log_omega: free param for syn nonsyn rate distinction
    @param log_nt_weights: mostly free param array for mutation equilibrium
    @return: rate matrix whose rows sum to zero
    """
    mu = algopy.exp(log_mu)
    kappa = algopy.exp(log_kappa)
    omega = algopy.exp(log_omega)
    S = get_selection_S(get_selection_F(log_counts, compo, log_nt_weights))
    ts_tv_factor = kappa * ts + tv
    syn_factor = omega * nonsyn + syn
    nt_factor = algopy.exp(algopy.dot(asym_compo, log_nt_weights))
    pre_Q = mu * ts_tv_factor * syn_factor * nt_factor * h(S)
    return pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
def get_Q(gtr, syn, nonsyn, compo, asym_compo,
          h,
          log_counts,
          log_mu, log_g, log_omega, log_nt_weights):
    """
    Assemble a codon rate matrix with GTR-style exchangeabilities.

    Most of the notation is from Yang and Nielsen 2008.
    The arguments come in four groups: precomputed ndarrays, the fixation
    function, empirically (non-free) estimated parameters, and
    quantities that depend only on free parameters.

    @param gtr: ndim-3 ndarray indicating the nucleotide exchange type
    @param syn: indicator for synonymous codons
    @param nonsyn: indicator for nonsynonymous codons
    @param compo: site independent nucleotide composition per codon
    @param asym_compo: tensor from get_asym_compo function
    @param h: fixation function, called as h(S)
    @param log_counts: empirically counted codons in the data set
    @param log_mu: free param for scaling
    @param log_g: logs of six exchangeabilities
    @param log_omega: free param for syn nonsyn rate distinction
    @param log_nt_weights: mostly free param array for mutation equilibrium
    @return: rate matrix whose rows sum to zero
    """
    mu = algopy.exp(log_mu)
    g = algopy.exp(log_g)
    omega = algopy.exp(log_omega)
    S = get_selection_S(get_selection_F(log_counts, compo, log_nt_weights))
    exch_factor = algopy.dot(gtr, g)
    syn_factor = omega * nonsyn + syn
    nt_factor = algopy.exp(algopy.dot(asym_compo, log_nt_weights))
    pre_Q = mu * exch_factor * syn_factor * nt_factor * h(S)
    return pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
def create_transition_matrix_numeric(mu, d, v):
    """
    Build a transition matrix using numerical integration.

    This is not so compatible with algopy because it goes through fortran.
    Note that d = 2*h - 1 following Kimura 1957.
    The rate mu is a catch-all scaling factor.
    The finite distribution v is assumed to be a stochastic vector.

    @param mu: scales the rate matrix
    @param d: dominance (as opposed to recessiveness) of preferred states.
    @param v: numpy array defining a distribution over states
    @return: transition matrix
    """
    # Pure numpy: S[i, j] = log(v[j]) - log(v[i]).
    log_v = numpy.log(v)
    unit = numpy.ones_like(log_v)
    S = numpy.outer(unit, log_v) - numpy.outer(log_v, unit)
    # From here on the values may carry truncated Taylor information
    # if mu and d are algopy objects.
    D = d * numpy.sign(S)
    fixation = numpy.vectorize(numeric_fixation)
    pre_Q = mu * fixation(0.5*S, D)
    # Set the diagonal so each row sums to zero, then exponentiate.
    row_totals = algopy.sum(pre_Q, axis=1)
    return algopy.expm(pre_Q - algopy.diag(row_totals))
def get_log_likelihood(pre_Q_prefix, pre_Q_suffix, v, subs_counts):
    """
    Log likelihood from the hadamard components of pre_Q.

    The stationary distribution of P is empirically derived.
    It is proportional to the codon counts by construction.

    @param pre_Q_prefix: component of hadamard decomposition of pre_Q
    @param pre_Q_suffix: component of hadamard decomposition of pre_Q
    @param v: stationary distribution proportional to observed codon counts
    @param subs_counts: observed substitution counts
    @return: log likelihood
    """
    Q = get_Q(pre_Q_prefix, pre_Q_suffix)
    P = algopy.expm(Q)
    #
    # This untested eigh approach is way too slow because of the algopy eigh.
    #   Da = numpy.diag(numpy.sqrt(v))
    #   Db = numpy.diag(numpy.reciprocal(numpy.sqrt(v)))
    #   Q_symmetrized = algopy.dot(Da, algopy.dot(Q, Db))
    #   w, V = algopy.eigh(Q_symmetrized)
    #   W_exp = algopy.diag(algopy.exp(w))
    #   P_symmetrized = algopy.dot(V, algopy.dot(W_exp, V.T))
    #   P = algopy.dot(Db, algopy.dot(P_symmetrized, Da))
    #
    # Scale the rows of P by v before taking logs.
    joint = algopy.dot(algopy.diag(v), P)
    return algopy.sum(algopy.log(joint) * subs_counts)
def get_Q(
        ts, tv, syn, nonsyn, compo, asym_compo,
        h,
        log_counts,
        log_mu, log_kappa, log_omega, log_nt_weights):
    """
    Assemble a codon rate matrix; notation is from Yang and Nielsen 2008.

    The arguments come in four groups: precomputed ndarrays, the fixation
    function, empirically (non-free) estimated parameters, and
    quantities that depend only on free parameters.
    Speed matters.

    @param ts: indicator for transition
    @param tv: indicator for transversion
    @param syn: indicator for synonymous codons
    @param nonsyn: indicator for nonsynonymous codons
    @param compo: site independent nucleotide composition per codon
    @param asym_compo: tensor from get_asym_compo function
    @param h: fixation function, called as h(S)
    @param log_counts: empirically counted codons in the data set
    @param log_mu: free param for scaling
    @param log_kappa: free param for transition transversion rate distinction
    @param log_omega: free param for syn nonsyn rate distinction
    @param log_nt_weights: mostly free param array for mutation equilibrium
    @return: rate matrix whose rows sum to zero
    """
    mu = algopy.exp(log_mu)
    kappa = algopy.exp(log_kappa)
    omega = algopy.exp(log_omega)
    S = get_selection_S(get_selection_F(log_counts, compo, log_nt_weights))
    ts_tv_factor = kappa * ts + tv
    syn_factor = omega * nonsyn + syn
    nt_factor = algopy.exp(algopy.dot(asym_compo, log_nt_weights))
    pre_Q = mu * ts_tv_factor * syn_factor * nt_factor * h(S)
    return pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
def get_Q_slsqp(
        ts, tv, syn, nonsyn, compo, asym_compo,
        h,
        log_counts, v,
        theta):
    """
    Build a rate matrix scaled so its expected rate equals theta[0].

    theta is unpacked as branch length, kappa, omega (entries 0, 1, 2)
    and log nucleotide weights (the last four entries).
    The expected rate used for scaling is dot(row sums of pre_Q, v).

    @param h: fixation function, called as h(S)
    @param v: weights applied to the per-state rates when scaling
    @param theta: parameter vector
    @return: scaled rate matrix
    """
    #FIXME: hardcoded for selection without recessivity parameters
    #
    # unpack theta
    branch_length, kappa, omega = theta[0], theta[1], theta[2]
    # Disabled alternative that derived the weights from probabilities:
    #   nt_probs = algopy.zeros(4, dtype=theta)
    #   nt_probs[0] = theta[3]
    #   nt_probs[1] = theta[4]
    #   nt_probs[2] = theta[5]
    #   nt_probs[3] = 1.0 - algopy.sum(nt_probs)
    #   print nt_probs
    #   log_nt_weights = algopy.log(nt_probs)
    log_nt_weights = theta[-4:]
    #
    S = get_selection_S(get_selection_F(log_counts, compo, log_nt_weights))
    pre_Q_exch = (kappa * ts + tv) * (omega * nonsyn + syn)
    nt_factor = algopy.exp(algopy.dot(asym_compo, log_nt_weights))
    pre_Q = pre_Q_exch * nt_factor * h(S)
    rates = algopy.sum(pre_Q, axis=1)
    Q = pre_Q - algopy.diag(rates)
    # Rescale so that the v-weighted rate equals the branch length.
    Q *= branch_length / algopy.dot(rates, v)
    return Q
def get_Q(
        gtr, syn, nonsyn, compo, asym_compo,
        h,
        log_counts,
        log_mu, log_g, log_omega, log_nt_weights):
    """
    Assemble a codon rate matrix with GTR-style exchangeabilities.

    Most of the notation is from Yang and Nielsen 2008.
    The arguments come in four groups: precomputed ndarrays, the fixation
    function, empirically (non-free) estimated parameters, and
    quantities that depend only on free parameters.

    @param gtr: ndim-3 ndarray indicating the nucleotide exchange type
    @param syn: indicator for synonymous codons
    @param nonsyn: indicator for nonsynonymous codons
    @param compo: site independent nucleotide composition per codon
    @param asym_compo: tensor from get_asym_compo function
    @param h: fixation function, called as h(S)
    @param log_counts: empirically counted codons in the data set
    @param log_mu: free param for scaling
    @param log_g: logs of six exchangeabilities
    @param log_omega: free param for syn nonsyn rate distinction
    @param log_nt_weights: mostly free param array for mutation equilibrium
    @return: rate matrix whose rows sum to zero
    """
    mu = algopy.exp(log_mu)
    g = algopy.exp(log_g)
    omega = algopy.exp(log_omega)
    S = get_selection_S(get_selection_F(log_counts, compo, log_nt_weights))
    exch_factor = algopy.dot(gtr, g)
    syn_factor = omega * nonsyn + syn
    nt_factor = algopy.exp(algopy.dot(asym_compo, log_nt_weights))
    pre_Q = mu * exch_factor * syn_factor * nt_factor * h(S)
    return pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
def pre_Q_to_Q(pre_Q, stationary_distn, target_expected_rate):
    """
    Return a matrix with a different diagonal and a different scaling.

    The diagonal is replaced so that each row sums to zero, and the
    matrix is scaled so that its expected rate under stationary_distn
    equals target_expected_rate.

    @param pre_Q: rates with arbitrary scaling and arbitrary diagonals
    @param stationary_distn: weights used to compute the expected rate
    @param target_expected_rate: expected rate of the returned matrix
    @return: rescaled rate matrix
    """
    row_totals = algopy.sum(pre_Q, axis=1)
    unscaled = pre_Q - algopy.diag(row_totals)
    # Expected rate of the unscaled matrix.
    current_rate = -algopy.dot(algopy.diag(unscaled), stationary_distn)
    return (target_expected_rate / current_rate) * unscaled
def get_Q(pre_Q_prefix, pre_Q_suffix):
    """
    Multiply the two components elementwise and zero the row sums.

    @param pre_Q_prefix: this is an algopy aware ndarray
    @param pre_Q_suffix: this is a numpy ndarray
    @return: an algopy aware ndarray
    """
    product = pre_Q_prefix * pre_Q_suffix
    row_totals = algopy.sum(product, axis=1)
    return product - algopy.diag(row_totals)
def get_log_likelihood(P, v, subs_counts):
    """
    Log likelihood of the substitution counts given a transition matrix.

    The stationary distribution of P is empirically derived.
    It is proportional to the codon counts by construction.

    @param P: a transition matrix using codon counts and free parameters
    @param v: stationary distribution proportional to observed codon counts
    @param subs_counts: observed substitution counts
    @return: sum of subs_counts times the log of (P.T * v)
    """
    log_scores = algopy.log(P.T * v)
    return algopy.sum(subs_counts * log_scores)
def get_log_likelihood(P, v, subs_counts):
    """
    Log likelihood of the substitution counts given a transition matrix.

    The stationary distribution of P is empirically derived.
    It is proportional to the codon counts by construction.

    @param P: a transition matrix using codon counts and free parameters
    @param v: stationary distribution proportional to observed codon counts
    @param subs_counts: observed substitution counts
    @return: sum of the log of (P.T * v) times subs_counts
    """
    log_scores = algopy.log(P.T * v)
    return algopy.sum(log_scores * subs_counts)
def transform_params(Y):
    """
    Map a parameter vector to two rates and a four-state distribution.

    Y is exponentiated elementwise.  Entries 0 and 1 become the two rates;
    entries 2, 3 and 4 become the first three unnormalized weights of the
    distribution, whose fourth weight is fixed at 1.0.

    @param Y: parameter vector with at least five entries
    @return: (tsrate, tvrate, v) where v sums to one
    """
    X = algopy.exp(Y)
    weights = algopy.zeros(4, dtype=X)
    for k in range(3):
        weights[k] = X[k + 2]
    weights[3] = 1.0
    v = weights / algopy.sum(weights)
    return X[0], X[1], v
def get_neg_ll(vY, mX, vBeta):
    """
    Negative log likelihood with linear predictor dot(mX, vBeta).

    Entries with vY equal to 1 contribute log1p(exp(-alpha)) and
    entries with vY equal to 0 contribute log1p(exp(alpha)).

    @param vY: predefined numpy array
    @param mX: predefined numpy array
    @param vBeta: parameters of the likelihood function
    @return: the summed negative log likelihood
    """
    #FIXME: algopy could benefit from the addition of a logsumexp function...
    alpha = algopy.dot(mX, vBeta)
    cost_if_one = algopy.log1p(algopy.exp(-alpha))
    cost_if_zero = algopy.log1p(algopy.exp(alpha))
    return algopy.sum(vY * cost_if_one + (1 - vY) * cost_if_zero)
def unpack_distribution(nstates, d4_reduction, d4_nstates, X):
    """
    Expand reduced log weights into a normalized distribution.

    Each full state i takes its log weight from X at index d4_reduction[i],
    except that states mapped to the last reduced index (d4_nstates - 1)
    get log weight zero.  The weights are exponentiated and normalized.

    @param nstates: number of states in the returned distribution
    @param d4_reduction: for each full state, the index of its reduced state
    @param d4_nstates: number of reduced states
    @param X: log weights for the reduced states other than the last one
    @return: distribution over nstates states
    """
    reference_state = d4_nstates - 1
    log_v = algopy.zeros(nstates, dtype=X)
    for i_full, i_reduced in enumerate(d4_reduction):
        is_reference = (i_reduced == reference_state)
        log_v[i_full] = 0.0 if is_reference else X[i_reduced]
    weights = algopy.exp(log_v)
    return weights / algopy.sum(weights)
def ratios_to_distn(ratios):
    """
    Convert probability ratios into a finite distribution.

    The trailing state gets weight one, the leading states get the given
    ratios as weights, and the weights are normalized.

    @param ratios: n-1 ratios of leading prob to the trailing prob
    @return: a finite distribution over n states
    """
    nstates = ratios.shape[0] + 1
    weights = algopy.ones(nstates, dtype=ratios)
    weights[:-1] = ratios
    return weights / algopy.sum(weights)
def get_neg_ll(y, X, theta):
    """
    Negative log likelihood built from log1p and gammaln terms.

    The last entry of theta is alpha and the leading entries are beta;
    the quantity a is alpha * exp(dot(X, beta)).

    @param y: observed responses
    @param X: design matrix multiplied with beta
    @param theta: parameter vector (beta followed by alpha)
    @return: negative of the summed log likelihood terms
    """
    beta, alpha = theta[:-1], theta[-1]
    a = alpha * algopy.exp(algopy.dot(X, beta))
    gammaln = algopy.special.gammaln
    # The five terms are added in the same order as written here.
    term_y = -y * algopy.log1p(1 / a)
    term_a = -algopy.log1p(a) / alpha
    term_g1 = gammaln(y + 1 / alpha)
    term_g2 = -gammaln(y + 1)
    term_g3 = -gammaln(1 / alpha)
    ll = algopy.sum(term_y + term_a + term_g1 + term_g2 + term_g3)
    return -ll
def get_Q_unconstrained(gtr, syn, nonsyn, compo, asym_compo,
                        h,
                        log_counts,
                        log_mu, log_g, log_omega, d, log_nt_weights):
    """
    Assemble a rate matrix whose fixation function also takes d.

    The factors are the same as in the GTR-style get_Q except that the
    fixation function is called as h(S, d).

    @param h: fixation function, called as h(S, d)
    @param d: extra argument forwarded to h
    @return: rate matrix whose rows sum to zero
    """
    mu = algopy.exp(log_mu)
    g = algopy.exp(log_g)
    omega = algopy.exp(log_omega)
    S = get_selection_S(get_selection_F(log_counts, compo, log_nt_weights))
    exch_factor = algopy.dot(gtr, g)
    syn_factor = omega * nonsyn + syn
    nt_factor = algopy.exp(algopy.dot(asym_compo, log_nt_weights))
    pre_Q = mu * exch_factor * syn_factor * nt_factor * h(S, d)
    return pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
def get_neg_ll(vY, mX, vBeta):
    """
    Negative log likelihood with linear predictor dot(mX, vBeta).

    Entries with vY equal to 1 contribute log1p(exp(-alpha)) and
    entries with vY equal to 0 contribute log1p(exp(alpha)).

    @param vY: predefined numpy array
    @param mX: predefined numpy array
    @param vBeta: parameters of the likelihood function
    @return: the summed negative log likelihood
    """
    #FIXME: algopy could benefit from the addition of a logsumexp function...
    alpha = algopy.dot(mX, vBeta)
    cost_if_one = algopy.log1p(algopy.exp(-alpha))
    cost_if_zero = algopy.log1p(algopy.exp(alpha))
    return algopy.sum(vY*cost_if_one + (1-vY)*cost_if_zero)
def get_neg_ll(y, X, theta):
    """
    Negative log likelihood built from log1p and gammaln terms.

    The last entry of theta is alpha and the leading entries are beta;
    the quantity a is alpha * exp(dot(X, beta)).

    @param y: observed responses
    @param X: design matrix multiplied with beta
    @param theta: parameter vector (beta followed by alpha)
    @return: negative of the summed log likelihood terms
    """
    beta, alpha = theta[:-1], theta[-1]
    a = alpha * algopy.exp(algopy.dot(X, beta))
    gammaln = algopy.special.gammaln
    # The five terms are added in the same order as written here.
    term_y = -y*algopy.log1p(1/a)
    term_a = -algopy.log1p(a) / alpha
    term_g1 = gammaln(y + 1/alpha)
    term_g2 = -gammaln(y + 1)
    term_g3 = -gammaln(1/alpha)
    ll = algopy.sum(term_y + term_a + term_g1 + term_g2 + term_g3)
    return -ll
def eval_f_eigh(Y):
    """
    Negative log likelihood computed through a symmetric eigendecomposition.

    This is a reformulation of eval_f_orig in which scipy.linalg.expm is
    replaced by algopy.eigh applied to the rate matrix conjugated by
    diag(sqrt(v)) and its inverse, so that the function can be
    differentiated with algopy.

    @param Y: parameter vector handed to transform_params
    @return: negative sum of the log joint matrix weighted by g_data
    """
    a, b, v = transform_params(Y)
    # Layout of the unscaled rates, written row by row.
    layout = (
        (0, a, b, b),
        (a, 0, b, b),
        (b, b, 0, a),
        (b, b, a, 0),
    )
    Q = algopy.zeros((4, 4), dtype=Y)
    for i, row in enumerate(layout):
        for j, rate in enumerate(row):
            Q[i, j] = rate
    # Scale the columns by v, then fix the diagonal so rows sum to zero.
    Q = algopy.dot(Q, algopy.diag(v))
    Q -= algopy.diag(algopy.sum(Q, axis=1))
    root_v = algopy.sqrt(v)
    va = algopy.diag(root_v)
    vb = algopy.diag(1. / root_v)
    # Decompose the conjugated matrix and exponentiate its eigenvalues.
    W, U = algopy.eigh(algopy.dot(algopy.dot(va, Q), vb))
    exp_W = algopy.diag(algopy.exp(W))
    M = algopy.dot(U, algopy.dot(exp_W, U.T))
    # Undo the conjugation to recover the transition matrix.
    P = algopy.dot(vb, algopy.dot(M, va))
    joint = algopy.dot(algopy.diag(v), P)
    return -algopy.sum(algopy.log(joint) * g_data)
def get_Q_unconstrained_kb(
        gtr, syn, nonsyn, compo, asym_compo,
        h,
        log_counts,
        log_mu, log_g, log_omega, d, log_kb, log_nt_weights):
    """
    Assemble a rate matrix whose fixation function takes d and log_kb.

    The factors are the same as in the GTR-style get_Q except that the
    fixation function is called as h(S, d, log_kb).

    @param h: fixation function, called as h(S, d, log_kb)
    @param d: extra argument forwarded to h
    @param log_kb: extra argument forwarded to h
    @return: rate matrix whose rows sum to zero
    """
    mu = algopy.exp(log_mu)
    g = algopy.exp(log_g)
    omega = algopy.exp(log_omega)
    S = get_selection_S(get_selection_F(log_counts, compo, log_nt_weights))
    exch_factor = algopy.dot(gtr, g)
    syn_factor = omega * nonsyn + syn
    nt_factor = algopy.exp(algopy.dot(asym_compo, log_nt_weights))
    pre_Q = mu * exch_factor * syn_factor * nt_factor * h(S, d, log_kb)
    return pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
def eval_f(Y):
    """
    Negative log likelihood computed through the algopy_expm helper.

    This is a reformulation of eval_f_orig meant to be compatible
    with algopy; scipy.linalg.expm is not used here because algopy
    does not support it.

    @param Y: parameter vector handed to transform_params
    @return: negative sum of the log joint matrix weighted by g_data
    """
    a, b, v = transform_params(Y)
    # Layout of the unscaled rates, written row by row.
    layout = (
        (0, a, b, b),
        (a, 0, b, b),
        (b, b, 0, a),
        (b, b, a, 0),
    )
    Q = algopy.zeros((4, 4), dtype=Y)
    for i, row in enumerate(layout):
        for j, rate in enumerate(row):
            Q[i, j] = rate
    # Scale the columns by v, then fix the diagonal so rows sum to zero.
    Q = Q * v
    Q -= algopy.diag(algopy.sum(Q, axis=1))
    #P = linalg.expm(Q)
    # XXX can I get rid of the 4 on the following line?
    P = algopy_expm(Q, 4)
    joint = algopy.dot(algopy.diag(v), P)
    return -algopy.sum(algopy.log(joint) * g_data)
def get_conditional_log_likelihood(pre_Q_prefix, pre_Q_suffix, subs_counts):
    """
    Log likelihood conditional on the initial sequence.

    NOTE: this is not the usual log likelihood,
    because it is conditional on the initial sequence,
    and it is not assumed to be at stationarity with respect to the
    pair of diverged sequences.
    It is kind of a hack.

    @param pre_Q_prefix: component of hadamard decomposition of pre_Q
    @param pre_Q_suffix: component of hadamard decomposition of pre_Q
    @param subs_counts: observed substitution counts
    @return: sum of log transition probabilities weighted by the counts
    """
    P = algopy.expm(get_Q(pre_Q_prefix, pre_Q_suffix))
    return algopy.sum(algopy.log(P) * subs_counts)
def eval_f_explicit(subs_counts, v, Y):
    """
    Negative log likelihood using the explicit transition matrix.

    Note that Y is last for compatibility with functools.partial.
    It is convenient for usage with numdifftools, although this parameter
    ordering is the opposite of the convention of scipy.optimize.

    @param subs_counts: observed data
    @param v: fixed equilibrium probabilities for states
    @param Y: parameters to jointly estimate
    @return: negative log likelihood
    """
    P = create_transition_matrix_explicit(Y, v)
    # Scale the rows of P by v before taking logs.
    joint = algopy.dot(algopy.diag(v), P)
    return -algopy.sum(algopy.log(joint) * subs_counts)
def get_branch_ll(subs_counts, pre_Q, distn, branch_length):
    """
    Log likelihood of substitution counts along a single branch.

    This log likelihood calculation function is compatible with algopy.

    @param subs_counts: substitution counts
    @param pre_Q: rates with arbitrary scaling and arbitrary diagonals
    @param distn: initial distribution
    @param branch_length: expected number of changes
    @return: log likelihood
    """
    P = algopy.expm(pre_Q_to_Q(pre_Q, distn, branch_length))
    # Scale each row by the initial distribution;
    # the result is symmetric if the process is reversible.
    joint = (P.T * distn).T
    return algopy.sum(algopy.log(joint) * subs_counts)
def get_two_taxon_neg_ll(
        model,
        em_probs, em_distns,
        subs_counts,
        ts, tv, syn, nonsyn, compo, asym_compo,
        natural_theta,
        ):
    """
    Get the negative log likelihood.

    This function does not use the logarithms.
    It is mostly for computing the hessian;
    otherwise the version with the logarithms would probably be better.
    The first param group is the model implementation.
    The second param group is expectation-maximization stuff.
    The third param group is the data.
    The next param group consists of design matrices related to genetic code.
    The next param group consist of free parameters of the model.

    As a side effect the computed value is printed to stdout.

    @param model: object providing get_pre_Qs and get_distns
    @param em_probs: mixture probabilities; entries 0 and 1 are used
    @param em_distns: forwarded to the model
    @param subs_counts: observed substitution counts
    @param natural_theta: branch length followed by the model parameters
    @return: negative log likelihood
    """
    # unpack some parameters
    branch_length = natural_theta[0]
    natural_model_theta = natural_theta[1:]
    # compute the appropriately scaled transition matrices
    pre_Qs = model.get_pre_Qs(
            em_probs, em_distns,
            ts, tv, syn, nonsyn, compo, asym_compo,
            natural_model_theta)
    eq_distns = model.get_distns(
            em_probs, em_distns,
            ts, tv, syn, nonsyn, compo, asym_compo,
            natural_model_theta)
    Ps = markovutil.get_branch_mix(em_probs, pre_Qs, eq_distns, branch_length)
    # compute the mixture transition matrix,
    # scaling the rows of each component by its own equilibrium distribution
    P_mix = algopy.zeros_like(Ps[0])
    P_mix += em_probs[0] * (Ps[0].T * eq_distns[0]).T
    P_mix += em_probs[1] * (Ps[1].T * eq_distns[1]).T
    # compute the neg log likelihood
    neg_ll = -algopy.sum(algopy.log(P_mix) * subs_counts)
    # Parenthesized form: prints the same under Python 2 and is valid
    # syntax under Python 3, unlike the bare print statement.
    print(neg_ll)
    return neg_ll
def get_branch_mix(probs, pre_Qs, eq_distns, branch_length):
    """
    Compute time-scaled transition matrices for a mixture of processes.

    This calculation is compatible with algopy.
    The word 'mix' in the name does not refer to a mix of branch lengths,
    but rather to a mixture of unscaled parameterized rate matrices.

    @param probs: discrete distribution of mixture probabilities
    @param pre_Qs: rates with arbitrary scaling and arbitrary diagonals
    @param eq_distns: equilibrium distributions
    @param branch_length: expected number of changes
    @return: transition matrices
    """
    # Fix the diagonals and accumulate the expected rate of the mixture.
    # The accumulation is written out explicitly because the entries of
    # the lists carry taylor information while the lists themselves are
    # not taylor-aware.  This could be reorganized later to use
    # more explicitly taylor-aware containers.
    unscaled_Qs = []
    mixture_rate = 0
    for p, pre_Q, eq_distn in zip(probs, pre_Qs, eq_distns):
        unscaled = pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
        unscaled_Qs.append(unscaled)
        component_rate = -algopy.dot(algopy.diag(unscaled), eq_distn)
        mixture_rate = mixture_rate + p * component_rate
    # A common factor makes the expected rate of the mixture equal
    # to the requested branch length.
    # Return the appropriately time-scaled transition matrices.
    return [
            algopy.expm((branch_length / mixture_rate) * unscaled)
            for unscaled in unscaled_Qs]
def get_Q_unconstrained(ts, tv, syn, nonsyn, compo, asym_compo,
                        h,
                        log_counts,
                        log_mu, log_kappa, log_omega, d, log_nt_weights):
    """
    Build a rate matrix whose fixation factor takes the parameter d.

    This adds a single parameter (d).
    The fixation factor is computed with fixed-order quadrature using the
    module-level g_quad_x and g_quad_w; the h argument is not used.

    @return: rate matrix whose rows sum to zero
    """
    #FIXME: constructing this each time seems wasteful
    neighbor_mask = ts + tv
    #FIXME: this is being hacked to use fixed-order quadrature
    #FIXME: and to disregard the h parameter
    mu = algopy.exp(log_mu)
    kappa = algopy.exp(log_kappa)
    omega = algopy.exp(log_omega)
    S = get_selection_S(get_selection_F(log_counts, compo, log_nt_weights))
    H = get_fixation_unconstrained_fquad(
            S, d, g_quad_x, g_quad_w, neighbor_mask)
    #H = get_fixation_unconstrained_fquad_cython(
            #S, d, codon_neighbor_mask)
    ts_tv_factor = kappa * ts + tv
    syn_factor = omega * nonsyn + syn
    nt_factor = algopy.exp(algopy.dot(asym_compo, log_nt_weights))
    pre_Q = mu * ts_tv_factor * syn_factor * nt_factor * H
    return pre_Q - algopy.diag(algopy.sum(pre_Q, axis=1))
def create_transition_matrix_explicit(Y, v):
    """
    Build a transition matrix using hypergeometric functions.

    Note that d = 2*h - 1 following Kimura 1957.
    The rate mu is a catch-all scaling factor.
    The finite distribution v is assumed to be a stochastic vector.

    @param Y: vector of parameters to optimize
    @param v: numpy array defining a distribution over states
    @return: transition matrix
    """
    nstates = len(v)
    mu, d = transform_params(Y)
    # Pure numpy: S[i, j] = log(v[j]) - log(v[i]).
    log_v = numpy.log(v)
    unit = numpy.ones_like(log_v)
    S = numpy.outer(unit, log_v) - numpy.outer(log_v, unit)
    # From here on the values may carry truncated Taylor information
    # if mu and d are algopy objects.
    D = d * numpy.sign(S)
    #FIXME: I would like to further vectorize this block,
    # and also it may currently give subtly wrong results
    # because denom_piecewise may not vectorize correctly.
    pre_Q = algopy.zeros((nstates, nstates), dtype=Y)
    for row in range(nstates):
        for col in range(nstates):
            denom = denom_piecewise(0.5*S[row, col], D[row, col])
            pre_Q[row, col] = 1. / denom
    pre_Q = mu * pre_Q
    # Set the diagonal so each row sums to zero, then exponentiate.
    row_totals = algopy.sum(pre_Q, axis=1)
    return algopy.expm(pre_Q - algopy.diag(row_totals))
def create_transition_matrix_explicit(Y, v):
    """
    Build a transition matrix using hypergeometric functions.

    Note that d = 2*h - 1 following Kimura 1957.
    The rate mu is a catch-all scaling factor.
    The finite distribution v is assumed to be a stochastic vector.

    @param Y: vector of parameters to optimize
    @param v: numpy array defining a distribution over states
    @return: transition matrix
    """
    nstates = len(v)
    mu, d = transform_params(Y)
    # Pure numpy: S[i, j] = log(v[j]) - log(v[i]).
    log_v = numpy.log(v)
    unit = numpy.ones_like(log_v)
    S = numpy.outer(unit, log_v) - numpy.outer(log_v, unit)
    # From here on the values may carry truncated Taylor information
    # if mu and d are algopy objects.
    D = d * numpy.sign(S)
    #FIXME: I would like to further vectorize this block,
    # and also it may currently give subtly wrong results
    # because denom_piecewise may not vectorize correctly.
    pre_Q = algopy.zeros((nstates, nstates), dtype=Y)
    for row in range(nstates):
        for col in range(nstates):
            denom = denom_piecewise(0.5 * S[row, col], D[row, col])
            pre_Q[row, col] = 1. / denom
    pre_Q = mu * pre_Q
    # Set the diagonal so each row sums to zero, then exponentiate.
    row_totals = algopy.sum(pre_Q, axis=1)
    return algopy.expm(pre_Q - algopy.diag(row_totals))
def rosen(x):
    """
    Arbitrary-dimensional Rosenbrock function for testing.

    @param x: array of coordinates supporting slicing and arithmetic
    @return: sum over consecutive pairs of
        100 * (x[i+1] - x[i]**2)**2 + (1 - x[i])**2
    """
    head, tail = x[:-1], x[1:]
    valley_term = 100.0 * (tail - head**2.0)**2.0
    offset_term = (1 - head)**2.0
    return algopy.sum(valley_term + offset_term)
def f(x):
    """
    Sum the matrix exponential of x viewed as a 2x2 matrix.

    @param x: array with four entries, reshaped to (2, 2)
    @return: sum(expm(x)) using whichever sum and expm are in module scope
    """
    square = x.reshape((2, 2))
    return sum(expm(square))
def _occupied_density(C, D, nbasis, ne):
    """Fill D[i, j] with sum_k C[i, k] * C[j, k] over the first ne columns of C; return D."""
    for i in range(nbasis):
        for j in range(nbasis):
            tmp = 0.0
            for k in range(ne):
                tmp = tmp + C[i, k] * C[j, k]
            D[i, j] = tmp
    return D


def rhfenergy(alpha_old, coef2, xyz, l, charges, xyz_atom, natoms, nbasis,
              contr_list, ne, max_scf, max_d, log, eigen, printguess,
              readguess, name, write, dtype):
    '''
    This function returns the rhf function

    Parameters:
       alpha_old : array
                   Gaussian exponents
       coef2     : array
                   Contraction coefficients
       xyz       : array 3N
                   Gaussian centers
       l         : array 3N
                   Angular momentum, each entry is a vector
                   eg. s orbital (0,0,0) or pz (1,0,0)
       charges   : array
                   Atom charges
       xyz_atom  : array
                   Atom coordinates, passed to nuclearmatrix and
                   nuclearrepulsion.  When it is not a plain np.ndarray only
                   the nuclear matrix is built with `dtype`.
       natoms    : int
                   Number of atoms
       nbasis    : int
                   Number of basis
       contr_list: list of integers
                   Specify the number of orbitals in each atom
       ne        : int
                   Number of electrons.  The density is built from the first
                   `ne` columns of the coefficient matrix.
       max_scf   : int
                   maximum number of scf cycles
       max_d     : int
                   maximum number of density purification steps per scf
                   cycle (only used when eigen is False)
       log       : bool
                   The exponents are given in log
       eigen     : bool
                   True to diagonalise the Fock matrix in every cycle,
                   False to use newdensity / cannonicalputication instead
       printguess: str or None
                   File to print coeff matrix initial guess
       readguess : str or None
                   File that contains coeff matrix initial guess
       name      : str
                   Output file name (".molden" is appended)
       write     : bool
                   True if printing
       dtype     : type of output
                   This is the directive to know if algopy will be used or not
                   np.float64(1.0) if it is a single point calculation
                   otherwise, it specifies the size of the UTPM,
                   autodifferentiation
    Returns:
       energy    : float
                   RHF energy (E_elec + E_nuc) when the SCF converged;
                   otherwise the energies are printed and 99999 is returned.

    NOTE(review): the coefficient matrix and the orbital energies are only
    produced by the eigen branch (or by readguess for the coefficients), so
    printguess / write look unusable with eigen=False -- confirm with callers.
    '''
    tool_D = 1e-8  # threshold on the density purification error
    tool = 1e-8    # threshold on the change of E_elec between cycles

    alpha = algopy.exp(alpha_old) if log else alpha_old

    if not isinstance(xyz_atom, np.ndarray):
        ## Cover the case of diff xyz atom: only V is built with dtype.
        plain = np.float64(1.0)
        coef = normalization(alpha, coef2, l, contr_list, dtype=plain)
        V = nuclearmatrix(alpha, coef, xyz, l, nbasis, charges, xyz_atom,
                          natoms, contr_list, dtype=dtype)
        S = overlapmatrix(alpha, coef, xyz, l, nbasis, contr_list,
                          dtype=plain)
        T = kineticmatrix(alpha, coef, xyz, l, nbasis, contr_list,
                          dtype=plain)
        # np.float was removed from NumPy; np.float64 matches the calls above.
        Eri = erivector(alpha, coef, xyz, l, nbasis, contr_list, dtype=plain)
    else:
        coef = normalization(alpha, coef2, l, contr_list, dtype=dtype)
        S = overlapmatrix(alpha, coef, xyz, l, nbasis, contr_list,
                          dtype=dtype)
        V = nuclearmatrix(alpha, coef, xyz, l, nbasis, charges, xyz_atom,
                          natoms, contr_list, dtype=dtype)
        T = kineticmatrix(alpha, coef, xyz, l, nbasis, contr_list,
                          dtype=dtype)
        Eri = erivector(alpha, coef, xyz, l, nbasis, contr_list, dtype=dtype)

    Hcore = T + V

    if eigen:
        # Build L * diag(1/sqrt(lambda)) * L^T from the eigensystem of S.
        eigsys = eigensolver(S)
        SqrtLambda = algopy.diag(1. / algopy.sqrt(eigsys[0]))
        L = eigsys[1]
        LT = algopy.transpose(L)
        SqrtS = algopy.dot(algopy.dot(L, SqrtLambda), LT)
        SqrtST = algopy.transpose(SqrtS)
    else:
        Sinv = np.linalg.inv(S)

    if readguess is not None:
        C = np.load(readguess)
        D = _occupied_density(C, np.zeros((nbasis, nbasis)), nbasis, ne)
        F = fockmatrix(Hcore, Eri, D, nbasis, alpha, dtype)
    else:
        F = Hcore

    OldE = 1e8
    status = False
    E_step = []
    for scf_iter in range(max_scf):
        if eigen:
            # The original ran this diagonalisation twice with identical
            # inputs; once is enough.
            Fprime = algopy.dot(algopy.dot(SqrtST, F), SqrtS)
            eigsysFockOp = eigensolver(Fprime)
            Cprime = eigsysFockOp[1]
            C = algopy.dot(SqrtS, Cprime)
            D = _occupied_density(
                C, algopy.zeros((nbasis, nbasis), dtype=dtype), nbasis, ne)
        else:
            D = newdensity(F, Sinv, nbasis, ne)
            for i in range(max_d):
                D = cannonicalputication(D, S)
                # Stop once D S D reproduces D to within tool_D.
                err = np.linalg.norm(D - np.dot(np.dot(D, S), D))
                if err < tool_D:
                    break
        F = fockmatrix(Hcore, Eri, D, nbasis, alpha, dtype)
        E_elec = algopy.sum(np.multiply(D, Hcore + F))
        E_step.append(E_elec)
        if np.absolute(E_elec - OldE) < tool:
            status = True
            break
        OldE = E_elec

    # Depends only on the arguments, so it is evaluated once, after the loop.
    E_nuc = nuclearrepulsion(xyz_atom, charges, natoms)

    if printguess is not None:
        np.save(printguess, C)

    def write_molden(tape):
        """Write the energies, matrices, input echo and molden sections to tape."""
        from Data import select_atom

        ## Details of calculation
        tape.write('[Energy] \n')
        tape.write('E_elec: ' + str(E_elec) + '\n')
        tape.write('E_nuc: ' + str(E_nuc) + '\n')
        tape.write('E_tot: ' + str(E_nuc + E_elec) + '\n')
        tape.write('SCF Details\n')
        line = 'Eigen: '
        if eigen:
            tape.write(line + 'True')
        else:
            tape.write(line + 'False')
        tape.write('\n')
        for i, step in enumerate(E_step):
            line = 'Step: ' + str(i) + ' ' + str(step)
            tape.write(line + '\n')

        ### C Matrix
        tape.write('[CM] \n')
        tape.write('C AO times MO\n')
        printmatrix(C, tape)

        ### D Matrix
        tape.write('[DM] \n')
        tape.write('D \n')
        printmatrix(D, tape)

        ### Natural orbitals of D
        tape.write('[NMO] \n')
        tape.write('NMO \n')
        printmatrix(mo_naturalorbital(D), tape)

        ### MO energies
        tape.write('[MOE] \n')
        tape.write('MOE \n')
        for i, energ in enumerate(eigsysFockOp[0]):
            tape.write(str(i) + ' ' + str(energ) + '\n')

        ### Echo of the input (molecule and basis)
        tape.write('[INPUT] \n')
        line = 'mol = ['
        for i, coord in enumerate(xyz_atom):
            line += '(' + str(charges[i]) + ','
            line += '(' + str(coord[0]) + ',' + str(coord[1]) + ',' + str(
                coord[2]) + ')),\n'
        tape.write(line)
        cont = 0
        line = 'basis = ['
        for i, ci in enumerate(contr_list):
            line += '['
            line += '(' + str(l[i][0]) + ',' + str(l[i][1]) + ',' + str(
                l[i][2]) + '),'
            for ii in range(ci):
                # NOTE(review): coef is indexed by i here but by cont in the
                # [GTO] section below -- confirm which one is intended.
                line += str(alpha[cont]) + ',' + str(coef[i])
                line += ',(' + str(xyz[i, 0]) + ',' + str(
                    xyz[i, 1]) + ',' + str(xyz[i, 2]) + ')],\n'
                cont += 1
        line += ']\n'
        tape.write(line)

        ### Atom coordinates
        tape.write('[Atoms]\n')
        for i, coord in enumerate(xyz_atom):
            line = select_atom.get(charges[i])
            line += ' ' + str(i + 1) + ' ' + str(charges[i])
            line += ' ' + str(coord[0]) + ' ' + str(coord[1]) + ' ' + str(
                coord[2]) + '\n'
            tape.write(line)

        ### Basis coordinates
        for i, coord in enumerate(xyz):
            line = 'XX'
            line += ' ' + str(i + natoms + 1) + ' ' + str(0)
            line += ' ' + str(coord[0]) + ' ' + str(coord[1]) + ' ' + str(
                coord[2]) + '\n'
            tape.write(line)

        ### Basis set
        cont = 0
        tape.write('[GTO]\n')
        for i, ci in enumerate(contr_list):
            tape.write(' ' + str(i + 1 + natoms) + ' 0\n')
            if np.sum(l[i]) == 0:
                tape.write(' s ' + str(ci) + ' 1.0 ' + str(l[i][0]) + ' ' +
                           str(l[i][1]) + ' ' + str(l[i][2]) + '\n')
            else:
                tape.write(' p ' + str(ci) + ' 1.0 ' + str(l[i][0]) + ' ' +
                           str(l[i][1]) + ' ' + str(l[i][2]) + '\n')
            for ii in range(ci):
                line = ' ' + str(alpha[cont]) + ' ' + str(coef[cont]) + '\n'
                tape.write(line)
                cont += 1
            line = ' \n'
            tape.write(line)

        ### MOs
        tape.write('[MO]\n')
        for j in range(nbasis):
            tape.write(' Sym= None\n')
            tape.write(' Ene= ' + str(eigsysFockOp[0][j]) + '\n')
            tape.write(' Spin= Alpha\n')
            # The density uses columns 0 .. ne-1 of C, so orbital ne is the
            # first empty one (the original `j > ne` was off by one).
            if j >= ne:
                tape.write(' Occup= 0.0\n')
            else:
                tape.write(' Occup= 2.0\n')
            for i in range(nbasis):
                tape.write(str(i + 1 + natoms) + ' ' + str(C[i, j]) + '\n')

    if status:
        if write:
            # The context manager closes the file even if writing fails.
            with open(name + '.molden', "w") as tape:
                write_molden(tape)
        return E_elec + E_nuc

    print('E_elec: ' + str(E_elec) + '\n')
    print('E_nuc: ' + str(E_nuc) + '\n')
    print('E_tot: ' + str(E_nuc + E_elec) + '\n')
    print('SCF DID NOT CONVERGED')
    return 99999
def obj(self, x, **kwargs):
    """Return sum of 100*(x[i+1] - x[i]**2)**2 + (1 - x[i])**2; kwargs are ignored."""
    current = x[:-1]
    following = x[1:]
    valley = 100 * (following - current**2)**2
    offset = (1 - current)**2
    return algopy.sum(valley + offset)
def f(x):
    """View x as a 2x2 matrix and return sum() applied to its expm."""
    square = x.reshape((2, 2))
    return sum(expm(square))