def unconstrained_recessivity_fixation(
        adjacency,
        kimura_d,
        S,
        ):
    """
    This should be compatible with algopy.
    But it may be very slow.
    @param adjacency: a binary design matrix to reduce unnecessary computation
    @param kimura_d: a parameter that might carry Taylor information
    @param S: an ndarray of selection differences with Taylor information
    @return: an ndarray of fixation probabilities with Taylor information
    """
    x = g_quad_x
    w = g_quad_w
    nstates = S.shape[0]
    D = algopy.sign(S) * kimura_d
    H = algopy.zeros_like(S)
    for i in range(nstates):
        for j in range(nstates):
            if not adjacency[i, j]:
                continue
            tmp_a = - S[i, j] * x
            tmp_b = algopy.exp(tmp_a * (D[i, j] * (1-x) + 1))
            tmp_c = algopy.dot(tmp_b, w)
            H[i, j] = algopy.reciprocal(tmp_c)
    return H
def unrolled_unconstrained_recessivity_fixation(
        adjacency,
        kimura_d,
        S,
        ):
    """
    This should be compatible with algopy.
    But it may be very slow.
    The unrolling is with respect to a dot product.
    @param adjacency: a binary design matrix to reduce unnecessary computation
    @param kimura_d: a parameter that might carry Taylor information
    @param S: an ndarray of selection differences with Taylor information
    @return: an ndarray of fixation probabilities with Taylor information
    """
    nknots = len(g_quad_x)
    nstates = S.shape[0]
    D = algopy.sign(S) * kimura_d
    H = algopy.zeros_like(S)
    for i in range(nstates):
        for j in range(nstates):
            if not adjacency[i, j]:
                continue
            for x, w in zip(g_quad_x, g_quad_w):
                tmp_a = - S[i, j] * x
                tmp_b = algopy.exp(tmp_a * (D[i, j] * (1-x) + 1))
                H[i, j] += tmp_b * w
            H[i, j] = algopy.reciprocal(H[i, j])
    return H
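# The two functions above integrate the Kimura fixation denominator over the
# allele frequency interval [0, 1] using the module-level nodes g_quad_x and
# weights g_quad_w.  Below is a minimal sketch of how such nodes might be
# precomputed, assuming a plain Gauss-Legendre rule mapped from [-1, 1] to
# [0, 1]; the node count and the choice of rule are illustrative assumptions,
# not necessarily what this module actually uses.
import numpy

def _example_unit_interval_quadrature(npoints=101):
    # Gauss-Legendre nodes and weights on [-1, 1].
    x, w = numpy.polynomial.legendre.leggauss(npoints)
    # Affine map to [0, 1]; the Jacobian scales the weights by 1/2.
    return 0.5 * (x + 1.0), 0.5 * w

# g_quad_x, g_quad_w = _example_unit_interval_quadrature()
# The same pair could also serve as the x, w arguments of the
# fixed-quadrature (*_fquad) functions further below.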
def get_two_taxon_neg_ll_encoded_theta(
        model,
        subs_counts,
        log_counts, codon_distn,
        ts, tv, syn, nonsyn, compo, asym_compo,
        encoded_theta,
        ):
    """
    Get the negative log likelihood.
    This function uses the logarithms of the model parameters.
    The first param group is the model implementation.
    The second param group is the data.
    The third param group consists of data summaries.
    The fourth param group consists of design matrices related to the genetic code.
    The fifth param group consists of the free parameters of the model.
    """
    branch_length = algopy.exp(encoded_theta[0])
    encoded_model_theta = encoded_theta[1:]
    natural_model_theta = model.encoded_to_natural(encoded_model_theta)
    natural_theta = algopy.zeros_like(encoded_theta)
    natural_theta[0] = branch_length
    natural_theta[1:] = natural_model_theta
    return get_two_taxon_neg_ll(
            model,
            subs_counts,
            log_counts, codon_distn,
            ts, tv, syn, nonsyn, compo, asym_compo,
            natural_theta,
            )
def get_fixation_unconstrained(S, d):
    sign_S = algopy.sign(S)
    D = d * sign_S
    H = algopy.zeros_like(S)
    for i in range(H.shape[0]):
        for j in range(H.shape[1]):
            H[i, j] = 1. / kimrecessive.denom_piecewise(
                    0.5 * S[i, j], D[i, j])
    return H
def get_fixation_dominant_disease(S):
    sign_S = algopy.sign(S)
    H = algopy.zeros_like(S)
    for i in range(H.shape[0]):
        for j in range(H.shape[1]):
            H[i, j] = 1. / kimrecessive.denom_piecewise(
                    0.5 * S[i, j], -sign_S[i, j])
    return H
def encoded_to_natural(cls, encoded_theta):
    """
    The first parameter is a proportion.
    """
    natural = algopy.zeros_like(encoded_theta)
    natural[0] = algopy.special.expit(encoded_theta[0])
    natural[1:] = algopy.exp(encoded_theta[1:])
    return natural
def natural_to_encoded(cls, natural_theta):
    """
    The first parameter is a proportion.
    """
    encoded = algopy.zeros_like(natural_theta)
    encoded[0] = algopy.special.logit(natural_theta[0])
    encoded[1:] = algopy.log(natural_theta[1:])
    return encoded
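# The pair of methods above maps a bounded proportion through logit/expit and
# the remaining positive parameters through log/exp, so that an optimizer can
# search over an unconstrained real vector.  A small round-trip sketch of that
# convention using plain numpy and scipy; the theta values are made up and the
# scipy.special calls stand in for their algopy counterparts purely for
# illustration.
import numpy
from scipy.special import expit, logit

natural_theta = numpy.array([0.25, 2.0, 0.5])    # proportion, then positive rates
encoded_theta = numpy.empty_like(natural_theta)
encoded_theta[0] = logit(natural_theta[0])        # proportion -> real line
encoded_theta[1:] = numpy.log(natural_theta[1:])  # positive -> real line

recovered = numpy.empty_like(encoded_theta)
recovered[0] = expit(encoded_theta[0])
recovered[1:] = numpy.exp(encoded_theta[1:])
assert numpy.allclose(recovered, natural_theta)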
def get_fixation_unconstrained(S, d):
    sign_S = algopy.sign(S)
    D = d * sign_S
    H = algopy.zeros_like(S)
    for i in range(H.shape[0]):
        for j in range(H.shape[1]):
            H[i, j] = 1. / kimrecessive.denom_piecewise(
                    0.5 * S[i, j], D[i, j])
    return H
def get_fixation_unconstrained_kb(S, d, log_kb):
    """
    This uses the Kacser and Burns effect instead of the sign function.
    """
    soft_sign_S = algopy.tanh(algopy.exp(log_kb) * S)
    D = d * soft_sign_S
    H = algopy.zeros_like(S)
    for i in range(H.shape[0]):
        for j in range(H.shape[1]):
            H[i, j] = 1. / kimrecessive.denom_piecewise(
                    0.5 * S[i, j], D[i, j])
    return H
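# The Kacser and Burns style soft sign above replaces sign(S) with
# tanh(exp(log_kb) * S), which is differentiable at S = 0 and approaches the
# hard sign as log_kb grows.  A small numerical sketch with made-up selection
# differences; plain numpy is used instead of algopy for illustration only.
import numpy

S_demo = numpy.array([-2.0, -0.01, 0.01, 2.0])
for log_kb_demo in (-2.0, 0.0, 4.0):
    soft = numpy.tanh(numpy.exp(log_kb_demo) * S_demo)
    print(log_kb_demo, soft, numpy.sign(S_demo))
# For large log_kb the soft sign is numerically indistinguishable from
# sign(S), while for small log_kb entries near S = 0 are shrunk toward zero.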
def natural_to_encoded(cls, natural_theta):
    """
    The first parameter is a proportion.
    The fourth parameter is unconstrained.
    """
    encoded = algopy.zeros_like(natural_theta)
    encoded[0] = algopy.special.logit(natural_theta[0])
    encoded[1] = algopy.log(natural_theta[1])
    encoded[2] = algopy.log(natural_theta[2])
    encoded[3] = natural_theta[3]
    encoded[4:] = algopy.log(natural_theta[4:])
    return encoded
def get_fixation_unconstrained_kb(S, d, log_kb):
    """
    This uses the Kacser and Burns effect instead of the sign function.
    """
    soft_sign_S = algopy.tanh(algopy.exp(log_kb) * S)
    D = d * soft_sign_S
    H = algopy.zeros_like(S)
    for i in range(H.shape[0]):
        for j in range(H.shape[1]):
            H[i, j] = 1. / kimrecessive.denom_piecewise(
                    0.5 * S[i, j], D[i, j])
    return H
def encoded_to_natural(cls, encoded_theta):
    """
    The first parameter is a proportion.
    The fourth parameter is unconstrained.
    """
    natural = algopy.zeros_like(encoded_theta)
    natural[0] = algopy.special.expit(encoded_theta[0])
    natural[1] = algopy.exp(encoded_theta[1])
    natural[2] = algopy.exp(encoded_theta[2])
    natural[3] = encoded_theta[3]
    natural[4:] = algopy.exp(encoded_theta[4:])
    return natural
def algopy_unconstrained_recessivity_fixation(
        kimura_d,
        S,
        ):
    """
    This is only compatible with algopy and is not compatible with numpy.
    It takes ridiculous measures to compute higher order derivatives.
    @param kimura_d: a parameter that might carry Taylor information
    @param S: an ndarray of selection differences with Taylor information
    @return: an ndarray of fixation probabilities with Taylor information
    """
    nstates = S.shape[0]
    D = algopy.sign(S) * kimura_d
    H = algopy.zeros_like(S)
    ncoeffs = S.data.shape[0]
    shp = (ncoeffs, -1)
    S_data_reshaped = S.data.reshape(shp)
    D_data_reshaped = D.data.reshape(shp)
    H_data_reshaped = H.data.reshape(shp)
    tmp_a = algopy.zeros_like(H)
    tmp_b = algopy.zeros_like(H)
    tmp_c = algopy.zeros_like(H)
    tmp_a_data_reshaped = tmp_a.data.reshape(shp)
    tmp_b_data_reshaped = tmp_b.data.reshape(shp)
    tmp_c_data_reshaped = tmp_c.data.reshape(shp)
    pykimuracore.kimura_algopy(
            g_quad_x,
            g_quad_w,
            S_data_reshaped,
            D_data_reshaped,
            tmp_a_data_reshaped,
            tmp_b_data_reshaped,
            tmp_c_data_reshaped,
            H_data_reshaped,
            )
    return H
def get_fixation_unconstrained_kb_fquad(
        S, d, log_kb, x, w, codon_neighbor_mask):
    """
    This uses the Kacser and Burns effect instead of the sign function.
    """
    #TODO: possibly use a mirror symmetry to double the speed
    soft_sign_S = algopy.tanh(algopy.exp(log_kb) * S)
    D = d * soft_sign_S
    H = algopy.zeros_like(S)
    for i in range(H.shape[0]):
        for j in range(H.shape[1]):
            if codon_neighbor_mask[i, j]:
                H[i, j] = 1. / kimrecessive.denom_fixed_quad(
                        0.5 * S[i, j], D[i, j], x, w)
    return H
def get_two_taxon_neg_ll(
        model,
        em_probs, em_distns,
        subs_counts,
        ts, tv, syn, nonsyn, compo, asym_compo,
        natural_theta,
        ):
    """
    Get the negative log likelihood.
    This function does not use the logarithms.
    It is mostly for computing the hessian;
    otherwise the version with the logarithms would probably be better.
    The first param group is the model implementation.
    The second param group is expectation-maximization stuff.
    The third param group is the data.
    The next param group consists of design matrices related to the genetic code.
    The next param group consists of the free parameters of the model.
    """
    # unpack some parameters
    branch_length = natural_theta[0]
    natural_model_theta = natural_theta[1:]
    # compute the appropriately scaled transition matrices
    pre_Qs = model.get_pre_Qs(
            em_probs, em_distns,
            ts, tv, syn, nonsyn, compo, asym_compo,
            natural_model_theta)
    eq_distns = model.get_distns(
            em_probs, em_distns,
            ts, tv, syn, nonsyn, compo, asym_compo,
            natural_model_theta)
    Ps = markovutil.get_branch_mix(
            em_probs, pre_Qs, eq_distns, branch_length)
    # compute the mixture transition matrix
    P_mix = algopy.zeros_like(Ps[0])
    P_mix += em_probs[0] * (Ps[0].T * eq_distns[0]).T
    P_mix += em_probs[1] * (Ps[1].T * eq_distns[1]).T
    # compute the neg log likelihood
    neg_ll = -algopy.sum(algopy.log(P_mix) * subs_counts)
    print(neg_ll)
    return neg_ll
def get_ll_root(summary, distn, blink_on, blink_off):
    """
    Parameters
    ----------
    summary : Summary object
        Summary of blinking process trajectories.
    distn : dense possibly exotic array
        Primary state distribution.
    blink_on : float, or exotic float-like with derivatives information
        blink rate on
    blink_off : float, or exotic float-like with derivatives information
        blink rate off

    Returns
    -------
    ll : float, or exotic float-like with derivatives information
        log likelihood contribution from root state

    """
    # construct the blink distribution with the right data type
    blink_distn = algopy.zeros(2, dtype=distn)
    blink_distn[0] = blink_off / (blink_on + blink_off)
    blink_distn[1] = blink_on / (blink_on + blink_off)

    # initialize expected log likelihood using the right data type
    ll = algopy.zeros(1, dtype=distn)[0]

    # root primary state contribution to expected log likelihood
    obs = algopy.zeros_like(distn)
    for state, count in summary.root_pri_to_count.items():
        if count:
            ll = ll + count * algopy.log(distn[state])

    # root blink state contribution to expected log likelihood
    if summary.root_off_count:
        ll = ll + summary.root_off_count * algopy.log(blink_distn[0])
    if summary.root_xon_count:
        ll = ll + summary.root_xon_count * algopy.log(blink_distn[1])

    # return expected log likelihood contribution of root
    return ll / summary.nsamples
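# get_ll_root only touches a few attributes of the summary object:
# root_pri_to_count, root_off_count, root_xon_count, and nsamples.  A minimal
# usage sketch with a stand-in summary and plain float blink rates; all of the
# counts and the primary state distribution below are made-up illustrative
# values, not the output of an actual trajectory sampler.
import numpy
from types import SimpleNamespace

fake_summary = SimpleNamespace(
        root_pri_to_count={0: 3, 1: 1, 2: 0},  # primary root states -> counts
        root_off_count=2,
        root_xon_count=2,
        nsamples=4,
        )
distn_demo = numpy.array([0.5, 0.3, 0.2])
ll_demo = get_ll_root(fake_summary, distn_demo, blink_on=1.0, blink_off=3.0)
print(ll_demo)  # expected log likelihood contribution of the root, per sample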
def get_fixation_unconstrained_fquad(S, d, x, w, codon_neighbor_mask):
    """
    In this function name, fquad means "fixed quadrature."
    The S ndarray with ndim=2 depends on free parameters.
    The d parameter is itself a free parameter.
    So both of those things are algopy objects carrying Taylor information.
    On the other hand, x and w are precomputed ndim=1 ndarrays
    which are not carrying around extra Taylor information.
    @param S: array of selection differences
    @param d: parameter that controls dominance vs. recessivity
    @param x: precomputed roots for quadrature
    @param w: precomputed weights for quadrature
    @param codon_neighbor_mask: only compute entries of neighboring codon pairs
    """
    #TODO: possibly use a mirror symmetry to double the speed
    sign_S = algopy.sign(S)
    D = d * sign_S
    H = algopy.zeros_like(S)
    for i in range(H.shape[0]):
        for j in range(H.shape[1]):
            if codon_neighbor_mask[i, j]:
                H[i, j] = 1. / kimrecessive.denom_fixed_quad(
                        0.5 * S[i, j], D[i, j], x, w)
    return H
def get_fixation_unconstrained_fquad(S, d, x, w, codon_neighbor_mask):
    """
    In this function name, fquad means "fixed quadrature."
    The S ndarray with ndim=2 depends on free parameters.
    The d parameter is itself a free parameter.
    So both of those things are algopy objects carrying Taylor information.
    On the other hand, x and w are precomputed ndim=1 ndarrays
    which are not carrying around extra Taylor information.
    @param S: array of selection differences
    @param d: parameter that controls dominance vs. recessivity
    @param x: precomputed roots for quadrature
    @param w: precomputed weights for quadrature
    @param codon_neighbor_mask: only compute entries of neighboring codon pairs
    """
    #TODO: possibly use a mirror symmetry to double the speed
    sign_S = algopy.sign(S)
    D = d * sign_S
    H = algopy.zeros_like(S)
    for i in range(H.shape[0]):
        for j in range(H.shape[1]):
            if codon_neighbor_mask[i, j]:
                H[i, j] = 1. / kimrecessive.denom_fixed_quad(
                        0.5 * S[i, j], D[i, j], x, w)
    return H
def encoded_to_natural(cls, encoded_theta):
    natural_theta = algopy.zeros_like(encoded_theta)
    natural_theta[0] = encoded_theta[0]
    natural_theta[1:] = algopy.exp(encoded_theta[1:])
    cls.check_theta(natural_theta)
    return natural_theta
def encoded_to_natural(cls, encoded_theta):
    natural_theta = algopy.zeros_like(encoded_theta)
    natural_theta[0] = encoded_theta[0]
    natural_theta[1:] = algopy.exp(encoded_theta[1:])
    return natural_theta
def natural_to_encoded(cls, natural_theta):
    encoded_theta = algopy.zeros_like(natural_theta)
    encoded_theta[0] = natural_theta[0]
    encoded_theta[1:] = algopy.log(natural_theta[1:])
    return encoded_theta
def get_distn(params):
    n = 2
    distn = algopy.zeros_like(params)
    distn[0] = params[1] / params.sum()
    distn[1] = params[0] / params.sum()
    return distn
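# Note that get_distn reverses the order of the two unnormalized weights:
# distn[0] is proportional to params[1] and distn[1] to params[0].  A tiny
# usage sketch with made-up values, passing a plain numpy array in place of
# an algopy object for illustration.
import numpy

params_demo = numpy.array([3.0, 1.0])
print(get_distn(params_demo))  # -> [0.25, 0.75]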
def natural_to_encoded(cls, natural_theta):
    cls.check_theta(natural_theta)
    encoded_theta = algopy.zeros_like(natural_theta)
    encoded_theta[0] = natural_theta[0]
    encoded_theta[1:] = algopy.log(natural_theta[1:])
    return encoded_theta