def _update_dvect(cls, state: SiteInstanceState, rand: np.random.RandomState):
    '''
    Resample state.dposterior from a beta distribution and rebuild the
    distance vector (approximated using the geometric distribution) from it

    Nothing is modified unless at least one sample is currently classified
    as a recrudescence.

    :param state: The current state variables of the algorithm (reads
    state.classification and state.mindistance; overwrites state.dposterior
    and state.dvect when an update happens)
    :param rand: The random number generator to use (exactly one beta draw
    is made when an update happens, none otherwise)
    :return: state.dvect - the freshly updated vector, or the existing,
    unmodified vector if there are no recrudescent infections
    '''
    is_recrudescence = (
        state.classification == SampleType.RECRUDESCENCE.value)

    # only update if there is at least 1 recrudescent infection; still hand
    # back the current vector so the return type is the same on every path
    if not np.any(is_recrudescence):
        return state.dvect

    d_prior_alpha = 0
    d_prior_beta = 0

    # rows of the minimum-distance table belonging to recrudescent samples
    min_recrudescence_distances = state.mindistance[is_recrudescence, :]

    # alpha counts the distance entries, beta totals the (rounded) distances
    d_posterior_alpha = d_prior_alpha + min_recrudescence_distances.size
    d_posterior_beta = d_prior_beta + np.sum(
        np.round(min_recrudescence_distances))
    if d_posterior_beta == 0:
        # every distance rounded down to 0; fall back to the unrounded total
        d_posterior_beta = np.sum(min_recrudescence_distances)
    if d_posterior_beta == 0:
        # A beta parameter of 0 is not a valid shape for the beta
        # distribution (numpy requires it to be > 0); in that limit all of
        # the probability mass sits at dposterior == 1, where the algorithm
        # would get stuck. Using 1 keeps the draw inside (0, 1).
        d_posterior_beta = 1

    state.dposterior = rand.beta(d_posterior_alpha, d_posterior_beta)

    # update dvect (approximate using geometric distribution):
    # dvect[k] is proportional to dposterior * (1 - dposterior)**k
    exponents = np.arange(0, state.dvect.size)
    state.dvect = state.dposterior * (1 - state.dposterior) ** exponents
    # renormalize so the truncated vector sums to 1
    state.dvect = state.dvect / np.sum(state.dvect)

    return state.dvect
def _update_q(cls, state: SiteInstanceState, rand: np.random.RandomState):
    '''
    TODO: Possibly move this to the state object itself?

    Draw a new value for q (the proportion of alleles that are hidden/not
    directly observed) from a beta distribution whose parameters are the
    current counts of missing and observed hidden alleles, and store it on
    the state

    :param state: The current state variables of the algorithm (reads
    state.hidden0 and state.hiddenf; overwrites state.qq)
    :param rand: The random number generator to use (exactly one beta draw)
    :return: The newly drawn q value (the same value stored in state.qq)
    '''
    missing_flag = HiddenAlleleType.MISSING.value
    observed_flag = HiddenAlleleType.OBSERVED.value

    # Prior pseudo-counts; both 0, so the beta parameters below are simply
    # the counts of hidden alleles flagged missing (alpha) / observed (beta)
    prior_missing = 0
    prior_observed = 0

    missing_total = (
        prior_missing
        + np.nansum(state.hidden0 == missing_flag)
        + np.nansum(state.hiddenf == missing_flag))
    observed_total = (
        prior_observed
        + np.nansum(state.hidden0 == observed_flag)
        + np.nansum(state.hiddenf == observed_flag))

    # Edge case: a count of 0 is replaced with 1 so the beta distribution
    # always receives non-zero parameters
    # TODO: the observed-count guard was added due to a numpy warning,
    # possibly remove?
    alpha = 1 if missing_total == 0 else missing_total
    beta = 1 if observed_total == 0 else observed_total

    # propose new q (beta distribution is conjugate distribution for
    # binomial process)
    state.qq = rand.beta(alpha, beta)
    return state.qq