def _do_viterbi_pass(self, framelogprob):
    """Run ``_hmmc._viterbi`` on the model's start and transition probabilities.

    Parameters
    ----------
    framelogprob : 2-D array
        Its shape is unpacked as ``(n_samples, n_components)`` and the
        array is forwarded unchanged to ``_hmmc._viterbi``.

    Returns
    -------
    tuple
        ``(logprob, state_sequence)`` -- the two values returned by
        ``_hmmc._viterbi``, in swapped order.
    """
    n_samples, n_components = framelogprob.shape

    # Zero probabilities (forbidden start states or transitions) are valid
    # model parameters. np.log maps them to -inf, which is the value we want
    # to pass on, so only the spurious divide-by-zero warning is silenced.
    with np.errstate(divide="ignore"):
        log_startprob = np.log(self.startprob_)
        log_transmat = np.log(self.transmat_)

    state_sequence, logprob = _hmmc._viterbi(
        n_samples, n_components, log_startprob, log_transmat, framelogprob)
    return logprob, state_sequence
def _do_viterbi_pass(self, framelogprob):
    """Run ``_hmmc._viterbi`` with eps-smoothed model probabilities.

    Machine epsilon is added to ``startprob_`` and ``transmat_`` before
    taking logs, so zero entries yield a large negative finite value
    instead of ``-inf``.

    Parameters
    ----------
    framelogprob : 2-D array
        Its shape supplies the two size arguments of ``_hmmc._viterbi``;
        the array itself is forwarded unchanged.

    Returns
    -------
    tuple
        ``(logprob, state_sequence)`` -- the values returned by
        ``_hmmc._viterbi``, in swapped order.
    """
    # Based on hmmlearn's _BaseHMM
    eps = np.finfo(float).eps
    smoothed_start = self.startprob_ + eps
    smoothed_trans = self.transmat_ + eps

    n_rows, n_states = framelogprob.shape
    log_start = np.log(smoothed_start)
    log_trans = np.log(smoothed_trans)

    path, score = _hmmc._viterbi(
        n_rows, n_states, log_start, log_trans, framelogprob)
    return score, path
def _do_viterbi_pass(self, framelogprob):
    """Decode via ``_hmmc._viterbi`` using eps-padded probabilities.

    ``np.finfo(float).eps`` is added to both ``startprob_`` and
    ``transmat_`` so that their logarithms stay finite where the model
    holds exact zeros.

    Parameters
    ----------
    framelogprob : 2-D array
        Unpacked as ``(n_samples, n_components)`` and passed through to
        ``_hmmc._viterbi``.

    Returns
    -------
    tuple
        ``(logprob, state_sequence)``; ``_hmmc._viterbi`` returns them the
        other way round.
    """
    # Based on hmmlearn's _BaseHMM
    tiny = np.finfo(float).eps
    padded_startprob = self.startprob_ + tiny
    padded_transmat = self.transmat_ + tiny

    n_samples, n_components = framelogprob.shape
    viterbi_args = (
        n_samples,
        n_components,
        np.log(padded_startprob),
        np.log(padded_transmat),
        framelogprob,
    )
    state_sequence, logprob = _hmmc._viterbi(*viterbi_args)
    return logprob, state_sequence
def _do_viterbi_pass(self, framelogprob):
    """Run ``_hmmc._viterbi`` on zero-masked log probabilities.

    ``startprob_`` and ``transmat_`` are converted with ``log_mask_zero``
    before being handed to the decoder.

    Parameters
    ----------
    framelogprob : 2-D array
        Unpacked as ``(n_samples, n_components)`` and forwarded unchanged.

    Returns
    -------
    tuple
        ``(logprob, state_sequence)``. ``_hmmc._viterbi`` yields
        ``(state_sequence, logprob)``, so the pair is swapped here.
    """
    n_rows, n_states = framelogprob.shape
    log_startprob = log_mask_zero(self.startprob_)
    log_transmat = log_mask_zero(self.transmat_)

    path, score = _hmmc._viterbi(
        n_rows, n_states, log_startprob, log_transmat, framelogprob)
    return score, path
def infer_allele_cn(clone_cn_data, hap_data, loh_error_rate=0.01):
    """
    HMM inference of clone and allele specific copy number based on
    haplotype allele read counts.

    Args:
        clone_cn_data (pandas.DataFrame): merged with ``hap_data`` on
            'chr', 'start', 'end', 'cluster_id'.
        hap_data (pandas.DataFrame): right-hand side of that left merge.
            The merged table must provide 'integer_copy_number',
            'hap_label', 'total_counts_sum', 'allele_1_sum' and
            'allele_2_sum' (presumably the copy number comes from
            ``clone_cn_data`` and the counts from ``hap_data`` -- confirm
            against the callers).
        loh_error_rate (float): probability of a minor allele read used
            for the ``minor_cn == 0`` (LOH) state instead of exactly 0.

    Returns:
        pandas.DataFrame: one row per (cluster_id, chr, start, end) with
        columns 'minor_cn', 'total_cn' and 'major_cn'.

    Raises:
        AssertionError: if any inferred 'minor_cn', 'major_cn' or
            'total_cn' value is null.
    """
    cn = clone_cn_data.merge(
        hap_data,
        on=['chr', 'start', 'end', 'cluster_id'],
        how='left',
    ).fillna(0).rename(columns={'integer_copy_number': 'total_cn'})

    cn['total_cn'] = cn['total_cn'].astype(int).astype(float)

    total_cn = cn['total_cn'].values

    # One column per candidate minor copy number state (0..9), one row per
    # row of the merged table.
    minor_cn = np.tile(np.arange(0, 10, 1), (cn.shape[0], 1))

    n = np.tile(cn['total_counts_sum'].astype(int), (minor_cn.shape[1], 1)).T
    x = np.tile(
        cn[['allele_1_sum', 'allele_2_sum']].min(axis=1).astype(int),
        (minor_cn.shape[1], 1)).T

    # Bins with total_cn == 0 divide by zero here. The resulting inf/nan
    # values are dealt with explicitly below, so only the warnings are
    # suppressed.
    with np.errstate(divide='ignore', invalid='ignore'):
        p = minor_cn / total_cn[:, np.newaxis]

    # Add an error term for the loh state
    # Rationale: we can think of each state's probability of a minor allele
    # read as being beta distributed around the expected value calculated as
    # the ratio of minor to total copy number. For most states the
    # expectation will be equal to the copy number ratio, except for the loh
    # state which will have the configurable offset from 0 governed by the
    # loh_error_rate term
    p[:, 0] = loh_error_rate

    loglik = binom.logpmf(x, n, p)

    # Add a constant 0.01 to every likelihood (in log space), which bounds
    # each log likelihood from below by log(0.01).
    loglik = np.logaddexp(loglik, np.log(0.01))

    # Set the likelihood to be very low for impossible states
    loglik[minor_cn > total_cn[:, np.newaxis]] = -1000.

    # Set the likelihood to a low value that is still greater than the
    # impossible state value if the likelihood is nan. This will primarily
    # catch the case where minor copy number equals total copy number, which
    # is in general not a valid solution unless total copy number is 0
    loglik[np.isnan(loglik)] = -100.

    # Sum the log likelihoods over hap_label within each
    # (cluster_id, chr, start, end) bin.
    minor_cn_data = (
        pd.DataFrame(
            loglik,
            index=cn.set_index([
                'cluster_id', 'chr', 'start', 'end', 'hap_label'
            ]).index)
        .groupby(level=[0, 1, 2, 3]).sum())

    minor_cn = pd.DataFrame(index=minor_cn_data.index)
    minor_cn['minor_cn'] = None

    # The start and transition terms are the same for every clone: uniform
    # start, and a weight of 1e4 + 1 for staying in a state against 1 for
    # switching.
    n_states = minor_cn_data.shape[1]
    log_startprob = np.log(np.ones(n_states) / n_states)
    log_transmat = np.log(
        np.eye(n_states) * 1e4 + np.ones((n_states, n_states)))

    for clone_id in cn['cluster_id'].unique():
        framelogprob = minor_cn_data.loc[clone_id].values
        n_chain = framelogprob.shape[0]

        seq, prob = _viterbi(
            n_chain, n_states, log_startprob, log_transmat, framelogprob)

        minor_cn.loc[clone_id, 'minor_cn'] = seq

    minor_cn = minor_cn.reset_index()
    minor_cn = minor_cn.merge(
        cn[['chr', 'start', 'end', 'cluster_id', 'total_cn']].drop_duplicates(),
        how='left')
    minor_cn['total_cn'] = minor_cn['total_cn'].astype(int)
    minor_cn['major_cn'] = minor_cn['total_cn'] - minor_cn['minor_cn']

    # Every row must carry a value. `.any()` would pass as soon as a single
    # row was non-null and so would not catch partially filled output.
    assert minor_cn['minor_cn'].notnull().all()
    assert minor_cn['major_cn'].notnull().all()
    assert minor_cn['total_cn'].notnull().all()

    return minor_cn
def _viterbi(log_startprob, log_transmat, framelogprob):
    """Return only the first result of ``_hmmc._viterbi``.

    The two size arguments are derived from ``framelogprob.shape``; the
    second value returned by ``_hmmc._viterbi`` is discarded.
    """
    n_rows, n_states = framelogprob.shape
    ali, _ = _hmmc._viterbi(
        n_rows,
        n_states,
        log_startprob,
        log_transmat,
        framelogprob,
    )
    return ali
def _do_viterbi_pass(self, framelogprob):
    """Run ``_hmmc._viterbi`` with the stored log-parameters.

    Parameters
    ----------
    framelogprob : 2-D array
        Unpacked as ``(n_observations, n_components)`` and forwarded
        unchanged, together with ``self._log_startprob`` and
        ``self._log_transmat``.

    Returns
    -------
    tuple
        ``(logprob, state_sequence)`` -- the values returned by
        ``_hmmc._viterbi``, in swapped order.
    """
    n_rows, n_states = framelogprob.shape
    path, score = _hmmc._viterbi(
        n_rows,
        n_states,
        self._log_startprob,
        self._log_transmat,
        framelogprob,
    )
    return score, path