コード例 #1
0
 def _do_viterbi_pass(self, framelogprob):
     """Decode the most likely state path for one observation sequence.

     ``framelogprob`` is a 2-D array whose shape is unpacked as
     ``(n_samples, n_components)``.  The start and transition
     probabilities stored on the model are moved to log space with
     ``np.log`` before being handed to ``_hmmc._viterbi``.

     Returns a ``(logprob, state_sequence)`` tuple, i.e. the two values
     produced by ``_hmmc._viterbi`` in swapped order.
     """
     n_samples, n_components = framelogprob.shape
     log_startprob = np.log(self.startprob_)
     log_transmat = np.log(self.transmat_)
     best_path, best_logprob = _hmmc._viterbi(
         n_samples, n_components, log_startprob, log_transmat,
         framelogprob)
     return best_logprob, best_path
コード例 #2
0
ファイル: base.py プロジェクト: simonkamronn/autohmm
 def _do_viterbi_pass(self, framelogprob):
     """Decode the most likely state path for one observation sequence.

     Modelled on hmmlearn's ``_BaseHMM``.  Machine epsilon is added to
     the start and transition probabilities before taking the log, so
     entries that are exactly zero do not become ``-inf``.

     Returns a ``(logprob, state_sequence)`` tuple, i.e. the two values
     produced by ``_hmmc._viterbi`` in swapped order.
     """
     eps = np.finfo(float).eps
     smoothed_start = self.startprob_ + eps
     smoothed_trans = self.transmat_ + eps
     n_samples, n_components = framelogprob.shape
     log_start = np.log(smoothed_start)
     log_trans = np.log(smoothed_trans)
     best_path, best_logprob = _hmmc._viterbi(
         n_samples, n_components, log_start, log_trans, framelogprob)
     return best_logprob, best_path
コード例 #3
0
ファイル: base.py プロジェクト: toinsson/autohmm
 def _do_viterbi_pass(self, framelogprob):
     """Run Viterbi decoding over a matrix of per-frame log-likelihoods.

     Modelled on hmmlearn's ``_BaseHMM``.  Both probability tables are
     offset by machine epsilon before ``np.log`` is applied, which keeps
     exact zeros from turning into ``-inf``.

     Returns ``(logprob, state_sequence)``; ``_hmmc._viterbi`` yields the
     pair in the opposite order, so it is swapped here.
     """
     tiny = np.finfo(float).eps
     startprob_eps = self.startprob_ + tiny
     transmat_eps = self.transmat_ + tiny
     n_samples, n_components = framelogprob.shape
     decoded = _hmmc._viterbi(
         n_samples, n_components, np.log(startprob_eps),
         np.log(transmat_eps), framelogprob)
     path, score = decoded
     return score, path
コード例 #4
0
 def _do_viterbi_pass(self, framelogprob):
     """Decode the most likely state path for one observation sequence.

     The model's start and transition probabilities are converted with
     ``log_mask_zero`` (presumably a log that tolerates zero entries --
     confirm against its definition) and passed to ``_hmmc._viterbi``
     together with ``framelogprob``.

     Returns a ``(logprob, state_sequence)`` tuple.
     """
     n_samples, n_components = framelogprob.shape
     log_startprob = log_mask_zero(self.startprob_)
     log_transmat = log_mask_zero(self.transmat_)
     # The result of _hmmc._viterbi is unpacked as (path, log probability);
     # the pair is swapped on return.
     best_path, best_logprob = _hmmc._viterbi(
         n_samples, n_components, log_startprob, log_transmat,
         framelogprob)
     return best_logprob, best_path
コード例 #5
0
def infer_allele_cn(clone_cn_data, hap_data, loh_error_rate=0.01):
    """Infer clone- and allele-specific copy number from haplotype read counts.

    For each clone (``cluster_id``) a Viterbi decode is run in which the
    hidden state is the minor-allele copy number (candidate values 0-9) and
    the emission log-likelihood comes from a binomial model of the
    minor-allele read count given the total read count.

    Parameters
    ----------
    clone_cn_data : pandas.DataFrame
        Left side of the merge.  Must contain ``chr``, ``start``, ``end`` and
        ``cluster_id``; its ``integer_copy_number`` column is renamed to
        ``total_cn``.
    hap_data : pandas.DataFrame
        Left-joined onto ``clone_cn_data`` on ``chr``/``start``/``end``/
        ``cluster_id``.  The merged table must provide ``hap_label``,
        ``total_counts_sum``, ``allele_1_sum`` and ``allele_2_sum``
        (presumably supplied by this frame -- confirm against the caller).
    loh_error_rate : float, optional
        Probability of observing a minor-allele read in the state with minor
        copy number 0 (the LOH state).

    Returns
    -------
    pandas.DataFrame
        One row per ``(cluster_id, chr, start, end)`` group, with the decoded
        ``minor_cn``, the merged-in ``total_cn`` and
        ``major_cn = total_cn - minor_cn``.
    """
    # Left join keeps every clone_cn_data row; values missing after the join
    # are replaced by 0.
    cn = clone_cn_data.merge(
        hap_data,
        on=['chr', 'start', 'end', 'cluster_id'],
        how='left',
    ).fillna(0).rename(columns={'integer_copy_number': 'total_cn'})

    # Truncate to an integer value but keep a float dtype for the division
    # below.
    cn['total_cn'] = cn['total_cn'].astype(int).astype(float)

    total_cn = cn['total_cn'].values

    # One row per row of `cn`, one column per candidate minor copy number
    # state (0..9).  n, x and p below are tiled to the same shape.
    minor_cn = np.tile(np.arange(0, 10, 1), (cn.shape[0], 1))
    # n: total read count; x: the smaller of the two allele read counts.
    n = np.tile(cn['total_counts_sum'].astype(int), (minor_cn.shape[1], 1)).T
    x = np.tile(cn[['allele_1_sum', 'allele_2_sum']].min(axis=1).astype(int),
                (minor_cn.shape[1], 1)).T
    # Expected minor-allele read fraction for each state.  Rows with
    # total_cn == 0 divide by zero here; the resulting nan likelihoods are
    # handled further down.
    p = minor_cn / total_cn[:, np.newaxis]

    # Add an error term for the loh state
    # Rationale: we can think of each state's probability of a minor allele
    # read as being beta distributed around the expected calculated as
    # the ratio of minor to total copy number.  For most states the expectation
    # will be equal to the copy number ratio except for the loh state which will
    # have the configurable offset from 0 governed by the loh_error_rate term
    p[:, 0] = loh_error_rate

    l = binom.logpmf(x, n, p)
    # Add 0.01 to every likelihood in probability space, i.e.
    # log(exp(l) + 0.01), which bounds the log-likelihood from below.
    l = np.logaddexp(l, np.log(0.01))

    # Set the likelihood to be very low for impossible states
    l[minor_cn > total_cn[:, np.newaxis]] = -1000.

    # Set the likelihood to a low value that is still greater than the
    # impossible state value if the likelihood is nan.  This will primarily
    # catch the case where minor copy number equals total copy number, which
    # is in general not a valid solution unless total copy number is 0
    l[np.isnan(l)] = -100.

    # Sum the per-row log-likelihoods over hap_label (index level 4), leaving
    # one row of state log-likelihoods per (cluster_id, chr, start, end).
    minor_cn_data = (pd.DataFrame(l,
                                  index=cn.set_index([
                                      'cluster_id', 'chr', 'start', 'end',
                                      'hap_label'
                                  ]).index).groupby(level=[0, 1, 2, 3]).sum())

    # `minor_cn` is rebound here: from the array of candidate states to the
    # result frame that will hold the decoded state per group.
    minor_cn = pd.DataFrame(index=minor_cn_data.index)
    minor_cn['minor_cn'] = None

    for clone_id in cn['cluster_id'].unique():
        framelogprob = minor_cn_data.loc[clone_id].values

        N_chain = framelogprob.shape[0]
        N_states = framelogprob.shape[1]

        # Uniform start distribution.  The transition weights are not
        # normalised: 1e4 + 1 on the diagonal and 1 elsewhere, so staying in
        # the same state is strongly favoured.  `prob` is not used afterwards.
        # NOTE(review): the chain order is the row order of the grouped
        # index -- confirm that this matches genomic order.
        seq, prob = _viterbi(
            N_chain, N_states, np.log(np.ones(N_states) / N_states),
            np.log(np.eye(N_states) * 1e4 + np.ones((N_states, N_states))),
            framelogprob)

        minor_cn.loc[clone_id, 'minor_cn'] = seq

    # Bring total_cn back in (one value per group) and derive major_cn.
    minor_cn = minor_cn.reset_index()
    minor_cn = minor_cn.merge(
        cn[['chr', 'start', 'end', 'cluster_id',
            'total_cn']].drop_duplicates(),
        how='left')
    minor_cn['total_cn'] = minor_cn['total_cn'].astype(int)
    minor_cn['major_cn'] = minor_cn['total_cn'] - minor_cn['minor_cn']

    # NOTE(review): `.any()` only verifies that at least one value is
    # non-null; `.all()` may have been intended -- confirm.
    assert minor_cn['minor_cn'].notnull().any()
    assert minor_cn['major_cn'].notnull().any()
    assert minor_cn['total_cn'].notnull().any()

    return minor_cn
コード例 #6
0
def _viterbi(log_startprob, log_transmat, framelogprob):
    """Return the decoded state path for one observation sequence.

    Thin wrapper around ``_hmmc._viterbi`` that derives the sequence length
    and the number of states from the shape of ``framelogprob`` and discards
    the log probability returned alongside the path.
    """
    n_frames, n_states = framelogprob.shape
    decoded = _hmmc._viterbi(
        n_frames, n_states, log_startprob, log_transmat, framelogprob
    )
    best_path, _score = decoded
    return best_path
コード例 #7
0
ファイル: base.py プロジェクト: davecusatis/HMMTest
 def _do_viterbi_pass(self, framelogprob):
     """Decode the most likely state path for one observation sequence.

     Uses the log-space start and transition tables already stored on the
     instance (``_log_startprob`` and ``_log_transmat``), so no conversion
     is done here.

     Returns a ``(logprob, state_sequence)`` tuple, i.e. the two values
     produced by ``_hmmc._viterbi`` in swapped order.
     """
     n_frames, n_states = framelogprob.shape
     decoded = _hmmc._viterbi(
         n_frames, n_states, self._log_startprob, self._log_transmat,
         framelogprob)
     best_path, best_logprob = decoded
     return best_logprob, best_path