def test_2():
    """Check GaussianHMMCPUImpl.do_estep() against the reference GaussianHMM.

    Generator-style test: random model parameters and one random observation
    sequence are drawn, the E-step is run through both implementations, and
    one zero-argument check is yielded per sufficient statistic ('trans',
    'post', 'obs', 'obs**2').  Each check asserts agreement to 3 decimals.
    """
    # Fixed seed so that a failing run can be reproduced exactly.
    np.random.seed(42)
    n_features = 3
    length = 32

    for n_states in [4]:
        t1 = np.random.randn(length, n_features)
        means = np.random.randn(n_states, n_features)
        variances = np.random.rand(n_states, n_features)
        transmat = np.random.rand(n_states, n_states)
        # Normalise each row so it sums to one.
        transmat = transmat / np.sum(transmat, axis=1)[:, None]
        startprob = np.random.rand(n_states)
        startprob = startprob / np.sum(startprob)

        chmm = GaussianHMMCPUImpl(n_states, n_features)
        chmm._sequences = [t1]
        pyhmm = GaussianHMM(n_components=n_states, init_params='',
                            params='', covariance_type='diag')

        # The implementation under test is fed single-precision parameters.
        chmm.means_ = means.astype(np.float32)
        chmm.vars_ = variances.astype(np.float32)
        chmm.transmat_ = transmat.astype(np.float32)
        chmm.startprob_ = startprob.astype(np.float32)
        clogprob, cstats = chmm.do_estep()

        # The reference model keeps the original double-precision values.
        pyhmm.means_ = means
        pyhmm.covars_ = variances
        pyhmm.transmat_ = transmat
        pyhmm.startprob_ = startprob

        framelogprob = pyhmm._compute_log_likelihood(t1)
        fwdlattice = pyhmm._do_forward_pass(framelogprob)[1]
        bwdlattice = pyhmm._do_backward_pass(framelogprob)
        gamma = fwdlattice + bwdlattice
        posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
        stats = pyhmm._initialize_sufficient_statistics()
        pyhmm._accumulate_sufficient_statistics(
            stats, t1, framelogprob, posteriors, fwdlattice, bwdlattice,
            'stmc')

        for key in ('trans', 'post', 'obs', 'obs**2'):
            # Bind the arrays as defaults: a plain closure would look the
            # names up when the check finally runs, which is wrong if the
            # runner collects every check before executing any of them.
            yield lambda expected=stats[key], actual=cstats[key]: (
                np.testing.assert_array_almost_equal(
                    expected, actual, decimal=3))
class _SklearnGaussianHMMCPUImpl(object):
    """E-step / Viterbi backend that delegates to ``sklearn.hmm.GaussianHMM``.

    Callers assign ``_sequences``, ``means_``, ``vars_``, ``transmat_`` and
    ``startprob_`` on this object and then call ``do_estep`` or
    ``do_viterbi``.  The parameters are copied onto the wrapped model, cast
    to double precision, at the start of each call.

    NOTE(review): ``sklearn.hmm`` is presumably only available in old
    scikit-learn releases -- confirm the pinned version.
    """

    def __init__(self, n_states, n_features):
        """Create the wrapped model.

        ``n_features`` is accepted for signature compatibility but is not
        used by this constructor.
        """
        from sklearn.hmm import GaussianHMM
        self.impl = GaussianHMM(n_states, params='stmc')

        self._sequences = None
        self.means_ = None
        self.vars_ = None
        self.transmat_ = None
        self.startprob_ = None

    def _sync_params(self):
        """Copy the current parameters onto the wrapped model as doubles."""
        self.impl.means_ = self.means_.astype(np.double)
        self.impl.covars_ = self.vars_.astype(np.double)
        self.impl.transmat_ = self.transmat_.astype(np.double)
        self.impl.startprob_ = self.startprob_.astype(np.double)

    def do_estep(self):
        """Run forward-backward over every sequence.

        Returns
        -------
        (curr_logprob, stats)
            The sum of the per-sequence log-probabilities reported by the
            forward pass, and the sufficient statistics accumulated by the
            wrapped model over all sequences.
        """
        from sklearn.utils.extmath import logsumexp
        self._sync_params()

        stats = self.impl._initialize_sufficient_statistics()
        curr_logprob = 0
        for seq in self._sequences:
            seq = seq.astype(np.double)
            framelogprob = self.impl._compute_log_likelihood(seq)
            lpr, fwdlattice = self.impl._do_forward_pass(framelogprob)
            bwdlattice = self.impl._do_backward_pass(framelogprob)
            gamma = fwdlattice + bwdlattice
            # Normalise each frame's log-posterior before exponentiating.
            posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
            curr_logprob += lpr
            self.impl._accumulate_sufficient_statistics(
                stats, seq, framelogprob, posteriors, fwdlattice,
                bwdlattice, self.impl.params)

        return curr_logprob, stats

    def do_viterbi(self):
        """Decode the most likely state path of every sequence.

        Returns
        -------
        (logprob, state_sequences)
            The summed log-probability of the decoded paths and a list with
            one state sequence per entry of ``_sequences``.
        """
        # Decode with the parameters currently set on this object instead of
        # whatever an earlier do_estep() call happened to leave on the model.
        self._sync_params()

        logprob = 0
        state_sequences = []
        for obs in self._sequences:
            # Same double-precision cast that do_estep() applies.
            obs = np.asarray(obs, dtype=np.double)
            lpr, ss = self.impl._decode_viterbi(obs)
            logprob += lpr
            state_sequences.append(ss)

        return logprob, state_sequences
def test_2():
    """Compare E-step sufficient statistics of the CPU backend and GaussianHMM.

    Generator-style test.  A random diagonal-covariance model and a single
    random sequence are created, both implementations accumulate their
    statistics, and four zero-argument checks are yielded -- one each for
    'trans', 'post', 'obs' and 'obs**2' -- comparing to 3 decimal places.
    """

    def make_check(expected, actual):
        # Capture the arrays now; the returned callable may be executed
        # only after the enclosing loop has rebound its variables.
        def check():
            np.testing.assert_array_almost_equal(expected, actual, decimal=3)
        return check

    # Seed the global RNG so the generated model is the same on every run.
    np.random.seed(42)
    n_features = 3
    length = 32

    for n_states in [4]:
        t1 = np.random.randn(length, n_features)
        means = np.random.randn(n_states, n_features)
        variances = np.random.rand(n_states, n_features)
        transmat = np.random.rand(n_states, n_states)
        transmat = transmat / np.sum(transmat, axis=1)[:, None]
        startprob = np.random.rand(n_states)
        startprob = startprob / np.sum(startprob)

        # Implementation under test: single-precision parameters.
        chmm = GaussianHMMCPUImpl(n_states, n_features)
        chmm._sequences = [t1]
        chmm.means_ = means.astype(np.float32)
        chmm.vars_ = variances.astype(np.float32)
        chmm.transmat_ = transmat.astype(np.float32)
        chmm.startprob_ = startprob.astype(np.float32)

        # Reference implementation: parameters assigned directly, with
        # empty init_params/params passed to the constructor.
        pyhmm = GaussianHMM(n_components=n_states, init_params='',
                            params='', covariance_type='diag')

        clogprob, cstats = chmm.do_estep()

        pyhmm.means_ = means
        pyhmm.covars_ = variances
        pyhmm.transmat_ = transmat
        pyhmm.startprob_ = startprob

        framelogprob = pyhmm._compute_log_likelihood(t1)
        fwdlattice = pyhmm._do_forward_pass(framelogprob)[1]
        bwdlattice = pyhmm._do_backward_pass(framelogprob)
        gamma = fwdlattice + bwdlattice
        posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T

        stats = pyhmm._initialize_sufficient_statistics()
        pyhmm._accumulate_sufficient_statistics(
            stats, t1, framelogprob, posteriors, fwdlattice, bwdlattice,
            'stmc')

        yield make_check(stats['trans'], cstats['trans'])
        yield make_check(stats['post'], cstats['post'])
        yield make_check(stats['obs'], cstats['obs'])
        yield make_check(stats['obs**2'], cstats['obs**2'])
def hmm(samples):
    """Fit a 3-state Gaussian HMM and tabulate its forward pass.

    Parameters
    ----------
    samples : pandas object
        Must provide ``dropna()``, ``index`` and ``values``.  Missing values
        are dropped first; one-dimensional data is reshaped into a single
        column before fitting.

    Returns
    -------
    (model, table)
        The fitted ``GaussianHMM`` and a DataFrame indexed like the cleaned
        samples, holding one ``frame_<i>`` column (per-state frame
        log-likelihood) and one ``forward_<i>`` column (forward lattice)
        for each state ``i``.
    """
    model = GaussianHMM(n_components=3)
    samples = samples.dropna()
    idx = samples.index
    if samples.values.ndim < 2:
        # The model is fitted on 2-D input: one row per sample.
        samples = samples.values.reshape(-1, 1)
    model.fit([samples])

    framelogprob = model._compute_log_likelihood(samples)
    _, fwdlattice = model._do_forward_pass(framelogprob)
    n_states = model.means_.shape[0]

    frame = pd.DataFrame(
        framelogprob,
        index=idx,
        columns=["frame_" + str(i) for i in range(n_states)],
    )
    forward = pd.DataFrame(
        fwdlattice,
        index=idx,
        columns=["forward_" + str(i) for i in range(n_states)],
    )
    return model, frame.join(forward)
def test_2():
    """Check every E-step stage of GaussianHMMCUDAImpl against GaussianHMM.

    Generator-style test.  After running ``do_estep`` on the CUDA
    implementation, each intermediate it exposes (frame log-probabilities,
    forward lattice, backward lattice, posteriors) and each accumulated
    sufficient statistic ('trans', 'post', 'obs', 'obs**2') is compared to
    the value computed by the reference model, to 3 decimal places.  One
    zero-argument check is yielded per comparison.
    """
    np.random.seed(42)
    n_features = 32
    length = 20

    # Only n_states=8 is enabled; the sweep that is switched off covers
    # 3, 4, 5, 7, 8, 9, 15, 16, 17, 31 and 32.
    for n_states in [8]:
        t1 = np.random.randn(length, n_features)
        means = np.random.randn(n_states, n_features)
        variances = np.random.rand(n_states, n_features)
        transmat = np.random.rand(n_states, n_states)
        # Normalise each row so it sums to one.
        transmat = transmat / np.sum(transmat, axis=1)[:, None]
        startprob = np.random.rand(n_states)
        startprob = startprob / np.sum(startprob)

        cuhmm = GaussianHMMCUDAImpl(n_states, n_features)
        cuhmm._sequences = [t1]
        pyhmm = GaussianHMM(n_components=n_states, init_params='',
                            params='', covariance_type='diag')

        cuhmm.means_ = means
        cuhmm.vars_ = variances
        cuhmm.transmat_ = transmat
        cuhmm.startprob_ = startprob
        _, custats = cuhmm.do_estep()

        pyhmm.means_ = means
        pyhmm.covars_ = variances
        pyhmm.transmat_ = transmat
        pyhmm.startprob_ = startprob
        # NOTE(review): result is discarded; presumably free of side effects,
        # kept so the call sequence on the reference model is unchanged.
        pyhmm._initialize_sufficient_statistics()

        # Every lambda below binds its operands as default arguments so the
        # check keeps the arrays of this iteration even when it is executed
        # after the generator has moved on.
        framelogprob = pyhmm._compute_log_likelihood(t1)
        cuframelogprob = cuhmm._get_framelogprob()
        yield lambda a=framelogprob, b=cuframelogprob: (
            np.testing.assert_array_almost_equal(a, b, decimal=3))

        fwdlattice = pyhmm._do_forward_pass(framelogprob)[1]
        cufwdlattice = cuhmm._get_fwdlattice()
        yield lambda a=fwdlattice, b=cufwdlattice: (
            np.testing.assert_array_almost_equal(a, b, decimal=3))

        bwdlattice = pyhmm._do_backward_pass(framelogprob)
        cubwdlattice = cuhmm._get_bwdlattice()
        yield lambda a=bwdlattice, b=cubwdlattice: (
            np.testing.assert_array_almost_equal(a, b, decimal=3))

        gamma = fwdlattice + bwdlattice
        posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
        cuposteriors = cuhmm._get_posteriors()
        yield lambda a=posteriors, b=cuposteriors: (
            np.testing.assert_array_almost_equal(a, b, decimal=3))

        stats = pyhmm._initialize_sufficient_statistics()
        pyhmm._accumulate_sufficient_statistics(
            stats, t1, framelogprob, posteriors, fwdlattice, bwdlattice,
            'stmc')

        for key in ('trans', 'post', 'obs', 'obs**2'):
            yield lambda a=stats[key], b=custats[key]: (
                np.testing.assert_array_almost_equal(a, b, decimal=3))
def test_2():
    """Compare GaussianHMMCUDAImpl with the reference GaussianHMM stage by stage.

    Generator-style test: yields one zero-argument check for each of the
    frame log-probabilities, forward lattice, backward lattice, posteriors
    and the four sufficient statistics ('trans', 'post', 'obs', 'obs**2').
    Every check requires agreement to 3 decimal places.
    """

    def close(expected, actual):
        # Build the check from the arrays as they are right now, so later
        # rebinding of the loop variables cannot change what is compared.
        def check():
            np.testing.assert_array_almost_equal(expected, actual, decimal=3)
        return check

    np.random.seed(42)
    n_features = 32
    length = 20

    # A single state count is active; the disabled sweep lists
    # 3, 4, 5, 7, 8, 9, 15, 16, 17, 31, 32.
    for n_states in [8]:
        # --- random model and data -------------------------------------
        t1 = np.random.randn(length, n_features)
        means = np.random.randn(n_states, n_features)
        variances = np.random.rand(n_states, n_features)
        transmat = np.random.rand(n_states, n_states)
        transmat = transmat / np.sum(transmat, axis=1)[:, None]
        startprob = np.random.rand(n_states)
        startprob = startprob / np.sum(startprob)

        # --- implementation under test ---------------------------------
        cuhmm = GaussianHMMCUDAImpl(n_states, n_features)
        cuhmm._sequences = [t1]
        pyhmm = GaussianHMM(n_components=n_states, init_params='',
                            params='', covariance_type='diag')
        cuhmm.means_ = means
        cuhmm.vars_ = variances
        cuhmm.transmat_ = transmat
        cuhmm.startprob_ = startprob
        _, custats = cuhmm.do_estep()

        # --- reference model -------------------------------------------
        pyhmm.means_ = means
        pyhmm.covars_ = variances
        pyhmm.transmat_ = transmat
        pyhmm.startprob_ = startprob
        # NOTE(review): return value unused here; presumably has no side
        # effects -- left in place to keep the original call sequence.
        pyhmm._initialize_sufficient_statistics()

        framelogprob = pyhmm._compute_log_likelihood(t1)
        yield close(framelogprob, cuhmm._get_framelogprob())

        fwdlattice = pyhmm._do_forward_pass(framelogprob)[1]
        yield close(fwdlattice, cuhmm._get_fwdlattice())

        bwdlattice = pyhmm._do_backward_pass(framelogprob)
        yield close(bwdlattice, cuhmm._get_bwdlattice())

        gamma = fwdlattice + bwdlattice
        posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
        yield close(posteriors, cuhmm._get_posteriors())

        stats = pyhmm._initialize_sufficient_statistics()
        pyhmm._accumulate_sufficient_statistics(
            stats, t1, framelogprob, posteriors, fwdlattice, bwdlattice,
            'stmc')

        yield close(stats['trans'], custats['trans'])
        yield close(stats['post'], custats['post'])
        yield close(stats['obs'], custats['obs'])
        yield close(stats['obs**2'], custats['obs**2'])