def test_train(self, params='stmc', niter=5):
    h = hmm.GaussianHMM(self.nstates, self.ndim, self.cvtype)
    h.startprob = self.startprob
    # Perturb the transition matrix and renormalize each row.
    tmp = self.transmat + np.diag(np.random.rand(self.nstates))
    h.transmat = tmp / np.tile(tmp.sum(axis=1), (self.nstates, 1)).T
    h.means = 20 * self.means
    h.covars = self.covars[self.cvtype]

    # Create a training and testing set by sampling from the same
    # distribution.
    train_obs = [h.rvs(n=10) for x in range(50)]
    test_obs = [h.rvs(n=10) for x in range(5)]

    h.init(train_obs, minit='points')
    init_testll = [h.lpdf(x) for x in test_obs]

    trainll = h.train(train_obs, iter=niter, params=params)
    if not np.all(np.diff(trainll) > 0):
        print('\nTest train: %s (%s)\n  %s\n  %s' % (
            self.cvtype, params, trainll, np.diff(trainll)))
    self.assertTrue(np.all(np.diff(trainll) > -0.5))

    post_testll = [h.lpdf(x) for x in test_obs]
    if not (np.sum(post_testll) > np.sum(init_testll)):
        print('\nTest train: %s (%s)\n  %s\n  %s' % (
            self.cvtype, params, init_testll, post_testll))
    self.assertTrue(np.sum(post_testll) > np.sum(init_testll))
def test_rvs(self, n=1000):
    h = hmm.GaussianHMM(self.nstates, self.ndim, self.cvtype)
    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    h.means = 20 * self.means
    h.covars = np.maximum(self.covars[self.cvtype], 0.1)
    h.startprob = self.startprob

    samples = h.rvs(n)
    self.assertEqual(samples.shape, (n, self.ndim))
def setUp(self):
    # Build two HMMs for comparison: 4 hidden states, 2-dimensional
    # Gaussian observations.
    n_state = 4
    n_feature = 2     # dimensionality of each observation
    X_length = 1000
    n_batch = 200     # number of batches
    self.n_batch = n_batch
    self.X_length = X_length
    self.test_hmm = hmm.GaussianHMM(n_state, n_feature)
    self.comp_hmm = ContrastHMM(n_state, n_feature)
    self.X, self.Z = self.comp_hmm.module.sample(self.X_length * 10)
    self.test_hmm.train(self.X, self.Z)
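# ContrastHMM is referenced above but not defined in this snippet. A minimal
# sketch of what it might look like (an assumption, not the original helper):
# a thin wrapper exposing a randomly parameterised hmmlearn GaussianHMM as
# `.module`, so the tests can sample ground-truth data from it and compare the
# recovered parameters against startprob_, transmat_, means_ and covars_.
class ContrastHMM(object):
    def __init__(self, n_state, n_feature):
        from hmmlearn import hmm as reference_hmm  # assumed dependency
        self.module = reference_hmm.GaussianHMM(n_components=n_state,
                                                covariance_type="full")
        # Random start and transition probabilities, normalised to sum to 1.
        startprob = np.random.rand(n_state)
        self.module.startprob_ = startprob / startprob.sum()
        transmat = np.random.rand(n_state, n_state)
        self.module.transmat_ = transmat / transmat.sum(axis=1, keepdims=True)
        # Well-separated means and simple full covariance matrices.
        self.module.means_ = 10 * np.random.rand(n_state, n_feature)
        self.module.covars_ = 0.5 * np.tile(np.identity(n_feature),
                                            (n_state, 1, 1))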
def test_train(self, params='stmc', niter=5):
    covars_weight = 2.0
    if self.cvtype in ('full', 'tied'):
        covars_weight += self.ndim
    trainer = hmm.hmm_trainers.GaussianHMMMAPTrainer(
        startprob_prior=10 * self.startprob + 2.0,
        transmat_prior=10 * self.transmat + 2.0,
        means_prior=self.means,
        means_weight=2.0,
        covars_prior=self.covars[self.cvtype],
        covars_weight=covars_weight)
    h = hmm.GaussianHMM(self.nstates, self.ndim, self.cvtype,
                        trainer=trainer)
    h.startprob = self.startprob
    # Perturb the transition matrix and renormalize each row.
    tmp = self.transmat + np.diag(np.random.rand(self.nstates))
    h.transmat = tmp / np.tile(tmp.sum(axis=1), (self.nstates, 1)).T
    h.means = 20 * self.means
    h.covars = self.covars[self.cvtype]

    # Create a training and testing set by sampling from the same
    # distribution.
    train_obs = [h.rvs(n=10) for x in range(10)]
    test_obs = [h.rvs(n=10) for x in range(5)]

    h.init(train_obs, minit='points')
    init_testll = [h.lpdf(x) for x in test_obs]

    trainll = h.train(train_obs, iter=niter, params=params)
    if not np.all(np.diff(trainll) > 0):
        print('\nTest MAP train: %s (%s)\n  %s\n  %s' % (
            self.cvtype, params, trainll, np.diff(trainll)))
    self.assertTrue(np.all(np.diff(trainll) > -0.5))

    post_testll = [h.lpdf(x) for x in test_obs]
    if not (np.sum(post_testll) > np.sum(init_testll)):
        print('\nTest MAP train: %s (%s)\n  %s\n  %s' % (
            self.cvtype, params, init_testll, post_testll))
    self.assertTrue(np.sum(post_testll) > np.sum(init_testll))
def test_eval_and_decode(self):
    h = hmm.GaussianHMM(self.nstates, self.ndim, self.cvtype,
                        means=self.means, covars=self.covars[self.cvtype])
    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    h.means = 20 * h.means

    gaussidx = np.repeat(range(self.nstates), 5)
    nobs = len(gaussidx)
    obs = np.random.randn(nobs, self.ndim) + h.means[gaussidx]

    ll, posteriors = h.eval(obs)
    self.assertEqual(posteriors.shape, (nobs, self.nstates))
    assert_array_almost_equal(posteriors.sum(axis=1), np.ones(nobs))

    viterbi_ll, stateseq = h.decode(obs)
    assert_array_equal(stateseq, gaussidx)
def test_train_batch(self):
    X = []
    Z = []
    for b in range(self.n_batch):
        b_X, b_Z = self.comp_hmm.module.sample(self.X_length)
        X.append(b_X)
        Z.append(b_Z)
    batch_hmm = hmm.GaussianHMM(self.test_hmm.n_state, self.test_hmm.x_size)
    batch_hmm.train_batch(X, Z)
    # Check that the estimated parameters are close to the generating model's.
    self.assertAlmostEqual(
        s_error(batch_hmm.start_prob, self.comp_hmm.module.startprob_), 0, 1)
    self.assertAlmostEqual(
        s_error(batch_hmm.transmat_prob, self.comp_hmm.module.transmat_), 0, 1)
    self.assertAlmostEqual(
        s_error(batch_hmm.emit_means, self.comp_hmm.module.means_), 0, 1)
    self.assertAlmostEqual(
        s_error(batch_hmm.emit_covars, self.comp_hmm.module.covars_), 0, 1)
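# s_error is used above but not defined here. One plausible definition (an
# assumption, not the original helper): the mean absolute difference between
# the estimated and true parameter arrays, so assertAlmostEqual(s_error(...), 0, 1)
# asserts agreement to within roughly 0.05. Note this naive version assumes the
# recovered hidden states come out in the same order as in the generating model.
def s_error(estimated, truth):
    estimated = np.asarray(estimated, dtype=float)
    truth = np.asarray(truth, dtype=float)
    return np.mean(np.abs(estimated - truth))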
def test_attributes(self):
    h = hmm.GaussianHMM(self.nstates, self.ndim, self.cvtype)

    self.assertEqual(h.emission_type, 'gaussian')
    self.assertEqual(h.nstates, self.nstates)
    self.assertEqual(h.ndim, self.ndim)
    self.assertEqual(h.cvtype, self.cvtype)

    h.startprob = self.startprob
    assert_array_almost_equal(h.startprob, self.startprob)
    self.assertRaises(ValueError, h.__setattr__, 'startprob',
                      2 * self.startprob)
    self.assertRaises(ValueError, h.__setattr__, 'startprob', [])
    self.assertRaises(ValueError, h.__setattr__, 'startprob',
                      np.zeros((self.nstates - 2, self.ndim)))

    h.transmat = self.transmat
    assert_array_almost_equal(h.transmat, self.transmat)
    self.assertRaises(ValueError, h.__setattr__, 'transmat',
                      2 * self.transmat)
    self.assertRaises(ValueError, h.__setattr__, 'transmat', [])
    self.assertRaises(ValueError, h.__setattr__, 'transmat',
                      np.zeros((self.nstates - 2, self.nstates)))

    h.means = self.means
    assert_array_almost_equal(h.means, self.means)
    self.assertRaises(ValueError, h.__setattr__, 'means', [])
    self.assertRaises(ValueError, h.__setattr__, 'means',
                      np.zeros((self.nstates - 2, self.ndim)))

    h.covars = self.covars[self.cvtype]
    assert_array_almost_equal(h.covars, self.covars[self.cvtype])
    self.assertRaises(ValueError, h.__setattr__, 'covars', [])
    self.assertRaises(ValueError, h.__setattr__, 'covars',
                      np.zeros((self.nstates - 2, self.ndim)))
def test_bad_cvtype(self):
    # A valid cvtype should construct without error...
    hmm.GaussianHMM(20, 1, self.cvtype)
    # ...while an unknown cvtype should raise.
    self.assertRaises(ValueError, hmm.HMM, 20, 1, 'badcvtype')
activities = mergedDataset['Activity'].unique().tolist()

# Map the ground-truth activity labels in the test set to integer indices.
giusti = test['Activity'].values.flatten()
for idx, val in enumerate(giusti):
    giusti[idx] = activities.index(val)

# Toy "weather" example parameters (kept here for reference).
states = ["Rainy", "Sunny"]
n_states = len(activities)

observations = ["walk", "shop", "clean"]
n_observations = len(observations)

start_probability = np.array([0.6, 0.4])
transition_probability = np.array([[0.7, 0.3],
                                   [0.4, 0.6]])
emission_probability = np.array([[0.1, 0.4, 0.5],
                                 [0.6, 0.3, 0.1]])

banana = np.array(obsProb.values)  # (not used below)

# The emissions are discrete symbols, so use a discrete-emission model
# (MultinomialHMM here; CategoricalHMM in hmmlearn >= 0.3) rather than
# GaussianHMM, and set the underscored hmmlearn attributes.
model = hmm.MultinomialHMM(n_components=n_states)
model.startprob_ = np.array(startProb.values)
model.transmat_ = np.array(transProb.values)
model.emissionprob_ = np.array(obsProb.values)

# Predict a sequence of hidden states from the visible (observed) symbols.
bob_says = np.array([[0, 2, 1, 1, 2, 0]]).T  # toy sequence from the example
bob = np.array([emissions]).T
# Note: fit() re-estimates the parameters set above; skip it to decode with
# the hand-set probabilities instead.
model = model.fit(bob)
logprob, alice_hears = model.decode(bob, algorithm="viterbi")

print("Bob says:", ", ".join(map(str, bob.ravel())))
print("Alice hears:", ", ".join(map(str, alice_hears)))
print("Ground truth:", ", ".join(map(str, giusti)))
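# For reference, a self-contained version of the classic Rainy/Sunny decode
# that actually uses the hard-coded arrays above (a sketch; it assumes
# hmmlearn, where the discrete-emission class is MultinomialHMM before 0.3
# and CategoricalHMM from 0.3 onwards).
toy_model = hmm.MultinomialHMM(n_components=len(states))
toy_model.startprob_ = start_probability
toy_model.transmat_ = transition_probability
toy_model.emissionprob_ = emission_probability

toy_obs = np.array([[0, 2, 1, 1, 2, 0]]).T  # walk, clean, shop, shop, clean, walk
toy_logprob, toy_states = toy_model.decode(toy_obs, algorithm="viterbi")
print("Toy observations:", ", ".join(observations[int(i)] for i in toy_obs.ravel()))
print("Toy decoded states:", ", ".join(states[int(i)] for i in toy_states))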