def test_fit_left_right(self):
    transmat = np.zeros((self.n_components, self.n_components))

    # Left-to-right: each state is connected to itself and its
    # direct successor.
    for i in range(self.n_components):
        if i == self.n_components - 1:
            transmat[i, i] = 1.0
        else:
            transmat[i, i] = transmat[i, i + 1] = 0.5

    # Always start in first state
    startprob = np.zeros(self.n_components)
    startprob[0] = 1.0

    lengths = [10, 8, 1]
    X = self.prng.rand(sum(lengths), self.n_features)

    h = hmm.GaussianHMM(self.n_components, covariance_type="diag",
                        params="mct", init_params="cm")
    h.startprob_ = startprob.copy()
    h.transmat_ = transmat.copy()
    h.fit(X, lengths=lengths)

    # EM preserves structural zeros, so the topology must survive fitting.
    self.assertTrue((h.startprob_[startprob == 0.0] == 0.0).all())
    self.assertTrue((h.transmat_[transmat == 0.0] == 0.0).all())

    posteriors = h.predict_proba(X)
    self.assertFalse(np.isnan(posteriors).any())
    self.assertTrue(np.allclose(posteriors.sum(axis=1), 1.))

    score, state_sequence = h.decode(X, algorithm="viterbi")
    self.assertTrue(np.isfinite(score))
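# The left-to-right ("Bakis") topology hand-built in the test above
# generalizes to a small helper.  A minimal sketch under the same
# convention -- the helper name is illustrative; hmmlearn does not
# provide such a function:

def left_to_right_transmat(n_components, self_loop=0.5):
    """Transition matrix where each state moves only to itself or its
    direct successor, with the last state absorbing."""
    transmat = np.zeros((n_components, n_components))
    for i in range(n_components - 1):
        transmat[i, i] = self_loop
        transmat[i, i + 1] = 1.0 - self_loop
    transmat[-1, -1] = 1.0
    return transmat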
def test_fit_with_priors(self, params='stmc', n_iter=5):
    self.setUp()
    startprob_prior = 10 * self.startprob + 2.0
    transmat_prior = 10 * self.transmat + 2.0
    means_prior = self.means
    means_weight = 2.0
    covars_weight = 2.0
    if self.covariance_type in ('full', 'tied'):
        covars_weight += self.n_features
    covars_prior = self.covars

    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    h.startprob_ = self.startprob
    h.startprob_prior = startprob_prior
    h.transmat_ = normalized(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.transmat_prior = transmat_prior
    h.means_ = 20 * self.means
    h.means_prior = means_prior
    h.means_weight = means_weight
    h.covars_ = self.covars
    h.covars_prior = covars_prior
    h.covars_weight = covars_weight

    lengths = [200] * 10
    X, _state_sequence = h.sample(sum(lengths), random_state=self.prng)

    # Re-initialize the parameters and check that we can converge to the
    # original parameter values.
    h_learn = hmm.GaussianHMM(self.n_components, self.covariance_type,
                              params=params)
    h_learn.n_iter = 0
    h_learn.fit(X, lengths=lengths)
    self.assertTrue(log_likelihood_increasing(h_learn, X, lengths, n_iter))

    # Make sure we've converged to the right parameters.
    # a) means
    self.assertTrue(np.allclose(sorted(h.means_.tolist()),
                                sorted(h_learn.means_.tolist()),
                                0.01))
    # b) covars are hard to estimate precisely from a relatively small
    #    sample, thus the large threshold
    self.assertTrue(np.allclose(sorted(h._covars_.tolist()),
                                sorted(h_learn._covars_.tolist()),
                                10))
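# The tests here lean on two module-level helpers, `normalized` and
# `log_likelihood_increasing`, defined elsewhere in the suite.  A
# minimal sketch of plausible implementations, assuming only the
# semantics implied by their call sites (an illustration, not the
# suite's actual code):

def normalized(a, axis=None):
    """Return a copy of ``a`` rescaled to sum to 1 along ``axis``."""
    a = np.asarray(a, dtype=float)
    return a / a.sum(axis=axis, keepdims=True)


def log_likelihood_increasing(h, X, lengths, n_iter):
    """Fit ``h`` one EM iteration at a time and report whether the
    per-iteration log-likelihood is non-decreasing."""
    h.n_iter = 1        # one EM step per fit() call
    h.init_params = ''  # keep the parameters from the previous step
    log_likelihoods = np.empty(n_iter)
    for i in range(n_iter):
        h.fit(X, lengths=lengths)
        log_likelihoods[i] = h.score(X, lengths=lengths)
    return (np.diff(log_likelihoods) >= 0).all()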
def test_fit_sequences_of_different_length(self):
    self.setUp()
    lengths = [3, 4, 5]
    X = self.prng.rand(sum(lengths), self.n_features)

    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    # This shouldn't raise
    # ValueError: setting an array element with a sequence.
    self.assertIsNotNone(h.fit(X, lengths=lengths))
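# The `lengths` argument used above is hmmlearn's convention for passing
# several sequences at once: the sequences are stacked row-wise into a
# single array and their individual lengths are given separately.  A
# minimal sketch of assembling such input (the helper name is
# illustrative):

def stack_sequences(sequences):
    """Concatenate (n_i, n_features) arrays into the single
    (sum(n_i), n_features) array plus lengths list that fit() expects."""
    X = np.concatenate(sequences, axis=0)
    lengths = [len(seq) for seq in sequences]
    return X, lengths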
def test_bad_covariance_type(self):
    self.setUp()
    with self.assertRaises(ValueError):
        h = hmm.GaussianHMM(20, covariance_type='badcovariance_type')
        h.means_ = self.means
        h.covars_ = []
        h.startprob_ = self.startprob
        h.transmat_ = self.transmat
        h._check()
def test_fit_with_length_one_signal(self):
    self.setUp()
    lengths = [10, 8, 1]
    X = self.prng.rand(sum(lengths), self.n_features)

    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    # This shouldn't raise
    # ValueError: zero-size array to reduction operation maximum which
    # has no identity
    self.assertIsNotNone(h.fit(X, lengths=lengths))
def test_covar_is_writeable(self):
    h = hmm.GaussianHMM(n_components=1, covariance_type="diag",
                        init_params="c")
    X = np.random.normal(size=(1000, 5))
    h._init(X)

    # np.diag returns a read-only view of the array in NumPy 1.9.X.
    # Make sure this doesn't prevent us from fitting an HMM with
    # diagonal covariance matrix.  See PR#44 on GitHub for details
    # and discussion.
    self.assertTrue(h._covars_.flags["WRITEABLE"])
def test_sample(self, n=1000):
    self.setUp()
    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    h.startprob_ = self.startprob
    h.transmat_ = self.transmat
    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    h.means_ = 20 * self.means
    h.covars_ = np.maximum(self.covars, 0.1)

    X, state_sequence = h.sample(n, random_state=self.prng)
    self.assertEqual(X.shape, (n, self.n_features))
    self.assertEqual(len(state_sequence), n)
def test_fit(self, params='stmc', n_iter=5, **kwargs):
    self.setUp()
    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    h.startprob_ = self.startprob
    h.transmat_ = normalized(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.means_ = 20 * self.means
    h.covars_ = self.covars

    lengths = [10] * 10
    X, _state_sequence = h.sample(sum(lengths), random_state=self.prng)

    # Mess up the parameters and see if we can re-learn them.
    # TODO: change the params and uncomment the check
    self.assertIsNotNone(h.fit(X, lengths=lengths))
def test_fit_zero_variance(self):
    self.setUp()
    # Example from issue #2 on GitHub.
    X = np.asarray([
        [7.15000000e+02, 5.85000000e+02, 0.00000000e+00, 0.00000000e+00],
        [7.15000000e+02, 5.20000000e+02, 1.04705811e+00, -6.03696289e+01],
        [7.15000000e+02, 4.55000000e+02, 7.20886230e-01, -5.27055664e+01],
        [7.15000000e+02, 3.90000000e+02, -4.57946777e-01, -7.80605469e+01],
        [7.15000000e+02, 3.25000000e+02, -6.43127441e+00, -5.59954834e+01],
        [7.15000000e+02, 2.60000000e+02, -2.90063477e+00, -7.80220947e+01],
        [7.15000000e+02, 1.95000000e+02, 8.45532227e+00, -7.03294373e+01],
        [7.15000000e+02, 1.30000000e+02, 4.09387207e+00, -5.83621216e+01],
        [7.15000000e+02, 6.50000000e+01, -1.21667480e+00, -4.48131409e+01],
    ])
    h = hmm.GaussianHMM(3, self.covariance_type)
    self.assertIsNotNone(h.fit(X))
def test_score_samples_and_decode(self):
    self.setUp()
    h = hmm.GaussianHMM(self.n_components, self.covariance_type,
                        init_params="st")
    h.means_ = self.means
    h.covars_ = self.covars

    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    h.means_ = 20 * h.means_

    gaussidx = np.repeat(np.arange(self.n_components), 5)
    n_samples = len(gaussidx)
    X = self.prng.randn(n_samples, self.n_features) + h.means_[gaussidx]
    h._init(X)

    ll, posteriors = h.score_samples(X)
    self.assertEqual(posteriors.shape, (n_samples, self.n_components))
    self.assertTrue(np.allclose(posteriors.sum(axis=1), np.ones(n_samples)))

    viterbi_ll, stateseq = h.decode(X)
    self.assertTrue(np.allclose(stateseq, gaussidx))
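# With well-separated components, the framewise posterior argmax and the
# Viterbi path agree, which is what the final assertion above relies on.
# A minimal sketch making that relationship explicit (the helper name is
# illustrative, not part of the suite):

def viterbi_matches_posterior_argmax(h, X):
    """Check that decode()'s Viterbi path equals the per-frame MAP
    state obtained from score_samples()."""
    _ll, posteriors = h.score_samples(X)
    _viterbi_ll, viterbi_path = h.decode(X, algorithm="viterbi")
    return np.array_equal(posteriors.argmax(axis=1), viterbi_path)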