def test_train_sequences_of_different_length(self, tr_params="stmc"):
    h = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        covariance_type=self.covariance_type,
    )
    h.A = self.A
    h.pi = self.pi
    h.B = self.B
    h.means = self.means
    h.covars = self.covars
    h.tr_params = tr_params

    # Generate observation sequences of different lengths
    lengths = [30, 40, 50]
    X = [
        h.sample(n_sequences=1, n_samples=n_samples)[0]
        for n_samples in lengths
    ]

    h, log_likelihoods = h._train(
        X, n_iter=10, conv_thresh=0.01, return_log_likelihoods=True
    )

    # We consider learning successful if the log-likelihood increases. The
    # first iteration is discarded because the log-likelihood sometimes drops
    # there before increasing for the rest of the run.
    assert np.all(np.round(np.diff(log_likelihoods[1:]), 10) >= 0)
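
# The monotonicity check above recurs in most tests in this file. A small
# hedged helper like the following (not part of the original suite) makes the
# intent explicit: rounding to 10 decimals tolerates tiny negative EM steps
# caused by floating-point noise, and skip_first drops the occasionally
# decreasing first iteration.
@staticmethod
def assert_log_likelihoods_nondecreasing(log_likelihoods, skip_first=False, decimals=10):
    values = log_likelihoods[1:] if skip_first else log_likelihoods
    assert np.all(np.round(np.diff(values), decimals) >= 0)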
def test_train(self, n_samples=100, n_sequences=30, tr_params="stmc"):
    h = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        init_type="kmeans",
        covariance_type=self.covariance_type,
        tr_params=tr_params,
        verbose=True,
    )
    h.pi = self.pi
    h.A = self.A
    h.B = self.B
    # Keep the states well separated so the sequences are easy to learn from.
    h.means = 20 * self.means
    h.covars = np.maximum(self.covars, 0.1)

    # Generate observation sequences
    X = h.sample(n_sequences=n_sequences, n_samples=n_samples)

    # Re-initialise with k-means and check that EM can re-learn the parameters.
    h, log_likelihoods = h._train(
        X,
        n_iter=10,
        conv_thresh=0.01,
        return_log_likelihoods=True,
        n_processes=4,
    )

    # we consider learning successful if the log-likelihood increases
    assert np.all(np.round(np.diff(log_likelihoods), 10) >= 0)
def test_sample(self, n_samples=1000, n_sequences=5):
    h = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        covariance_type=self.covariance_type,
    )
    h.pi = self.pi
    h.A = self.A
    h.B = self.B
    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    h.means = 20 * self.means
    h.covars = np.maximum(self.covars, 0.1)

    X, state_sequences = h.sample(
        n_sequences=n_sequences, n_samples=n_samples, return_states=True
    )

    # Use the built-in all() here: np.all() on a generator is always truthy.
    assert all(X[i].ndim == 2 for i in range(n_sequences))
    assert all(
        len(X[i]) == len(state_sequences[i]) == n_samples
        for i in range(n_sequences)
    )
    # Each sequence has one column per emission (Gaussian + discrete).
    assert all(
        X[i].shape[1] == (h.n_g_emissions + h.n_d_emissions)
        for i in range(n_sequences)
    )
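
# A minimal companion check, assuming the column layout used elsewhere in this
# file (Gaussian features first, then one column per discrete emission, as in
# test_score_samples_and_decode below): the discrete columns of a sampled
# sequence should only contain valid category indices. This is a sketch, not
# part of the original suite.
def test_sample_discrete_columns_in_range(self, n_samples=100, n_sequences=3):
    h = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        covariance_type=self.covariance_type,
    )
    h.pi = self.pi
    h.A = self.A
    h.B = self.B
    h.means = self.means
    h.covars = self.covars

    X = h.sample(n_sequences=n_sequences, n_samples=n_samples)
    for x in X:
        # Assumed layout: discrete emissions occupy the trailing columns.
        discrete = x[:, h.n_g_emissions:]
        for e, n_feat in enumerate(self.n_d_features):
            assert np.all((discrete[:, e] >= 0) & (discrete[:, e] < n_feat))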
def test_non_trainable_emission(self, n_samples=100, n_sequences=30, tr_params="ste"):
    h = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        covariance_type=self.covariance_type,
    )
    h.A = self.A
    h.pi = self.pi
    h.B = self.B
    h.means = self.means
    h.covars = self.covars
    h.tr_params = tr_params

    # Generate observation sequences
    X = h.sample(n_sequences=n_sequences, n_samples=n_samples)

    h_tst = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        covariance_type=self.covariance_type,
        nr_no_train_de=1,
    )

    # The last discrete emission matrix is declared non-trainable, so training
    # before it has been provided should fail.
    B_fix = np.eye(self.n_states, self.n_d_features[-1])
    with pytest.raises(AttributeError):
        h_tst, log_likelihoods = h_tst._train(
            X,
            n_iter=100,
            conv_thresh=0.01,
            return_log_likelihoods=True,
            no_init=False,
        )

    # Set up the emission probabilities, keeping the last matrix fixed to
    # B_fix, and see if we can re-learn the trainable ones.
    B_trainable = [
        np.random.rand(self.n_states, n_feat) for n_feat in self.n_d_features[:-1]
    ]
    B_trainable = [b / b.sum(axis=1, keepdims=True) for b in B_trainable]
    h_tst.B = B_trainable + [B_fix]
    h_tst, log_likelihoods = h_tst._train(
        X,
        n_iter=100,
        conv_thresh=0.01,
        return_log_likelihoods=True,
        no_init=False,
    )

    # we consider learning successful if the log-likelihood increases
    assert np.all(np.round(np.diff(log_likelihoods), 10) >= 0)

    # the fixed emission matrix must not have changed
    assert np.allclose(B_fix, h_tst.B[-1])
def test_score_samples_and_decode(self):
    h = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        covariance_type=self.covariance_type,
    )
    h.pi = self.pi
    h.A = self.A
    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    h.means = 20 * self.means
    h.covars = self.covars
    h.B = self.B

    stateidx = np.repeat(np.arange(self.n_states), 5)
    n_samples = len(stateidx)
    X_gauss = self.prng.randn(n_samples, h.n_g_emissions) + h.means[stateidx]

    # Draw the discrete part of each observation by inverse-transform sampling
    # from the state's categorical emission distributions.
    samples = []
    for idx, state in enumerate(stateidx):
        cat_sample = []
        for e in range(h.n_d_emissions):
            cdf = np.cumsum(h.B[e][state, :])
            cat_sample.append((cdf > self.prng.rand()).argmax())
        samples.append(np.concatenate([X_gauss[idx], cat_sample]))
    X = [np.asarray(samples)]

    # predict_proba returns a list even for a single observation sequence
    posteriors = h.predict_proba(X)[0]
    assert posteriors.shape == (n_samples, self.n_states)
    assert np.allclose(posteriors.sum(axis=1), np.ones(n_samples))

    _, stateseq = h.decode(X)
    assert np.allclose(stateseq, stateidx)
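
# Hedged extra check building on the test above: when the states are well
# separated, the per-sample posterior argmax is expected to coincide with the
# Viterbi path (this is not guaranteed in general, only overwhelmingly likely
# here). A sketch, not part of the original suite.
def test_posterior_argmax_matches_viterbi(self, n_samples=200):
    h = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        covariance_type=self.covariance_type,
    )
    h.pi = self.pi
    h.A = self.A
    h.B = self.B
    h.means = 20 * self.means
    h.covars = np.maximum(self.covars, 0.1)

    X = h.sample(n_sequences=1, n_samples=n_samples)
    posteriors = h.predict_proba(X)[0]
    _, stateseq = h.decode(X)
    assert np.all(posteriors.argmax(axis=1) == stateseq)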
def test_train_without_init(self, n_samples=100, n_sequences=30, tr_params="ste"):
    h = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        covariance_type=self.covariance_type,
        tr_params=tr_params,
    )
    h.pi = self.pi
    h.A = self.A
    h.means = 20 * self.means
    h.covars = np.maximum(self.covars, 0.1)
    h.B = self.B

    # Generate observation sequences
    X = h.sample(n_sequences=n_sequences, n_samples=n_samples)

    h_tst = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        covariance_type=self.covariance_type,
        tr_params=tr_params,
    )

    # Training with no_init=True on a model whose parameters were never set
    # should fail.
    with pytest.raises(AttributeError):
        h_tst, _ = h_tst._train(
            X,
            n_iter=100,
            conv_thresh=0.01,
            return_log_likelihoods=True,
            no_init=True,
            n_processes=4,
        )
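
# Hedged companion to the test above: if the parameters *are* set before
# calling _train with no_init=True, training is expected to proceed without
# raising. A sketch assuming _train accepts the same keyword arguments used
# throughout this file.
def test_train_without_init_with_preset_params(self, n_samples=100, n_sequences=10, tr_params="ste"):
    h_tst = HeterogeneousHMM(
        n_states=self.n_states,
        n_g_emissions=self.n_g_emissions,
        n_d_emissions=self.n_d_emissions,
        n_d_features=self.n_d_features,
        covariance_type=self.covariance_type,
        tr_params=tr_params,
    )
    h_tst.pi = self.pi
    h_tst.A = self.A
    h_tst.means = 20 * self.means
    h_tst.covars = np.maximum(self.covars, 0.1)
    h_tst.B = self.B

    X = h_tst.sample(n_sequences=n_sequences, n_samples=n_samples)
    h_tst, log_likelihoods = h_tst._train(
        X,
        n_iter=10,
        conv_thresh=0.01,
        return_log_likelihoods=True,
        no_init=True,
    )
    # EM from the preset parameters should still improve the fit monotonically.
    assert np.all(np.round(np.diff(log_likelihoods), 10) >= 0)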