def test_semi_markov(self):
    # With scalar dwell-time settings, sampling must return exactly as many
    # entries as were requested.
    n_samples = 28
    model = SemiMarkov(5, min_dwell=3, max_dwell=13, dwell_times=4.3)
    sampled = model.sample(n_samples)
    self.assertEqual(len(sampled), n_samples)
def test_pure_markov(self):
    # With only the number of states given, sampling must still return the
    # requested number of entries.
    n_samples = 12
    model = SemiMarkov(4)
    sampled = model.sample(n_samples)
    self.assertEqual(len(sampled), n_samples)
def test_default_seed_is_zero(self):
    # The fixture model and a model built with rng=0 must produce identical
    # sequences.
    n_samples = 50
    seeded_model = SemiMarkov(self.n_components, rng=0)
    from_fixture = self.smm.sample(n_samples)
    from_seeded = seeded_model.sample(n_samples)
    np.testing.assert_equal(from_fixture, from_seeded)
def test_int_seed_uses_numpy_default_rng(self):
    # The fixture model and a model driven by np.random.default_rng(0) must
    # produce identical sequences.
    n_samples = 43
    generator_model = SemiMarkov(self.n_components, rng=np.random.default_rng(0))
    from_fixture = self.smm.sample(n_samples)
    from_generator = generator_model.sample(n_samples)
    np.testing.assert_equal(from_fixture, from_generator)
def setUp(self):
    # Fixture: a four-state model with per-state dwell-time settings; the
    # settings are kept on the test case so tests can compare against them.
    self.n_components = 4
    self.min_dwell = [2, 3, 1, 0]
    self.max_dwell = [5, 3, np.inf, 4]
    self.dwell_times = [2.5, 3.0, 3.5, 2.0]

    dwell_settings = {
        "min_dwell": self.min_dwell,
        "max_dwell": self.max_dwell,
        "dwell_times": self.dwell_times,
    }
    self.smm = SemiMarkov(self.n_components, **dwell_settings)
def test_all_transitions_occur_if_dwell_time_larger_than_one(self):
    # With dwell_times=2.0 every ordered pair of consecutive states,
    # self-pairs included, is expected in each sampled sequence.
    n_states = 3
    model = SemiMarkov(n_states, rng=5, dwell_times=2.0)

    n_repeats = 5
    n_samples = 200
    expected_pair_count = n_states**2
    for _ in range(n_repeats):
        states = model.sample(n_samples)
        observed_pairs = set(zip(states, states[1:]))
        self.assertEqual(len(observed_pairs), expected_pair_count)
def test_all_non_dwelling_transitions_occur_by_default(self):
    # With default settings the number of distinct consecutive-state pairs
    # must equal the number of ordered pairs of different states.
    n_states = 3
    model = SemiMarkov(n_states, rng=4)

    n_repeats = 5
    n_samples = 200
    expected_pair_count = n_states * (n_states - 1)
    for _ in range(n_repeats):
        states = model.sample(n_samples)
        observed_pairs = set(zip(states, states[1:]))
        self.assertEqual(len(observed_pairs), expected_pair_count)
def test_transform_second_retval_is_usage_seq_from_semi_markov_when_ret_usseq(
    self,
):
    # The second value returned by `transform` must match what a standalone
    # two-state SemiMarkov samples for the same length.
    n_samples = 15
    hsmm = ArmaHSMM(self.armas)
    _output, usage_seq = hsmm.transform(n_samples, return_usage_seq=True)

    reference_model = SemiMarkov(2)
    expected_usage_seq = reference_model.sample(n_samples)

    np.testing.assert_allclose(usage_seq, expected_usage_seq)
def test_single_non_zero_initial_prob(self):
    # All start probability sits on one state, so every sampled sequence
    # must begin in that state.
    n_states = 6
    state_idx = 3
    start_prob = np.zeros(n_states)
    start_prob[state_idx] = 1.0
    model = SemiMarkov(n_states, start_prob=start_prob)

    n_repeats = 40
    n_samples = 4
    for i in range(n_repeats):
        first_state = model.sample(n_samples)[0]
        self.assertEqual(first_state, state_idx, f"at iteration {i}")
def test_all_states_occur_by_default(self):
    # Over many short sequences, every state index must appear at least
    # once as the initial state.
    n_states = 5
    model = SemiMarkov(n_states, rng=2)

    n_repeats = 50
    n_samples = 3
    initial_states = {model.sample(n_samples)[0] for _ in range(n_repeats)}

    self.assertEqual(initial_states, set(range(n_states)))
def test_additional_init_kwargs_passed_to_semi_markov(self):
    # Extra constructor keywords given to ArmaHSMM must yield the same usage
    # sequence as a SemiMarkov built directly with those keywords.
    smm_kwargs = {"max_dwell": 20, "rng": 5}
    n_samples = 15

    hsmm = ArmaHSMM(self.armas, **smm_kwargs)
    _output, _input, usage_seq = hsmm.transform(
        n_samples, return_input=True, return_usage_seq=True
    )

    reference_model = SemiMarkov(2, **smm_kwargs)
    expected_usage_seq = reference_model.sample(n_samples)

    np.testing.assert_allclose(usage_seq, expected_usage_seq)
class TestSemiMarkovDwellTimeConstraintsObeyed(unittest.TestCase):
    """Check that sampled dwell times hit, but never cross, their bounds."""

    def setUp(self):
        self.n_components = 4
        self.min_dwell = [2, 3, 1, 0]
        self.max_dwell = [5, 3, np.inf, 4]
        self.dwell_times = [2.5, 3.0, 3.5, 2.0]

        dwell_settings = {
            "min_dwell": self.min_dwell,
            "max_dwell": self.max_dwell,
            "dwell_times": self.dwell_times,
        }
        self.smm = SemiMarkov(self.n_components, **dwell_settings)

    @staticmethod
    def to_rle(seq: np.ndarray) -> list:
        """Run-length encode `seq` into a list of `(value, run_length)` pairs."""
        # Indices where a new run begins, bracketed by 0 and len(seq) so that
        # consecutive entries delimit each run.
        change_points = np.diff(seq).nonzero()[0] + 1
        boundaries = np.hstack(([0], change_points, len(seq)))
        return [
            (seq[begin], end - begin)
            for begin, end in zip(boundaries, boundaries[1:])
        ]

    @staticmethod
    def _complete_run_lengths(runs: list, state: int) -> list:
        """Collect the run lengths of `state`, skipping the final run."""
        # last element of RLE might be truncated due to n_samples
        return [length for value, length in runs[:-1] if value == state]

    def test_dwell_times_reach_minimum_but_do_not_go_below(self):
        runs = self.to_rle(self.smm.sample(300))
        for i in range(self.n_components):
            observed = self._complete_run_lengths(runs, i)
            self.assertGreater(len(observed), 0, f"State {i} does not occur")

            # a run always lasts at least one step, even when min_dwell is 0
            lower_bound = max(1, self.min_dwell[i])
            self.assertEqual(np.min(observed), lower_bound, f"State {i}")

    def test_dwell_times_reach_maximum_but_do_not_go_above(self):
        runs = self.to_rle(self.smm.sample(2000))
        for i in range(self.n_components):
            observed = self._complete_run_lengths(runs, i)
            self.assertGreater(len(observed), 0, f"State {i} does not occur")

            upper_bound = self.max_dwell[i]
            if not np.isfinite(upper_bound):
                # NOTE(review): this holds whenever `observed` is non-empty,
                # which was asserted just above -- confirm whether a stricter
                # check was intended for unbounded states.
                self.assertGreater(len(np.unique(observed)), 0, f"State {i}")
            else:
                self.assertEqual(np.max(observed), upper_bound, f"State {i}")
def __init__(self, models: Sequence, **kwargs):
    """
    Set up the ARMA HSMM from a sequence of models.

    Parameters
    ----------
    models
        Sequence of models to use. Its length is the number of states
        given to the semi-Markov model.

    Any further keyword arguments are forwarded unchanged to the
    `SemiMarkov` constructor.
    """
    self.models = models

    n_states = len(models)
    self.smm = SemiMarkov(n_states, **kwargs)

    # both dimensions are fixed at one
    self.n_features = 1
    self.n_components = 1
def test_several_non_zero_initial_prob(self):
    # Start probability is split evenly over three states: every sequence
    # must begin in one of them, and all three must show up across runs.
    n_states = 6
    state_idxs = [1, 3, 5]
    start_prob = np.zeros(n_states)
    start_prob[state_idxs] = 1.0 / len(state_idxs)
    model = SemiMarkov(n_states, start_prob=start_prob, rng=1)

    n_repeats = 40
    n_samples = 4
    seen_initial_states = set()
    for i in range(n_repeats):
        first_state = model.sample(n_samples)[0]
        self.assertIn(first_state, state_idxs, f"at iteration {i}")
        seen_initial_states.add(first_state)

    self.assertEqual(seen_initial_states, set(state_idxs))
def test_absorbing_state(self):
    # One state gets an infinite dwell time; every sequence must reach it
    # and then never leave it.
    n_states = 3
    abs_idx = 2
    dwell_times = np.ones(n_states)
    dwell_times[abs_idx] = np.inf
    model = SemiMarkov(n_states, dwell_times=dwell_times)

    n_repeats = 10
    n_samples = 16
    for _ in range(n_repeats):
        states = model.sample(n_samples)
        absorbed_at = (states == abs_idx).nonzero()[0]

        # make sure we reach absorbing state
        self.assertGreater(len(absorbed_at), 0)

        # and stay there once we've reached it
        first_hit = absorbed_at[0]
        np.testing.assert_equal(states[first_hit:], abs_idx)
def __getitem__(self, idx: int) -> SwitchingSnippetSignal:
    """
    Build the signal stored at position `idx`.

    Parameters
    ----------
    idx
        Index of the signal to generate. Negative values count from the
        end; anything outside the valid range raises `IndexError`.

    Returns a `SwitchingSnippetSignal` holding the signal `y`, the usage
    sequence, and the scale factor applied to `y` (1.0 when normalization
    is off). The signal is regenerated from its seed on every access, which
    can make this a time-consuming operation.
    """
    # resolve negative indices, then bounds-check
    if idx < 0:
        idx = self.n_signals + idx
    if not 0 <= idx < self.n_signals:
        raise IndexError("index out of range")

    # each signal has its own seed
    rng = np.random.default_rng(self.signal_seeds[idx])

    # sample which snippet is used at each time step; the semi-Markov model
    # shares `rng` with the snippet-offset draws below
    state_model = SemiMarkov(len(self.snippets), rng=rng, **self.semi_markov_kws)
    usage_seq = state_model.sample(self.n_samples)

    # fill the signal run by run, copying a randomly positioned window of the
    # matching snippet for each run
    y = np.zeros(self.n_samples)
    cursor = 0
    for snippet_idx, run_length in rle_encode(usage_seq):
        snippet = self.snippets[snippet_idx]
        offset = rng.integers(0, len(snippet) - run_length + 1)
        y[cursor:cursor + run_length] = snippet[offset:offset + run_length]
        cursor += run_length

    # optionally rescale by the inverse standard deviation of the signal
    scale = 1.0
    if self.normalize:
        scale = 1.0 / np.std(y)
        y *= scale

    return SwitchingSnippetSignal(y=y, usage_seq=usage_seq, scale=scale)
def test_deterministic_cyclic_transitions(self):
    # The transition matrix only allows moving to the next state of a fixed
    # cycle, so every consecutive pair seen must be one of the cycle's edges.
    n_states = 4
    cycle = [2, 3, 1, 0]
    successors = cycle[1:] + cycle[:1]

    allowed_pairs = set(zip(cycle, successors))
    trans_mat = np.zeros((n_states, n_states))
    trans_mat[cycle, successors] = 1.0

    model = SemiMarkov(n_states, trans_mat=trans_mat)

    n_repeats = 6
    n_samples = 40
    for i in range(n_repeats):
        states = model.sample(n_samples)
        for pair in set(zip(states, states[1:])):
            self.assertIn(pair, allowed_pairs, f"iteration {i}")
class TestSemiMarkovStateContentOfOutputIsRight(unittest.TestCase):
    """Check the range of state values in sampled sequences."""

    def setUp(self):
        self.n_components = 4
        self.smm = SemiMarkov(self.n_components, rng=1)

    def test_all_states_are_between_zero_and_n_components(self):
        states = self.smm.sample(100)
        lowest, highest = np.min(states), np.max(states)
        self.assertGreaterEqual(lowest, 0)
        self.assertLess(highest, self.n_components)

    def test_lowest_state_in_long_sequence_is_zero(self):
        states = self.smm.sample(200)
        self.assertEqual(np.min(states), 0)

    def test_highest_state_in_long_sequence_is_n_components_minus_one(self):
        states = self.smm.sample(200)
        top_state = self.n_components - 1
        self.assertEqual(np.max(states), top_state)
class TestSemiMarkovWithPseudorandom(unittest.TestCase):
    """Check random-number handling of a default-constructed model."""

    def setUp(self):
        self.n_components = 4
        self.smm = SemiMarkov(self.n_components)

    def test_repeated_runs_yield_different_results(self):
        n_samples = 100
        first = self.smm.sample(n_samples)
        second = self.smm.sample(n_samples)
        largest_gap = np.max(np.abs(first - second))
        self.assertGreater(largest_gap, 0)

    def test_default_seed_is_zero(self):
        n_samples = 50
        seeded_model = SemiMarkov(self.n_components, rng=0)
        from_fixture = self.smm.sample(n_samples)
        from_seeded = seeded_model.sample(n_samples)
        np.testing.assert_equal(from_fixture, from_seeded)

    def test_output_is_int(self):
        states = self.smm.sample(23)
        is_integer = np.issubdtype(np.asarray(states).dtype, np.integer)
        self.assertTrue(is_integer)

    def test_int_seed_uses_numpy_default_rng(self):
        n_samples = 43
        generator_model = SemiMarkov(
            self.n_components, rng=np.random.default_rng(0)
        )
        from_fixture = self.smm.sample(n_samples)
        from_generator = generator_model.sample(n_samples)
        np.testing.assert_equal(from_fixture, from_generator)
def setUp(self):
    # Fixture: a three-state model with every constructor setting given
    # explicitly; the settings are kept on the test case for the tests.
    self.n_components = 3
    self.start_prob = np.asarray([0.2, 0.5, 0.3])
    self.trans_mat = np.asarray(
        [
            [0.0, 0.3, 0.7],
            [0.1, 0.0, 0.9],
            [0.5, 0.5, 0.0],
        ]
    )
    self.dwell_times = np.asarray([2.5, 4.5, 3.0])
    self.min_dwell = np.asarray([2, 3, 1])
    self.max_dwell = np.asarray([np.inf, 10, 8])
    self.rng = np.random.default_rng(4)

    model_settings = {
        "start_prob": self.start_prob,
        "trans_mat": self.trans_mat,
        "dwell_times": self.dwell_times,
        "min_dwell": self.min_dwell,
        "max_dwell": self.max_dwell,
        "rng": self.rng,
    }
    self.smm = SemiMarkov(self.n_components, **model_settings)
def test_default_dwell_times_are_equal_to_one(self):
    # Leaving dwell_times unset must give the same sequence as passing
    # dwell_times=1 explicitly.
    n_states = 3
    n_samples = 10
    trans_mat = np.random.default_rng(3).uniform(size=(n_states, n_states))

    # each model is built and sampled before the next one is created
    implicit_seq, explicit_seq = [
        SemiMarkov(n_states, trans_mat=trans_mat, **extra).sample(n_samples)
        for extra in ({}, {"dwell_times": 1})
    ]

    np.testing.assert_equal(implicit_seq, explicit_seq)
def test_diagonal_trans_mat_elements_are_ignored(self):
    # Two transition matrices that differ only on the diagonal must produce
    # identical sequences.
    n_states = 5
    n_samples = 32
    rng = np.random.default_rng(1)

    base_mat = rng.uniform(size=(n_states, n_states))
    diag_shift = np.diag(rng.uniform(low=-1, high=1, size=n_states))
    shifted_mat = base_mat + diag_shift

    shifted_seq = SemiMarkov(n_states, trans_mat=shifted_mat).sample(n_samples)
    base_seq = SemiMarkov(n_states, trans_mat=base_mat).sample(n_samples)

    np.testing.assert_equal(shifted_seq, base_seq)
def test_dwell_times_affect_generated_sequence(self):
    # Two models sharing a transition matrix but using different random
    # dwell times must not produce the same sequence.
    n_states = 3
    n_samples = 100
    rng = np.random.default_rng(4)
    trans_mat = rng.uniform(size=(n_states, n_states))

    sequences = []
    for _ in range(2):
        # draw fresh dwell times, then build and sample a model with them
        dwell_times = rng.uniform(low=0, high=10, size=n_states)
        model = SemiMarkov(n_states, trans_mat=trans_mat, dwell_times=dwell_times)
        sequences.append(model.sample(n_samples))

    first, second = sequences
    largest_gap = np.max(np.abs(first - second))
    self.assertGreater(largest_gap, 0)
def test_offdiagonal_trans_mat_elements_are_normalized(self):
    # Multiplying a zero-diagonal transition matrix by a constant must not
    # change the sampled sequence.
    n_states = 4
    n_samples = 27
    rng = np.random.default_rng(2)

    base_mat = rng.uniform(size=(n_states, n_states))
    np.fill_diagonal(base_mat, 0)  # zero the diagonal in place
    scaled_mat = 3.2 * base_mat

    base_seq = SemiMarkov(n_states, trans_mat=base_mat).sample(n_samples)
    scaled_seq = SemiMarkov(n_states, trans_mat=scaled_mat).sample(n_samples)

    np.testing.assert_equal(base_seq, scaled_seq)
class ArmaHSMM:
    """
    A hidden semi-Markov model with ARMA emissions.

    This class can be used to generate samples from a non-stationary
    stochastic process that stochastically switches between several ARMA
    processes based on a hidden semi-Markov model.

    Attributes
    ----------
    n_features : int
        Number of input dimensions. This is always equal to 1.
    n_components : int
        Number of output dimensions. This is always equal to 1.
    models
        Sequence of models to use.
    smm
        Semi-Markov model used to generate ARMA states.
    """

    def __init__(self, models: Sequence, **kwargs):
        """
        Initialize the ARMA HSMM.

        Parameters
        ----------
        models
            Sequence of models to use. This sets the number of states in the
            semi-Markov model.

        All other keyword arguments are passed to the semi-Markov model
        constructor.
        """
        self.models = models
        self.smm = SemiMarkov(len(models), **kwargs)

        self.n_features = 1
        self.n_components = 1

    def transform(
        self,
        n_samples: Optional[int] = None,
        X: Union[None, Sequence, Callable] = None,
        initial_conditions: Optional[Tuple[Sequence, Sequence]] = None,
        return_input: bool = False,
        return_usage_seq: bool = False,
    ) -> Union[
        np.ndarray,
        Tuple[np.ndarray, np.ndarray],
        Tuple[np.ndarray, np.ndarray, np.ndarray],
    ]:
        """
        Process input samples.

        The function uses exactly `n_samples` input samples.

        If no input source is explicitly provided, the default source for
        each of the ARMAs is used. An exception is raised if a process needs
        to be used that does not have a default source.

        Parameters
        ----------
        n_samples
            Number of samples to generate. If not provided, `X` must be
            provided and it must be a sequence (an object with a length).
        X
            Input samples or input generator. See `Arma.transform`.
        initial_conditions
            A tuple, `(initial_y, initial_x)`, of recent samples of the
            output and input sequences used to seed the simulation. If these
            are not provided, they are assumed equal to zero.
        return_input
            If true, returns both output and input. If false (the default),
            returns only the output.
        return_usage_seq
            If true, returns the `usage_seq` in addition to output (and
            potentially input).

        Returns
        -------
        Either a single array (`Y`) if `return_input` and `return_usage_seq`
        are both false; or a tuple `(Y, X)` or `(Y, usage_seq)` if only
        `return_input` or only `return_usage_seq` is true, respectively; or a
        tuple `(Y, X, usage_seq)` if both are true. Here `Y` is an array of
        generated `y`; `X` contains the input `x` samples; and `usage_seq` is
        an integer array indicating which model was used at each time step.
        If the `X` parameter was used and was a sequence, the output `X`
        simply mirrors the input `X`.

        Raises
        ------
        ValueError
            If `n_samples` is not given and `X` is either missing or has no
            length from which the sample count could be inferred.
        """
        # infer the number of samples from X when it was not given explicitly
        if n_samples is None:
            if X is None or not hasattr(X, "__len__"):
                raise ValueError("Need either n_samples or sequence X.")
            n_samples = len(X)

        # generate usage sequence, then use sample_switching_models
        usage_seq = self.smm.sample(n_samples)
        y, x = sample_switching_models(
            self.models,
            usage_seq,
            X=X,
            initial_conditions=initial_conditions,
            return_input=True,
        )

        # assemble only the requested outputs, always starting with y
        res = (y,)
        if return_input:
            res += (x,)
        if return_usage_seq:
            res += (usage_seq,)

        # a lone output is returned bare rather than as a 1-tuple
        return res[0] if len(res) == 1 else res

    def __repr__(self) -> str:
        return f"ArmaHSMM(models={self.models!r}, smm={self.smm!r})"

    def __str__(self) -> str:
        return f"ArmaHSMM(models={self.models!s}, smm={self.smm!s})"
def setUp(self):
    # Fixture: a four-state model built with default settings.
    n_states = 4
    self.n_components = n_states
    self.smm = SemiMarkov(n_states)
def test_no_error(self):
    # A legacy np.random.RandomState must be accepted as `rng`, and sampling
    # must return the requested number of entries.
    n_samples = 13
    legacy_rng = np.random.RandomState(1)
    model = SemiMarkov(5, rng=legacy_rng)
    sampled = model.sample(n_samples)
    self.assertEqual(len(sampled), n_samples)
def test_raises_value_error_if_dwell_times_not_below_dwell_max(self):
    # The first dwell time (100) exceeds max_dwell (20); the constructor is
    # expected to reject this with a ValueError.
    self.assertRaises(
        ValueError, SemiMarkov, 2, dwell_times=[100, 10], max_dwell=20
    )
def test_raises_value_error_if_dwell_times_not_above_dwell_min(self):
    # The second dwell time (10) is below min_dwell (20); the constructor is
    # expected to reject this with a ValueError.
    self.assertRaises(
        ValueError, SemiMarkov, 2, dwell_times=[100, 10], min_dwell=20
    )