def test_set_start_state_list(self, check_state):
    # A start state given as a list must be reordered to follow the order in
    # which the model declares its variables ('b' first, then 'a').
    # `check_state` is presumably a mock injected by a patch decorator that
    # lies outside this view.
    chain = MC(['b', 'a'], [1, 2])
    check_state.return_value = True

    chain.set_start_state([State('a', 0), State('b', 1)])

    reordered = [State('b', 1), State('a', 0)]
    check_state.assert_called_once_with(chain, reordered)
    self.assertEqual(chain.state, reordered)
def test_generate_sample_less_arg(self, random_state, sample_discrete):
    # Without an explicit start state the generator is expected to take its
    # initial state from random_state() (mocked here).
    chain = MC(['a', 'b'], [2, 2])
    chain.transition_models['a'] = {
        0: {0: 0.1, 1: 0.9},
        1: {0: 0.2, 1: 0.8},
    }
    chain.transition_models['b'] = {
        0: {0: 0.3, 1: 0.7},
        1: {0: 0.4, 1: 0.6},
    }
    random_state.return_value = [State('a', 0), State('b', 1)]
    # Every step draws for 'a' (-> 1) and then for 'b' (-> 0); two steps
    # therefore consume four mocked draws.
    sample_discrete.side_effect = [[1], [0], [1], [0]]

    drawn = list(chain.generate_sample(size=2))

    single_step = [State('a', 1), State('b', 0)]
    self.assertEqual(drawn, [single_step, single_step])
def test_sample(self):
    # Sampling two rows from an explicit start state yields a two-row frame
    # with one column per variable, each row being a valid binary assignment.
    chain = MC(['a', 'b'], [2, 2])
    chain.transition_models['a'] = {
        0: {0: 0.1, 1: 0.9},
        1: {0: 0.2, 1: 0.8},
    }
    chain.transition_models['b'] = {
        0: {0: 0.3, 1: 0.7},
        1: {0: 0.4, 1: 0.6},
    }

    frame = chain.sample(start_state=[State('a', 0), State('b', 1)], size=2)

    self.assertEqual(len(frame), 2)
    self.assertEqual(list(frame.columns), ['a', 'b'])
    valid_rows = [[0, 0], [0, 1], [1, 0], [1, 1]]
    for row_label in (0, 1):
        self.assertTrue(list(frame.loc[row_label]) in valid_rows)
def test_sample_less_arg(self, random_state):
    # With no start state supplied, sample() must ask random_state() for one
    # and report it as the first (and here only) row.
    chain = MC(['a', 'b'], [2, 2])
    initial = [State('a', 0), State('b', 1)]
    random_state.return_value = initial

    frame = chain.sample(size=1)

    random_state.assert_called_once_with(chain)
    self.assertEqual(chain.state, initial)
    self.assertEqual(len(frame), 1)
    self.assertEqual(list(frame.columns), ['a', 'b'])
    self.assertEqual(list(frame.loc[0]), [0, 1])
def test_sample_less_arg(self, random_state):
    # Clearing the stored state forces sample() to fall back to the (mocked)
    # random_state() exactly once.
    self.gibbs.state = None
    random_state.return_value = [
        State(name, 0) for name in ('diff', 'intel', 'grade')
    ]

    drawn = self.gibbs.sample(size=2)

    random_state.assert_called_once_with(self.gibbs)
    self.assertEqual(len(drawn), 2)
def test_generate_sample(self):
    # The generator must yield `size` assignments, each naming all three
    # variables of the fixture model.
    start_state = [State(name, 0) for name in ('diff', 'intel', 'grade')]

    samples = list(self.gibbs.generate_sample(start_state, 2))

    self.assertEqual(len(samples), 2)
    expected_vars = {'diff', 'intel', 'grade'}
    for assignment in samples:
        seen_vars = {assignment[k].var for k in range(3)}
        self.assertEqual(seen_vars, expected_vars)
def test_sample(self):
    # Sampling from an explicit start state returns one row per requested
    # sample and one column per variable, with values inside each variable's
    # state range.
    start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
    sample = self.gibbs.sample(start_state, 2)
    # assertEqual replaces the assertEquals alias, which was deprecated in
    # Python 3.2 and removed in Python 3.12.
    self.assertEqual(len(sample), 2)
    self.assertEqual(len(sample.columns), 3)
    self.assertIn('diff', sample.columns)
    self.assertIn('intel', sample.columns)
    self.assertIn('grade', sample.columns)
    self.assertTrue(set(sample['diff']).issubset({0, 1}))
    self.assertTrue(set(sample['intel']).issubset({0, 1}))
    self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))
def setUp(self):
    # Variables and their cardinalities, as parallel lists and as a mapping.
    self.variables = ['intel', 'diff', 'grade']
    self.card = [3, 2, 3]
    self.cardinalities = dict(zip(self.variables, self.card))

    # Per-variable transition tables; presumably laid out as
    # {current_state: {next_state: probability}}.
    self.intel_tm = {
        0: {0: 0.1, 1: 0.25, 2: 0.65},
        1: {0: 0.5, 1: 0.3, 2: 0.2},
        2: {0: 0.3, 1: 0.3, 2: 0.4},
    }
    self.diff_tm = {
        0: {0: 0.3, 1: 0.7},
        1: {0: 0.75, 1: 0.25},
    }
    self.grade_tm = {
        0: {0: 0.4, 1: 0.2, 2: 0.4},
        1: {0: 0.9, 1: 0.05, 2: 0.05},
        2: {0: 0.1, 1: 0.4, 2: 0.5},
    }

    self.start_state = [
        State('intel', 0),
        State('diff', 1),
        State('grade', 2),
    ]
    self.model = MC()

    # Canned 200-row sample: rows 0-99 are (a=1, b=0), rows 100-199 are
    # (a=0, b=1).
    frame = DataFrame(index=range(200), columns=['a', 'b'])
    frame.a = [1] * 100 + [0] * 100
    frame.b = [0] * 100 + [1] * 100
    self.sample = frame
def set_start_state(self, start_state):
    """
    Set the start state of the Markov Chain.

    Whatever order the caller supplies, the state is stored internally as a
    list of State tuples ordered like ``self.variables``.

    Parameters:
    -----------
    start_state: dict or array-like iterable object, or None
        Either a mapping ``{variable: state}`` or an iterable of
        ``(variable, state)`` pairs (e.g. State namedtuples) giving the
        starting state of every variable. Passing None clears the stored
        state.

    Raises:
    -------
    ValueError
        If start_state is a string or is not iterable. Further validation is
        delegated to ``self._check_state``.
    KeyError
        If a variable of the model has no entry in start_state.

    Examples:
    ---------
    >>> from pgmpy.models import MarkovChain as MC
    >>> from pgmpy.factors import State
    >>> model = MC(['a', 'b'], [2, 2])
    >>> model.set_start_state([State('a', 0), State('b', 1)])
    >>> model.set_start_state({'a': 0, 'b': 1})
    """
    if start_state is not None:
        if not hasattr(start_state, '__iter__') or isinstance(
                start_state, six.string_types):
            raise ValueError('start_state must be a non-string iterable.')
        # Iterating a dict yields only its keys, so the documented dict form
        # has to be read through items() to obtain (variable, state) pairs.
        if isinstance(start_state, dict):
            pairs = start_state.items()
        else:
            pairs = start_state
        # Reorder according to self.variables.
        state_dict = {var: st for var, st in pairs}
        start_state = [
            State(var, state_dict[var]) for var in self.variables
        ]
    # The state is stored only when it is None or passes validation.
    if start_state is None or self._check_state(start_state):
        self.state = start_state
def get_states(self):
    """
    Add an OUTCOME tag for every state of every variable of XMLBIF

    One OUTCOME sub-element is appended to the variable's element in
    ``self.variables`` for each state index of the variable, and its text is
    set to that index.

    Return
    ------
    dict: dict of type {variable: outcome tags}

    Examples
    --------
    >>> writer = XMLBIFWriter(model)
    >>> writer.get_states()
    {'dog-out': [<Element OUTCOME at 0x7ffbabfcdec8>, <Element OUTCOME at 0x7ffbabfcdf08>],
     'family-out': [<Element OUTCOME at 0x7ffbabfd4108>, <Element OUTCOME at 0x7ffbabfd4148>],
     'bowel-problem': [<Element OUTCOME at 0x7ffbabfd4088>, <Element OUTCOME at 0x7ffbabfd40c8>],
     'hear-bark': [<Element OUTCOME at 0x7ffbabfcdf48>, <Element OUTCOME at 0x7ffbabfcdf88>],
     'light-on': [<Element OUTCOME at 0x7ffbabfcdfc8>, <Element OUTCOME at 0x7ffbabfd4048>]}
    """
    outcome_tag = {}
    for cpd in self.model.get_cpds():
        var = cpd.variable
        tags = outcome_tag[var] = []
        cardinality = cpd.get_cardinality([var])[var]
        # One State per state index of the variable, in increasing order.
        states = [State(var, index) for index in range(cardinality)]
        for entry in states:
            tag = etree.SubElement(self.variables[var], "OUTCOME")
            tag.text = str(entry.state)
            tags.append(tag)
    return outcome_tag
def test_rejection_sample_basic(self):
    # Rejection sampling with evidence A=1, J=1, R=1 must return the
    # requested number of rows, one column per variable, and only rows that
    # agree with the evidence.
    sample = self.sampling_inference.rejection_sample(
        [State('A', 1), State('J', 1), State('R', 1)], 25)
    # assertEqual replaces the assertEquals alias, which was deprecated in
    # Python 3.2 and removed in Python 3.12.
    self.assertEqual(len(sample), 25)
    self.assertEqual(len(sample.columns), 6)
    self.assertIn('A', sample.columns)
    self.assertIn('J', sample.columns)
    self.assertIn('R', sample.columns)
    self.assertIn('Q', sample.columns)
    self.assertIn('G', sample.columns)
    self.assertIn('L', sample.columns)
    # Evidence variables are pinned to their observed value.
    self.assertTrue(set(sample.A).issubset({1}))
    self.assertTrue(set(sample.J).issubset({1}))
    self.assertTrue(set(sample.R).issubset({1}))
    # The remaining variables may take either binary state.
    self.assertTrue(set(sample.Q).issubset({0, 1}))
    self.assertTrue(set(sample.G).issubset({0, 1}))
    self.assertTrue(set(sample.L).issubset({0, 1}))
def test_likelihood_weighted_sample(self):
    # Likelihood-weighted sampling must return the requested number of rows
    # with one column per variable plus an extra '_weight' column.
    sample = self.sampling_inference.likelihood_weighted_sample(
        [State('A', 0), State('J', 1), State('R', 0)], 25)
    # assertEqual replaces the assertEquals alias, which was deprecated in
    # Python 3.2 and removed in Python 3.12.
    self.assertEqual(len(sample), 25)
    self.assertEqual(len(sample.columns), 7)
    self.assertIn('A', sample.columns)
    self.assertIn('J', sample.columns)
    self.assertIn('R', sample.columns)
    self.assertIn('Q', sample.columns)
    self.assertIn('G', sample.columns)
    self.assertIn('L', sample.columns)
    self.assertIn('_weight', sample.columns)
    self.assertTrue(set(sample.A).issubset({0, 1}))
    self.assertTrue(set(sample.J).issubset({0, 1}))
    self.assertTrue(set(sample.R).issubset({0, 1}))
    self.assertTrue(set(sample.Q).issubset({0, 1}))
    self.assertTrue(set(sample.G).issubset({0, 1}))
    self.assertTrue(set(sample.L).issubset({0, 1}))
def random_state(self):
    """
    Draw a random assignment for every variable of the Markov Chain.

    Each variable, taken in the order of ``self.variables``, receives an
    integer drawn with ``np.random.randint`` from ``0`` up to (but not
    including) its cardinality.

    Return Type:
    ------------
    List of State tuples, one per variable of the model.

    Examples:
    ---------
    >>> from pgmpy.models import MarkovChain as MC
    >>> model = MC(['intel', 'diff'], [2, 3])
    >>> model.random_state()  # output varies between calls
    [State('intel', 1), State('diff', 2)]
    """
    assignment = []
    for var in self.variables:
        value = np.random.randint(self.cardinalities[var])
        assignment.append(State(var, value))
    return assignment
def generate_sample(self, start_state=None, size=1):
    """
    Generator version of self.sample

    Advances the chain ``size`` times. On every step each variable moves to
    a state drawn from the transition model of its current state, and a copy
    of the resulting state is yielded. The start state itself is not yielded.

    Parameters:
    -----------
    start_state: array-like iterable or None
        Starting states of the variables. If None, the previously set state
        is used, or a random one when no state has been set.
    size: int
        Number of samples to generate.

    Return Type:
    ------------
    List of State namedtuples, representing the assignment to all variables
    of the model.

    Examples:
    ---------
    >>> from pgmpy.models.MarkovChain import MarkovChain
    >>> from pgmpy.factors import State
    >>> model = MarkovChain()
    >>> model.add_variables_from(['intel', 'diff'], [3, 2])
    >>> intel_tm = {0: {0: 0.2, 1: 0.4, 2:0.4}, 1: {0: 0, 1: 0.5, 2: 0.5}, 2: {0: 0.3, 1: 0.3, 2: 0.4}}
    >>> model.add_transition_model('intel', intel_tm)
    >>> diff_tm = {0: {0: 0.5, 1: 0.5}, 1: {0: 0.25, 1:0.75}}
    >>> model.add_transition_model('diff', diff_tm)
    >>> gen = model.generate_sample([State('intel', 0), State('diff', 0)], 2)
    >>> [sample for sample in gen]
    [[State(var='intel', state=2), State(var='diff', state=1)],
     [State(var='intel', state=2), State(var='diff', state=0)]]
    """
    if start_state is not None:
        self.set_start_state(start_state)
    elif self.state is None:
        self.state = self.random_state()
    # Otherwise keep whatever state was set previously.

    for _ in range(size):
        for position, (var, current) in enumerate(self.state):
            outcomes = list(self.transition_models[var][current].keys())
            weights = list(self.transition_models[var][current].values())
            drawn = sample_discrete(outcomes, weights)[0]
            self.state[position] = State(var, drawn)
        # Yield a copy so later steps do not mutate samples already handed out.
        yield self.state[:]
def sample(self, start_state=None, size=1):
    """
    Sample from the Markov Chain.

    The first row of the result is the start state. Every following row is
    obtained by moving each variable to a state freshly drawn from the
    transition model of its current state.

    Parameters:
    -----------
    start_state: dict or array-like iterable
        Representing the starting states of the variables. If None is passed,
        the previously set state is used, or a random start_state is chosen
        when no state has been set.
    size: int
        Number of samples to be generated.

    Return Type:
    ------------
    pandas.DataFrame
        One column per variable (in the order of ``self.variables``) and one
        row per sample.

    Examples:
    ---------
    >>> from pgmpy.models import MarkovChain as MC
    >>> from pgmpy.factors import State
    >>> model = MC(['intel', 'diff'], [2, 3])
    >>> model.set_start_state([State('intel', 0), State('diff', 2)])
    >>> intel_tm = {0: {0: 0.25, 1: 0.75}, 1: {0: 0.5, 1: 0.5}}
    >>> model.add_transition_model('intel', intel_tm)
    >>> diff_tm = {0: {0: 0.1, 1: 0.5, 2: 0.4}, 1: {0: 0.2, 1: 0.2, 2: 0.6 }, 2: {0: 0.7, 1: 0.15, 2: 0.15}}
    >>> model.add_transition_model('diff', diff_tm)
    >>> model.sample(size=5)
       intel  diff
    0      0     2
    1      1     0
    2      0     1
    3      1     0
    4      0     2
    """
    if start_state is None:
        if self.state is None:
            self.state = self.random_state()
        # else use previously-set state
    else:
        self.set_start_state(start_state)

    sampled = DataFrame(index=range(size), columns=self.variables)
    sampled.loc[0] = [st for var, st in self.state]

    for i in range(size - 1):
        for j, (var, st) in enumerate(self.state):
            # Draw a fresh transition on every step. Drawing once per
            # (variable, state) pair and reusing the result would make every
            # visit to a state lead to the same successor, i.e. a
            # deterministic chain rather than a sample from the model.
            transitions = self.transition_models[var][st]
            next_st = sample_discrete(
                list(transitions.keys()), list(transitions.values()))[0]
            self.state[j] = State(var, next_st)
        sampled.loc[i + 1] = [st for var, st in self.state]
    return sampled
def test_forward_sample(self):
    # Forward sampling must return the requested number of rows with one
    # column per variable; each cell holds a State of that column's variable.
    sample = self.sampling_inference.forward_sample(25)
    # assertEqual replaces the assertEquals alias, which was deprecated in
    # Python 3.2 and removed in Python 3.12.
    self.assertEqual(len(sample), 25)
    self.assertEqual(len(sample.columns), 6)
    self.assertIn('A', sample.columns)
    self.assertIn('J', sample.columns)
    self.assertIn('R', sample.columns)
    self.assertIn('Q', sample.columns)
    self.assertIn('G', sample.columns)
    self.assertIn('L', sample.columns)
    self.assertTrue(set(sample.A).issubset({State('A', 0), State('A', 1)}))
    self.assertTrue(set(sample.J).issubset({State('J', 0), State('J', 1)}))
    self.assertTrue(set(sample.R).issubset({State('R', 0), State('R', 1)}))
    self.assertTrue(set(sample.Q).issubset({State('Q', 0), State('Q', 1)}))
    self.assertTrue(set(sample.G).issubset({State('G', 0), State('G', 1)}))
    self.assertTrue(set(sample.L).issubset({State('L', 0), State('L', 1)}))
def test_prob_from_sample(self, sample):
    # `sample` is presumably the mocked sampling method; it is made to return
    # the canned frame stored on the test case as self.sample.
    chain = MC(['a', 'b'], [2, 2])
    sample.return_value = self.sample

    probabilities = chain.prob_from_sample([State('a', 1), State('b', 0)])

    expected = [1] * 50 + [0] * 50
    self.assertEqual(list(probabilities), expected)
def test_check_state_bad_vars(self):
    # The variables named in the state differ from the model's variables
    # (the model is built with none), so validation must fail.
    chain = MC()
    mismatched_state = [State(1, 2)]
    with self.assertRaises(ValueError):
        chain._check_state(mismatched_state)
def test_check_state_success(self):
    # A state naming exactly the model's variable with an in-range value
    # passes validation.
    chain = MC(['a'], [2])
    valid_state = [State('a', 1)]
    self.assertTrue(chain._check_state(valid_state))
def test_check_state_bad_var_value(self):
    # The value 3 is not below the variable's cardinality of 2, so
    # validation must fail.
    chain = MC(['a'], [2])
    out_of_range_state = [State('a', 3)]
    with self.assertRaises(ValueError):
        chain._check_state(out_of_range_state)