Example #1
 def test_set_start_state_list(self, check_state):
     model = MC(['b', 'a'], [1, 2])
     check_state.return_value = True
     model.set_start_state([State('a', 0), State('b', 1)])
     model_state = [State('b', 1), State('a', 0)]
     check_state.assert_called_once_with(model, model_state)
     self.assertEqual(model.state, model_state)
Example #2
 def test_generate_sample_less_arg(self, random_state, sample_discrete):
     model = MC(['a', 'b'], [2, 2])
     model.transition_models['a'] = {
         0: {
             0: 0.1,
             1: 0.9
         },
         1: {
             0: 0.2,
             1: 0.8
         }
     }
     model.transition_models['b'] = {
         0: {
             0: 0.3,
             1: 0.7
         },
         1: {
             0: 0.4,
             1: 0.6
         }
     }
     random_state.return_value = [State('a', 0), State('b', 1)]
     sample_discrete.side_effect = [[1], [0]] * 2
     gen = model.generate_sample(size=2)
     samples = [sample for sample in gen]
     expected_samples = [[State('a', 1), State('b', 0)]] * 2
     self.assertEqual(samples, expected_samples)
Example #3
 def test_sample(self):
     model = MC(['a', 'b'], [2, 2])
     model.transition_models['a'] = {
         0: {
             0: 0.1,
             1: 0.9
         },
         1: {
             0: 0.2,
             1: 0.8
         }
     }
     model.transition_models['b'] = {
         0: {
             0: 0.3,
             1: 0.7
         },
         1: {
             0: 0.4,
             1: 0.6
         }
     }
     sample = model.sample(start_state=[State('a', 0),
                                        State('b', 1)],
                           size=2)
     self.assertEqual(len(sample), 2)
     self.assertEqual(list(sample.columns), ['a', 'b'])
     self.assertTrue(
         list(sample.loc[0]) in [[0, 0], [0, 1], [1, 0], [1, 1]])
     self.assertTrue(
         list(sample.loc[1]) in [[0, 0], [0, 1], [1, 0], [1, 1]])
Example #4
 def test_sample_less_arg(self, random_state):
     model = MC(['a', 'b'], [2, 2])
     random_state.return_value = [State('a', 0), State('b', 1)]
     sample = model.sample(size=1)
     random_state.assert_called_once_with(model)
     self.assertEqual(model.state, random_state.return_value)
     self.assertEqual(len(sample), 1)
     self.assertEqual(list(sample.columns), ['a', 'b'])
     self.assertEqual(list(sample.loc[0]), [0, 1])
Example #5
 def test_sample_less_arg(self, random_state):
     self.gibbs.state = None
     random_state.return_value = [
         State('diff', 0),
         State('intel', 0),
         State('grade', 0)
     ]
     sample = self.gibbs.sample(size=2)
     random_state.assert_called_once_with(self.gibbs)
     self.assertEqual(len(sample), 2)
 def test_sample(self):
     start_state = [State("diff", 0), State("intel", 0), State("grade", 0)]
     sample = self.gibbs.sample(start_state, 2)
     self.assertEqual(len(sample), 2)
     self.assertEqual(len(sample.columns), 3)
     self.assertIn("diff", sample.columns)
     self.assertIn("intel", sample.columns)
     self.assertIn("grade", sample.columns)
     self.assertTrue(set(sample["diff"]).issubset({0, 1}))
     self.assertTrue(set(sample["intel"]).issubset({0, 1}))
     self.assertTrue(set(sample["grade"]).issubset({0, 1, 2}))
Example #7
 def test_generate_sample(self):
     start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
     gen = self.gibbs.generate_sample(start_state, 2)
     samples = [sample for sample in gen]
     self.assertEqual(len(samples), 2)
     self.assertEqual(
         {samples[0][0].var, samples[0][1].var, samples[0][2].var},
         {'diff', 'intel', 'grade'})
     self.assertEqual(
         {samples[1][0].var, samples[1][1].var, samples[1][2].var},
         {'diff', 'intel', 'grade'})
Example #8
 def test_sample(self):
     start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
     sample = self.gibbs.sample(start_state, 2)
     self.assertEqual(len(sample), 2)
     self.assertEqual(len(sample.columns), 3)
     self.assertIn('diff', sample.columns)
     self.assertIn('intel', sample.columns)
     self.assertIn('grade', sample.columns)
     self.assertTrue(set(sample['diff']).issubset({0, 1}))
     self.assertTrue(set(sample['intel']).issubset({0, 1}))
     self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))
Example #9
    def setUp(self):
        self.variables = ['intel', 'diff', 'grade']
        self.card = [3, 2, 3]
        self.cardinalities = {'intel': 3, 'diff': 2, 'grade': 3}
        self.intel_tm = {
            0: {
                0: 0.1,
                1: 0.25,
                2: 0.65
            },
            1: {
                0: 0.5,
                1: 0.3,
                2: 0.2
            },
            2: {
                0: 0.3,
                1: 0.3,
                2: 0.4
            }
        }
        self.intel_tm_matrix = np.array([[0.1, 0.25, 0.65], [0.5, 0.3, 0.2],
                                         [0.3, 0.3, 0.4]])
        self.diff_tm = {0: {0: 0.3, 1: 0.7}, 1: {0: 0.75, 1: 0.25}}
        self.diff_tm_matrix = np.array([[0.3, 0.7], [0.75, 0.25]])
        self.grade_tm = {
            0: {
                0: 0.4,
                1: 0.2,
                2: 0.4
            },
            1: {
                0: 0.9,
                1: 0.05,
                2: 0.05
            },
            2: {
                0: 0.1,
                1: 0.4,
                2: 0.5
            }
        }
        self.grade_tm_matrix = [[0.4, 0.2, 0.4], [0.9, 0.05, 0.05],
                                [0.1, 0.4, 0.5]]
        self.start_state = [
            State('intel', 0),
            State('diff', 1),
            State('grade', 2)
        ]
        self.model = MC()

        self.sample = DataFrame(index=range(200), columns=['a', 'b'])
        self.sample.a = [1] * 100 + [0] * 100
        self.sample.b = [0] * 100 + [1] * 100
Example #10
import typing

import pandas as pd
from IPython.utils import io
from tqdm import notebook

from pgmpy.factors.discrete import State
from pgmpy.sampling import BayesianModelSampling


def generate_time_series(
    sampler: BayesianModelSampling,
    length: int,
    labels: typing.List[str],
    seed: int = 42,
):
    # Initialize progress bar
    pbar = notebook.tqdm(total=length)

    # Generate first sample given no evidence
    with io.capture_output() as captured:
        # When no evidence is provided, the function under-the-hood performs forward sampling
        sample = sampler.rejection_sample(seed=seed)
    sample = sample.reindex(sorted(sample.columns), axis=1)

    # Split sample in 'current' and 'next' slices:
    # - the 'current' slice will be the first row of the generated time series
    # - the 'next' slice is added as the second row, and will be used as
    # evidence for subsequent predictions
    df_synth = sample.filter(regex="_T$")
    next_slice = sample.filter(regex=r"_T\+1").iloc[0].values.tolist()
    df_synth = pd.concat(
        [df_synth, pd.DataFrame([next_slice], columns=df_synth.columns)],
        ignore_index=True)
    evidence = [
        State(n, v) for n, v in zip(df_synth.columns.values, next_slice)
    ]

    # Update progress bar
    pbar.update(2)

    for _ in range(2, length):
        # Generate new data
        with io.capture_output() as captured:
            sample = sampler.rejection_sample(evidence=evidence)
        sample = sample.reindex(sorted(sample.columns), axis=1)

        # Append 'next' slice to the generated time series, and use it as new evidence
        next_slice = sample.filter(regex=r"_T\+1").iloc[0].values.tolist()
        df_synth = pd.concat(
            [df_synth, pd.DataFrame([next_slice], columns=df_synth.columns)],
            ignore_index=True)
        evidence = [
            State(n, v) for n, v in zip(df_synth.columns.values, next_slice)
        ]

        # Update progress bar
        pbar.update(1)
    # Close progress bar
    pbar.close()
    # Update column names
    df_synth.columns = labels
    return df_synth
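A minimal usage sketch for the helper above. The tiny two-slice network, its CPDs, and the node names "x_T" / "x_T+1" are illustrative assumptions (any fitted pgmpy model whose columns follow the "_T" / "_T+1" naming convention would do); a notebook environment is assumed because the helper uses tqdm.notebook and IPython output capture, as is a pgmpy version whose rejection_sample accepts the seed argument used above.

# Hypothetical usage of generate_time_series(); the toy model below is an
# assumption for illustration, not part of the original snippet.
from pgmpy.models import BayesianNetwork  # BayesianModel in older pgmpy releases
from pgmpy.factors.discrete import TabularCPD
from pgmpy.sampling import BayesianModelSampling

toy = BayesianNetwork([("x_T", "x_T+1")])
toy.add_cpds(
    TabularCPD("x_T", 2, [[0.6], [0.4]]),
    TabularCPD("x_T+1", 2, [[0.7, 0.2], [0.3, 0.8]],
               evidence=["x_T"], evidence_card=[2]),
)
toy.check_model()

series = generate_time_series(BayesianModelSampling(toy), length=10, labels=["x"])
print(series.head())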
 def test_generate_sample(self):
     start_state = [State("diff", 0), State("intel", 0), State("grade", 0)]
     gen = self.gibbs.generate_sample(start_state, 2)
     samples = [sample for sample in gen]
     self.assertEqual(len(samples), 2)
     self.assertEqual(
         {samples[0][0].var, samples[0][1].var, samples[0][2].var},
         {"diff", "intel", "grade"},
     )
     self.assertEqual(
         {samples[1][0].var, samples[1][1].var, samples[1][2].var},
         {"diff", "intel", "grade"},
     )
Example #12
    def set_start_state(self, start_state):
        """
        Set the start state of the Markov Chain. If the start_state is given as an array-like iterable, its contents
        are reordered in the internal representation.

        Parameters:
        -----------
        start_state: dict or array-like iterable object
            Dict (or list) of tuples representing the starting states of the variables.

        Examples:
        ---------
        >>> from pgmpy.models import MarkovChain as MC
        >>> from pgmpy.factors.discrete import State
        >>> model = MC(['a', 'b'], [2, 2])
        >>> model.set_start_state([State('a', 0), State('b', 1)])
        """
        if start_state is not None:
            if not hasattr(start_state, '__iter__') or isinstance(start_state, six.string_types):
                raise ValueError('start_state must be a non-string iterable.')
            # Must be an array-like iterable. Reorder according to self.variables.
            state_dict = {var: st for var, st in start_state}
            start_state = [State(var, state_dict[var]) for var in self.variables]
        if start_state is None or self._check_state(start_state):
            self.state = start_state
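A short sketch of the reordering behaviour described in the docstring, using only the API shown above: states passed in any order are stored in the order of model.variables.

# States may be passed in any order; internally they are re-ordered to match
# the order of model.variables.
from pgmpy.models import MarkovChain as MC
from pgmpy.factors.discrete import State

model = MC(['a', 'b'], [2, 2])
model.set_start_state([State('b', 1), State('a', 0)])   # 'b' listed first
print(model.state)   # [State(var='a', state=0), State(var='b', state=1)]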
Example #13
    def sample(self, start_state=None, size=1):
        """
        Sample from the Markov Chain.

        Parameters:
        -----------
        start_state: dict or array-like iterable
            Representing the starting states of the variables. If None is passed, a random start_state is chosen.
        size: int
            Number of samples to be generated.

        Return Type:
        ------------
        pandas.DataFrame

        Examples:
        ---------
        >>> from pgmpy.models import MarkovChain as MC
        >>> from pgmpy.factors.discrete import State
        >>> model = MC(['intel', 'diff'], [2, 3])
        >>> model.set_start_state([State('intel', 0), State('diff', 2)])
        >>> intel_tm = {0: {0: 0.25, 1: 0.75}, 1: {0: 0.5, 1: 0.5}}
        >>> model.add_transition_model('intel', intel_tm)
        >>> diff_tm = {0: {0: 0.1, 1: 0.5, 2: 0.4}, 1: {0: 0.2, 1: 0.2, 2: 0.6 }, 2: {0: 0.7, 1: 0.15, 2: 0.15}}
        >>> model.add_transition_model('diff', diff_tm)
        >>> model.sample(size=5)
           intel  diff
        0      0     2
        1      1     0
        2      0     1
        3      1     0
        4      0     2
        """
        if start_state is None:
            if self.state is None:
                self.state = self.random_state()
            # else use previously-set state
        else:
            self.set_start_state(start_state)

        sampled = DataFrame(index=range(size), columns=self.variables)
        sampled.loc[0] = [st for var, st in self.state]

        var_states = defaultdict(dict)
        var_values = defaultdict(dict)
        samples = defaultdict(dict)
        for var in self.transition_models.keys():
            for st in self.transition_models[var]:
                var_states[var][st] = list(self.transition_models[var][st].keys())
                var_values[var][st] = list(self.transition_models[var][st].values())
                samples[var][st] = sample_discrete(var_states[var][st], var_values[var][st], size=size)

        for i in range(size - 1):
            for j, (var, st) in enumerate(self.state):
                next_st = samples[var][st][i]
                self.state[j] = State(var, next_st)
            sampled.loc[i + 1] = [st for var, st in self.state]

        return sampled
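A small follow-up sketch that reuses the docstring's model; the 10,000-draw run and the pandas crosstab check are illustrative additions, meant only to show that the empirical transition frequencies approach the specified transition model.

# Compare empirical 'intel' transition frequencies against the transition model.
import pandas as pd
from pgmpy.models import MarkovChain as MC
from pgmpy.factors.discrete import State

model = MC(['intel', 'diff'], [2, 3])
model.set_start_state([State('intel', 0), State('diff', 2)])
model.add_transition_model('intel', {0: {0: 0.25, 1: 0.75}, 1: {0: 0.5, 1: 0.5}})
model.add_transition_model('diff', {0: {0: 0.1, 1: 0.5, 2: 0.4},
                                    1: {0: 0.2, 1: 0.2, 2: 0.6},
                                    2: {0: 0.7, 1: 0.15, 2: 0.15}})
draws = model.sample(size=10000)
# Each row of the crosstab should be close to the corresponding row of intel_tm,
# i.e. roughly (0.25, 0.75) and (0.5, 0.5).
print(pd.crosstab(draws['intel'].shift(), draws['intel'], normalize='index'))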
Example #14
def rejection_estimate(n):
    inferences = BayesianModelSampling(disease_model)
    evidences = [
        State(var='Fatigue', state=0),
        State(var='Fever', state=0),
        State(var='FluShot', state=0)
    ]

    p = inferences.rejection_sample(evidences, n)
    i = 0

    for t in range(n):
        if p['Flu'][t] == 0:
            i = i + 1
            plt.plot(t, (i / n), 'bo')
    plt.ylabel('Evolving estimate')
    plt.xlabel('Number of samples')
    plt.show()
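A minimal companion sketch, under the same assumption as above (a fitted disease_model with binary nodes 'Flu', 'Fatigue', 'Fever' and 'FluShot'): rather than plotting a running ratio, the point estimate of P(Flu = 0 | evidence) is simply the fraction of accepted samples in which Flu equals 0.

# Point-estimate counterpart of rejection_estimate(); disease_model is assumed
# to be the same fitted model used above.
from pgmpy.factors.discrete import State
from pgmpy.sampling import BayesianModelSampling

inferences = BayesianModelSampling(disease_model)
evidences = [State(var='Fatigue', state=0),
             State(var='Fever', state=0),
             State(var='FluShot', state=0)]
p = inferences.rejection_sample(evidences, 10000)
print("P(Flu=0 | evidence) =", (p['Flu'] == 0).mean())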
Example #15
 def test_rejection_sample_basic(self):
     sample = self.sampling_inference.rejection_sample(
         [State('A', 1), State('J', 1),
          State('R', 1)], 25)
     self.assertEqual(len(sample), 25)
     self.assertEqual(len(sample.columns), 6)
     self.assertIn('A', sample.columns)
     self.assertIn('J', sample.columns)
     self.assertIn('R', sample.columns)
     self.assertIn('Q', sample.columns)
     self.assertIn('G', sample.columns)
     self.assertIn('L', sample.columns)
     self.assertTrue(set(sample.A).issubset({1}))
     self.assertTrue(set(sample.J).issubset({1}))
     self.assertTrue(set(sample.R).issubset({1}))
     self.assertTrue(set(sample.Q).issubset({0, 1}))
     self.assertTrue(set(sample.G).issubset({0, 1}))
     self.assertTrue(set(sample.L).issubset({0, 1}))
Example #16
 def test_likelihood_weighted_sample(self):
     sample = self.sampling_inference.likelihood_weighted_sample(
         [State('A', 0), State('J', 1),
          State('R', 0)], 25)
     self.assertEqual(len(sample), 25)
     self.assertEqual(len(sample.columns), 7)
     self.assertIn('A', sample.columns)
     self.assertIn('J', sample.columns)
     self.assertIn('R', sample.columns)
     self.assertIn('Q', sample.columns)
     self.assertIn('G', sample.columns)
     self.assertIn('L', sample.columns)
     self.assertIn('_weight', sample.columns)
     self.assertTrue(set(sample.A).issubset({0, 1}))
     self.assertTrue(set(sample.J).issubset({0, 1}))
     self.assertTrue(set(sample.R).issubset({0, 1}))
     self.assertTrue(set(sample.Q).issubset({0, 1}))
     self.assertTrue(set(sample.G).issubset({0, 1}))
     self.assertTrue(set(sample.L).issubset({0, 1}))
 def test_rejection_sample_basic(self):
     sample = self.sampling_inference.rejection_sample()
     sample = self.sampling_inference.rejection_sample(
         [State("A", 1), State("J", 1),
          State("R", 1)], 25)
     self.assertEqual(len(sample), 25)
     self.assertEqual(len(sample.columns), 6)
     self.assertIn("A", sample.columns)
     self.assertIn("J", sample.columns)
     self.assertIn("R", sample.columns)
     self.assertIn("Q", sample.columns)
     self.assertIn("G", sample.columns)
     self.assertIn("L", sample.columns)
     self.assertTrue(set(sample.A).issubset({1}))
     self.assertTrue(set(sample.J).issubset({1}))
     self.assertTrue(set(sample.R).issubset({1}))
     self.assertTrue(set(sample.Q).issubset({0, 1}))
     self.assertTrue(set(sample.G).issubset({0, 1}))
     self.assertTrue(set(sample.L).issubset({0, 1}))
 def test_likelihood_weighted_sample(self):
     sample = self.sampling_inference.likelihood_weighted_sample()
     sample = self.sampling_inference.likelihood_weighted_sample(
         [State("A", 0), State("J", 1),
          State("R", 0)], 25)
     self.assertEqual(len(sample), 25)
     self.assertEqual(len(sample.columns), 7)
     self.assertIn("A", sample.columns)
     self.assertIn("J", sample.columns)
     self.assertIn("R", sample.columns)
     self.assertIn("Q", sample.columns)
     self.assertIn("G", sample.columns)
     self.assertIn("L", sample.columns)
     self.assertIn("_weight", sample.columns)
     self.assertTrue(set(sample.A).issubset({0, 1}))
     self.assertTrue(set(sample.J).issubset({0, 1}))
     self.assertTrue(set(sample.R).issubset({0, 1}))
     self.assertTrue(set(sample.Q).issubset({0, 1}))
     self.assertTrue(set(sample.G).issubset({0, 1}))
     self.assertTrue(set(sample.L).issubset({0, 1}))
Example #19
    def is_stationarity(self, tolerance=0.2, sample=None):
        """
        Checks if the given Markov chain is stationary and whether the steady-state
        probability values for each state are consistent.

        Parameters:
        -----------
        tolerance: float
            maximum allowed difference between the actual steady-state probability and the value estimated from samples
        sample: [State(i, j)]
            list of State namedtuples sampled from the Markov chain

        Return Type:
        ------------
        Boolean
        True, if the markov chain converges to steady state distribution within the tolerance
        False, if the markov chain does not converge to steady state distribution within tolerance

        Examples:
        ---------
        >>> from pgmpy.models.MarkovChain import MarkovChain
        >>> from pgmpy.factors.discrete import State
        >>> model = MarkovChain()
        >>> model.add_variables_from(['intel', 'diff'], [3, 2])
        >>> intel_tm = {0: {0: 0.2, 1: 0.4, 2:0.4}, 1: {0: 0, 1: 0.5, 2: 0.5}, 2: {0: 0.3, 1: 0.3, 2: 0.4}}
        >>> model.add_transition_model('intel', intel_tm)
        >>> diff_tm = {0: {0: 0.5, 1: 0.5}, 1: {0: 0.25, 1:0.75}}
        >>> model.add_transition_model('diff', diff_tm)
        >>> model.is_stationarity()
        True
        """
        keys = self.transition_models.keys()
        return_val = True
        for k in keys:
            # convert dict to numpy matrix
            transition_mat = np.array([np.array(list(self.transition_models[k][i].values()))
                                       for i in self.transition_models[k].keys()], dtype=float)
            S, U = eig(transition_mat.T)
            stationary = np.array(U[:, np.where(np.abs(S - 1.) < 1e-8)[0][0]].flat)
            stationary = (stationary / np.sum(stationary)).real

            probabilities = []
            window_size = 10000 if sample is None else len(sample)
            for i in range(0, transition_mat.shape[0]):
                probabilities.extend(self.prob_from_sample([State(k, i)], window_size=window_size))
            if any(np.abs(i) > tolerance for i in np.subtract(probabilities, stationary)):
                return_val = False

        return return_val
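The eigenvector step above can be checked in isolation. A standalone numpy/scipy sketch using the 'intel' transition matrix from the docstring (the matrix entries come from that example; everything else is plain linear algebra):

# Stationary distribution of the 'intel' chain: the left eigenvector of the
# row-stochastic matrix P for eigenvalue 1, normalised to sum to 1.
import numpy as np
from scipy.linalg import eig

P = np.array([[0.2, 0.4, 0.4],
              [0.0, 0.5, 0.5],
              [0.3, 0.3, 0.4]])
S, U = eig(P.T)   # right eigenvectors of P.T are left eigenvectors of P
pi = np.real(U[:, np.argmin(np.abs(S - 1.0))])
pi = pi / pi.sum()
print(pi)        # stationary distribution
print(pi @ P)    # equals pi up to floating-point error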
Example #20
 def test_is_stationarity_failure(self):
     model = MC(['intel', 'diff'], [2, 3])
     model.set_start_state([State('intel', 0), State('diff', 2)])
     intel_tm = {0: {0: 0.25, 1: 0.75}, 1: {0: 0.5, 1: 0.5}}
     model.add_transition_model('intel', intel_tm)
     diff_tm = {
         0: {
             0: 0.1,
             1: 0.5,
             2: 0.4
         },
         1: {
             0: 0.2,
             1: 0.2,
             2: 0.6
         },
         2: {
             0: 0.7,
             1: 0.15,
             2: 0.15
         }
     }
     model.add_transition_model('diff', diff_tm)
     self.assertFalse(model.is_stationarity(0.002, None))
Example #21
    def random_state(self):
        """
        Generates a random state of the Markov Chain.

        Return Type:
        ------------
        List of namedtuples, representing a random assignment to all variables of the model.

        Examples:
        ---------
        >>> from pgmpy.models import MarkovChain as MC
        >>> model = MC(['intel', 'diff'], [2, 3])
        >>> model.random_state()
        [State('diff', 2), State('intel', 1)]
        """
        return [State(var, np.random.randint(self.cardinalities[var])) for var in self.variables]
Example #22
def sample_slots(model_info_file, mr_slot_names):
    model_info = helpers.load_from_pickle(model_info_file)
    model = model_info['model']
    inference = BayesianModelSampling(model)
    # use the missing mr slots as evidence
    all_slots = model_info['all_slots']
    missing_slots = [mr for mr in all_slots if mr not in mr_slot_names]
    evidence = [State(mr, 0) for mr in missing_slots]
    # don't allow empty samples
    sampled_slots = []
    while not sampled_slots:
        sample = inference.rejection_sample(evidence=evidence,
                                            size=1,
                                            return_type='recarray')
        # return a list of the column names which had presence
        sampled_slots = [
            name for var, name in zip(sample.view('<i8'), sample.dtype.names)
            if var == 1
        ]
    return sampled_slots
    def generate_sample(self, start_state=None, size=1):
        """
        Generator version of self.sample

        Returns
        -------
        List of State namedtuples, representing the assignment to all variables of the model.

        Examples
        --------
        >>> from pgmpy.models.MarkovChain import MarkovChain
        >>> from pgmpy.factors.discrete import State
        >>> model = MarkovChain()
        >>> model.add_variables_from(['intel', 'diff'], [3, 2])
        >>> intel_tm = {0: {0: 0.2, 1: 0.4, 2:0.4}, 1: {0: 0, 1: 0.5, 2: 0.5}, 2: {0: 0.3, 1: 0.3, 2: 0.4}}
        >>> model.add_transition_model('intel', intel_tm)
        >>> diff_tm = {0: {0: 0.5, 1: 0.5}, 1: {0: 0.25, 1:0.75}}
        >>> model.add_transition_model('diff', diff_tm)
        >>> gen = model.generate_sample([State('intel', 0), State('diff', 0)], 2)
        >>> [sample for sample in gen]
        [[State(var='intel', state=2), State(var='diff', state=1)],
         [State(var='intel', state=2), State(var='diff', state=0)]]
        """
        if start_state is None:
            if self.state is None:
                self.state = self.random_state()
            # else use previously-set state
        else:
            self.set_start_state(start_state)
        # sampled.loc[0] = [self.state[var] for var in self.variables]

        for i in range(size):
            for j, (var, st) in enumerate(self.state):
                next_st = sample_discrete(
                    list(self.transition_models[var][st].keys()),
                    list(self.transition_models[var][st].values()),
                )[0]
                self.state[j] = State(var, next_st)
            yield self.state[:]
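Because generate_sample is a generator, draws can be consumed lazily instead of materialising a DataFrame; a short sketch reusing the docstring's model (the itertools.islice usage is an illustrative addition):

# Stream samples one at a time; only the consumed draws are ever computed.
from itertools import islice

from pgmpy.models.MarkovChain import MarkovChain
from pgmpy.factors.discrete import State

model = MarkovChain()
model.add_variables_from(['intel', 'diff'], [3, 2])
model.add_transition_model('intel', {0: {0: 0.2, 1: 0.4, 2: 0.4},
                                     1: {0: 0, 1: 0.5, 2: 0.5},
                                     2: {0: 0.3, 1: 0.3, 2: 0.4}})
model.add_transition_model('diff', {0: {0: 0.5, 1: 0.5}, 1: {0: 0.25, 1: 0.75}})
gen = model.generate_sample([State('intel', 0), State('diff', 0)], size=10 ** 6)
for assignment in islice(gen, 3):   # only the first three draws are realised
    print(assignment)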
Example #24
 def test_check_state_bad_var_value(self):
     model = MC(['a'], [2])
     # value of variable >= cardinality
     self.assertRaises(ValueError, model._check_state, [State('a', 3)])
Example #25
trainData = modelData.iloc[0:int(0.85 * modelData.shape[0])].copy()

model.fit(trainData, estimator=MaximumLikelihoodEstimator)
#for cpd in model.get_cpds():
#    print("CPD of {variable}:".format(variable=cpd.variable))
#    print(cpd)

model_sample = BayesianModelSampling(model)
pickle.dump(model_sample, open('results/sampler.p', 'wb'))

# open the nhts sample and add the inferred resType requirements
nhtsSample = pd.read_csv('results/nhtsSample.csv')
resType = []
for ind, row in nhtsSample.iterrows():
    evidence = [
        State('IncomeQ', min(row['hh_income'] - 1, 10)),
        State('HhSize', min(row['hh_size'] - 1, 5))
    ]
    sample = model_sample.likelihood_weighted_sample(evidence=evidence, size=1)
    resType.extend([int(sample['Bedrooms']) * 3 + int(sample['RentQ'])])
nhtsSample['resType'] = resType
os.chdir('..')
nhtsSample[nhtsSample['occupation_type'] == 1].sample(
    n=50, replace=True).to_csv('ABM/includes/pop_occat_1.csv', index=False)
nhtsSample[nhtsSample['occupation_type'] == 2].sample(
    n=50, replace=True).to_csv('ABM/includes/pop_occat_2.csv', index=False)
nhtsSample[nhtsSample['occupation_type'] == 3].sample(
    n=50, replace=True).to_csv('ABM/includes/pop_occat_3.csv', index=False)
nhtsSample[nhtsSample['occupation_type'] == 4].sample(
    n=50, replace=True).to_csv('ABM/includes/pop_occat_4.csv', index=False)
nhtsSample[nhtsSample['occupation_type'] == 5].sample(
    n=50, replace=True).to_csv('ABM/includes/pop_occat_5.csv', index=False)
Example #26
 def test_check_state_success(self):
     model = MC(['a'], [2])
     self.assertTrue(model._check_state([State('a', 1)]))
Example #27
    def test_copy(self):
        model = MC(['a', 'b'], [2, 2], [State('a', 0), State('b', 1)])
        model.add_transition_model('a', {
            0: {
                0: 0.1,
                1: 0.9
            },
            1: {
                0: 0.2,
                1: 0.8
            }
        })
        model.add_transition_model('b', {
            0: {
                0: 0.3,
                1: 0.7
            },
            1: {
                0: 0.4,
                1: 0.6
            }
        })
        copy = model.copy()

        self.assertIsInstance(copy, MC)
        self.assertEqual(sorted(model.variables), sorted(copy.variables))
        self.assertEqual(model.cardinalities, copy.cardinalities)
        self.assertEqual(model.transition_models, copy.transition_models)
        self.assertEqual(model.state, copy.state)

        model.add_variable('p', 1)
        model.set_start_state([State('a', 0), State('b', 1), State('p', 0)])
        model.add_transition_model('p', {0: {0: 1}})

        self.assertNotEqual(sorted(model.variables), sorted(copy.variables))
        self.assertEqual(sorted(['a', 'b']), sorted(copy.variables))
        self.assertNotEqual(model.cardinalities, copy.cardinalities)
        self.assertEqual({'a': 2, 'b': 2}, copy.cardinalities)
        self.assertNotEqual(model.state, copy.state)
        self.assertEqual([State('a', 0), State('b', 1)], copy.state)
        self.assertNotEqual(model.transition_models, copy.transition_models)
        self.assertEqual(len(copy.transition_models), 2)
        self.assertEqual(copy.transition_models['a'], {
            0: {
                0: 0.1,
                1: 0.9
            },
            1: {
                0: 0.2,
                1: 0.8
            }
        })
        self.assertEqual(copy.transition_models['b'], {
            0: {
                0: 0.3,
                1: 0.7
            },
            1: {
                0: 0.4,
                1: 0.6
            }
        })
Example #28
 def test_prob_from_sample(self, sample):
     model = MC(['a', 'b'], [2, 2])
     sample.return_value = self.sample
     probabilities = model.prob_from_sample([State('a', 1), State('b', 0)])
     self.assertEqual(list(probabilities), [1] * 50 + [0] * 50)
Example #29
corr_mat.style.background_gradient(cmap="coolwarm").format(precision=2)

# example: if we condition on "child_screen_time"...
# ...then "child_physical_activity" becomes independent of "parent_education":
corr_mat = simulated_sample.query("child_screen_time==1").drop(
    "child_screen_time", axis=1).corr()
corr_mat.style.background_gradient(cmap="coolwarm").format(precision=2)

corr_mat = simulated_sample.query("child_screen_time==0").drop(
    "child_screen_time", axis=1).corr()
corr_mat.style.background_gradient(cmap="coolwarm").format(precision=2)

# suppose that we are interested in measuring the average causal effect of "child_screen_time" on "obesity"
# we can estimate this by simulating from the system:
simulated_sample_lowScreentime = inference.rejection_sample(
    evidence=[State(var='child_screen_time', state="low")], size=10_000)
simulated_sample_highScreentime = inference.rejection_sample(
    evidence=[State(var='child_screen_time', state="high")], size=10_000)
# the observed effect of high screen time on prob. of high child obesity is:
((simulated_sample_highScreentime["child_obesity"] == "high").sum() /
 len(simulated_sample_highScreentime)) / (
     (simulated_sample_lowScreentime["child_obesity"] == "high").sum() /
     len(simulated_sample_lowScreentime))
# i.e. around 2x

infer_adjusted = CausalInference(pg_model)
print(
    infer_adjusted.query(variables=["child_obesity"],
                         do={"child_screen_time": "high"}))

# we can estimate this effect from the observed data using a logistic regression model:
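A minimal sketch of one way such a regression could look. It assumes simulated_sample holds the observational draws with the categorical columns used above, and it adjusts for parent_education purely as an illustration; which covariates belong in the adjustment set depends on the model's graph. statsmodels is an illustrative choice, not part of the original snippet.

# Hypothetical logistic-regression estimate of the screen-time effect on obesity.
import statsmodels.formula.api as smf

obs = simulated_sample.copy()
obs["obese_high"] = (obs["child_obesity"] == "high").astype(int)
fit = smf.logit("obese_high ~ C(child_screen_time) + C(parent_education)",
                data=obs).fit()
print(fit.summary())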
Example #30
 def test_check_state_bad_vars(self):
     model = MC()
     # state_vars and model_vars differ
     self.assertRaises(ValueError, model._check_state, [State(1, 2)])