예제 #1
0
    def test_add_multiple_cpds(self):
        """Add five CPDs in a single call and check each one is returned by name."""
        cpd_d = TabularCPD('d', 2, values=np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, values=np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, values=np.random.rand(2, 4),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        cpd_l = TabularCPD('l', 2, values=np.random.rand(2, 2),
                           evidence=['g'], evidence_card=[2])
        cpd_s = TabularCPD('s', 2, values=np.random.rand(2, 2),
                           evidence=['i'], evidence_card=[2])
        expected = [('d', cpd_d), ('i', cpd_i), ('g', cpd_g),
                    ('l', cpd_l), ('s', cpd_s)]

        self.G.add_cpds(*[cpd for _, cpd in expected])

        # Each variable name must map back to the CPD that was added for it.
        for variable, cpd in expected:
            self.assertEqual(self.G.get_cpds(variable), cpd)
예제 #2
0
    def setUp(self):
        """Build the chain Bayesian model and the four-node Markov model fixtures."""
        chain_edges = [('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e')]
        self.bayesian = BayesianModel(chain_edges)
        chain_cpds = [
            TabularCPD('a', 2, [[0.4, 0.6]]),
            TabularCPD('b', 2, [[0.2, 0.4], [0.3, 0.4]],
                       evidence='a', evidence_card=[2]),
            TabularCPD('c', 2, [[0.1, 0.2], [0.3, 0.4]],
                       evidence='b', evidence_card=[2]),
            TabularCPD('d', 2, [[0.4, 0.3], [0.2, 0.1]],
                       evidence='c', evidence_card=[2]),
            TabularCPD('e', 2, [[0.3, 0.2], [0.4, 0.1]],
                       evidence='d', evidence_card=[2]),
        ]
        self.bayesian.add_cpds(*chain_cpds)

        square_edges = [('a', 'b'), ('b', 'd'), ('a', 'c'), ('c', 'd')]
        self.markov = MarkovModel(square_edges)
        # One pairwise factor per edge, each over two binary variables.
        pairwise_factors = [
            Factor(['a', 'b'], [2, 2], np.array([100, 1, 1, 100])),
            Factor(['a', 'c'], [2, 2], np.array([40, 30, 100, 20])),
            Factor(['b', 'd'], [2, 2], np.array([1, 100, 100, 1])),
            Factor(['c', 'd'], [2, 2], np.array([60, 60, 40, 40])),
        ]
        self.markov.add_factors(*pairwise_factors)
예제 #3
0
    def setUp(self):
        """Create factors, CPDs and inference objects with and without state names."""
        self.sn2 = {
            'grade': ['A', 'B', 'F'],
            'diff': ['high', 'low'],
            'intel': ['poor', 'good', 'very good'],
        }
        self.sn1 = {
            'speed': ['low', 'medium', 'high'],
            'switch': ['on', 'off'],
            'time': ['day', 'night'],
        }

        def grade_table(n_columns):
            # Fresh nested list on every call so no two CPDs share input rows.
            return [[0.1] * n_columns, [0.1] * n_columns, [0.8] * n_columns]

        factor_vars = ['speed', 'switch', 'time']
        self.phi1 = Factor(list(factor_vars), [3, 2, 2], np.ones(12))
        self.phi2 = Factor(list(factor_vars), [3, 2, 2], np.ones(12),
                           state_names=self.sn1)

        self.cpd1 = TabularCPD('grade', 3, grade_table(6),
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = TabularCPD('grade', 3, grade_table(6),
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3],
                               state_names=self.sn2)

        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        student_cpds = [
            TabularCPD('diff', 2, [[0.2, 0.8]]),
            TabularCPD('intel', 2, [[0.3, 0.7]]),
            TabularCPD('grade', 3, grade_table(4),
                       evidence=['diff', 'intel'],
                       evidence_card=[2, 2]),
        ]
        student.add_cpds(*student_cpds)

        # Same model wrapped twice: once plain, once with explicit state names.
        self.model1 = Inference(student)
        self.model2 = Inference(student, state_names=self.sn2)
예제 #4
0
    def test_check_model2(self):
        """check_model must raise ValueError for each of three CPDs added alone.

        The value tables used here have columns that do not sum to one,
        which is presumably what check_model rejects.
        """
        def assert_rejected(cpd):
            # Add the CPD, expect the model check to fail, then clean up.
            self.G.add_cpds(cpd)
            self.assertRaises(ValueError, self.G.check_model)
            self.G.remove_cpds(cpd)

        assert_rejected(TabularCPD('s', 2,
                                   values=np.array([[0.5, 0.3], [0.8, 0.7]]),
                                   evidence=['i'],
                                   evidence_card=2))

        assert_rejected(TabularCPD('g', 2,
                                   values=np.array([[0.2, 0.3, 0.4, 0.6],
                                                    [0.3, 0.7, 0.6, 0.4]]),
                                   evidence=['d', 'i'],
                                   evidence_card=[2, 2]))

        assert_rejected(TabularCPD('l', 2,
                                   values=np.array([[0.2, 0.3], [0.1, 0.7]]),
                                   evidence=['g'],
                                   evidence_card=[2]))
예제 #5
0
    def setUp(self):
        """Create a Bayesian model, a Markov model and a Gibbs sampler fixture."""
        # Bayesian fixture: 'grade' depends on 'diff' and 'intel'.
        bayesian_cpds = [
            TabularCPD('diff', 2, [[0.6], [0.4]]),
            TabularCPD('intel', 2, [[0.7], [0.3]]),
            TabularCPD('grade', 3,
                       [[0.3, 0.05, 0.9, 0.5],
                        [0.4, 0.25, 0.08, 0.3],
                        [0.3, 0.7, 0.02, 0.2]],
                       evidence=['diff', 'intel'],
                       evidence_card=[2, 2]),
        ]
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from(
            [('diff', 'grade'), ('intel', 'grade')])
        self.bayesian_model.add_cpds(*bayesian_cpds)

        # Markov fixture: three pairwise factors that all share node 'B'.
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        markov_factors = [
            Factor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6]),
            Factor(['C', 'B'], [4, 3],
                   [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6]),
            Factor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3]),
        ]
        self.markov_model.add_factors(*markov_factors)

        self.gibbs = GibbsSampling(self.bayesian_model)
예제 #6
0
 def test_add_multiple_cpds(self):
     """CPDs added together are returned by get_cpds() in insertion order."""
     diff_cpd = TabularCPD('diff', 2, np.random.rand(2, 1))
     intel_cpd = TabularCPD('intel', 2, np.random.rand(2, 1))
     grade_cpd = TabularCPD('grade', 2, np.random.rand(2, 4),
                            ['diff', 'intel'], [2, 2])
     added = [diff_cpd, intel_cpd, grade_cpd]

     self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
     self.graph.add_cpds(*added)

     self.assertListEqual(self.graph.get_cpds(), added)
예제 #7
0
 def test_get_cpd_raises_error(self):
     """get_cpds() raises ValueError when asked for the name 'sat'."""
     diff_cpd = TabularCPD('diff', 2, np.random.rand(2, 1))
     intel_cpd = TabularCPD('intel', 2, np.random.rand(2, 1))
     grade_cpd = TabularCPD('grade', 2, np.random.rand(2, 4),
                            ['diff', 'intel'], [2, 2])

     self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
     self.graph.add_cpds(diff_cpd, intel_cpd, grade_cpd)

     # 'sat' was never added to the graph in this test.
     with self.assertRaises(ValueError):
         self.graph.get_cpds('sat')
예제 #8
0
    def get_parameters(self, prior='dirichlet', **kwargs):
        """
        Method for getting all the learned CPDs of the model.

        Parameters
        ----------
        prior: str (default: 'dirichlet')
            Name of the prior. Only 'dirichlet' is implemented; for any other
            value no CPD is built and an empty list is returned.
        alpha: dict (optional, passed as keyword argument)
            Pseudo counts per node: maps each node to a flat sequence with one
            entry per cell of that node's CPD table. When omitted (and `prior`
            is 'dirichlet') every cell gets a pseudo count of 1. Must be
            supplied when `prior` is not 'dirichlet'.

        Returns
        -------
        parameters: list
            List containing all the parameters. For Bayesian Model it would be list of CPDs'
            for Markov Model it would be a list of factors

        Raises
        ------
        KeyError
            If `prior` is not 'dirichlet' and no `alpha` keyword is given.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        if prior == 'dirichlet' and 'alpha' not in kwargs:
            # Default prior: one pseudo count for every cell of every CPD table.
            alpha = {}
            for node in self.model.nodes():
                parent_cards = [self.node_card[parent]
                                for parent in self.model.get_parents(node)]
                n_parent_states = int(np.prod(parent_cards)) if parent_cards else 1
                alpha[node] = [1] * (self.node_card[node] * n_parent_states)
        else:
            alpha = kwargs['alpha']

        parameters = []
        if prior != 'dirichlet':
            # No other prior is implemented, so there is nothing to estimate.
            return parameters

        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            node_alpha = np.array(alpha[node])

            if not parents:
                state_counts = self.data.loc[:, node].value_counts()
                # value_counts() orders by frequency; re-sort by state so each
                # count lines up with its state (and with its pseudo count).
                state_counts = state_counts.reindex(sorted(state_counts.index))

                values = (state_counts.values + node_alpha) / (state_counts.values.sum() + node_alpha.sum())
                cpd = TabularCPD(node, self.node_card[node], values[:, np.newaxis])
            else:
                parent_card = np.array([self.node_card[parent] for parent in parents])
                var_card = self.node_card[node]
                # Flat counts, one per observed (node, parents...) combination.
                # NOTE(review): the reshape below needs every combination to
                # occur in the data at least once - confirm against callers.
                state_counts = self.data.groupby([node] + list(parents)).size().values

                values = (state_counts + node_alpha) / (state_counts.sum() + node_alpha.sum())
                values = values.reshape(var_card, int(np.prod(parent_card)))
                cpd = TabularCPD(node, var_card, values,
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))

            cpd.normalize()
            parameters.append(cpd)

        return parameters
예제 #9
0
    def initialize_initial_state(self):
        """
        This method will automatically re-adjust the cpds and the edges added to the bayesian network.
        If an edge is added as an intra time slice edge in the 0th timeslice, this method will
        automatically add it in the 1st timeslice. It will also add the cpds. However, to call this
        method, one needs to add cpds as well as the edges in the bayesian network of the whole
        skeleton including the 0th and the 1st timeslice.

        Examples
        --------
        >>> from pgmpy.models import DynamicBayesianNetwork as DBN
        >>> from pgmpy.factors import TabularCPD
        >>> student = DBN()
        >>> student.add_nodes_from(['D', 'G', 'I', 'S', 'L'])
        >>> student.add_edges_from([(('D', 0),('G', 0)),(('I', 0),('G', 0)),(('D', 0),('D', 1)),(('I', 0),('I', 1))])
        >>> grade_cpd = TabularCPD(('G', 0), 3, [[0.3, 0.05, 0.9, 0.5],
        ...                                      [0.4, 0.25, 0.8, 0.03],
        ...                                      [0.3, 0.7, 0.02, 0.2]],
        ...                        evidence=[('I', 0),('D', 0)],
        ...                        evidence_card=[2, 2])
        >>> d_i_cpd = TabularCPD(('D', 1), 2, [[0.6, 0.3],
        ...                                    [0.4, 0.7]],
        ...                      evidence=[('D', 0)],
        ...                      evidence_card=2)
        >>> diff_cpd = TabularCPD(('D', 0), 2, [[0.6, 0.4]])
        >>> intel_cpd = TabularCPD(('I',0), 2, [[0.7, 0.3]])
        >>> i_i_cpd = TabularCPD(('I', 1), 2, [[0.5, 0.4],
        ...                                    [0.5, 0.6]],
        ...                      evidence=[('I', 0)],
        ...                      evidence_card=2)
        >>> student.add_cpds(grade_cpd, d_i_cpd, diff_cpd, intel_cpd, i_i_cpd)
        >>> student.initialize_initial_state()
        """
        # NOTE(review): add_cpds() is called while iterating self.cpds; if it
        # appends to that same list the loop also visits the new CPDs - confirm.
        for cpd in self.cpds:
            # Same variable name in the other time slice (0 <-> 1).
            temp_var = (cpd.variable[0], 1 - cpd.variable[1])
            parents = self.get_parents(temp_var)
            # Only build a CPD when the counterpart variable has none yet.
            if not any(x.variable == temp_var for x in self.cpds):
                # all() over an empty sequence is True, so parents[0] is never
                # evaluated when there are no parents.
                if all(x[1] == parents[0][1] for x in parents):
                    if parents:
                        # Cardinalities of everything but the first variable,
                        # in reversed order.
                        evidence_card = cpd.cardinality[:0:-1]
                        new_cpd = TabularCPD(
                            temp_var, cpd.variable_card,
                            cpd.values.reshape(cpd.variable_card,
                                               np.prod(evidence_card)),
                            parents, evidence_card)
                    else:
                        if cpd.get_evidence():
                            # Sum out the evidence to get an unconditional table.
                            initial_cpd = cpd.marginalize(cpd.get_evidence(),
                                                          inplace=False)
                            # NOTE(review): reshape to two columns is
                            # hard-coded; looks like it assumes a binary
                            # variable - confirm.
                            new_cpd = TabularCPD(
                                temp_var, cpd.variable_card,
                                np.reshape(initial_cpd.values, (-1, 2)))
                        else:
                            new_cpd = TabularCPD(
                                temp_var, cpd.variable_card,
                                np.reshape(cpd.values, (-1, 2)))
                    self.add_cpds(new_cpd)
            # Runs once per loop iteration, not once after the loop.
            self.check_model()
예제 #10
0
    def test_get_cpds(self):
        """get_cpds('d') returns a CPD whose variable is 'd'."""
        all_cpds = [
            TabularCPD('d', 2, np.random.rand(2, 1)),
            TabularCPD('i', 2, np.random.rand(2, 1)),
            TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2]),
            TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2),
            TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2),
        ]
        self.G.add_cpds(*all_cpds)

        fetched = self.G.get_cpds('d')
        self.assertEqual(fetched.variable, 'd')
예제 #11
0
    def test_get_cpds1(self):
        """Lookup by name works when one node name ('A') is a prefix of another ('AB')."""
        self.model = BayesianModel([('A', 'AB')])
        parent_cpd = TabularCPD('A', 2, np.random.rand(2, 1))
        child_cpd = TabularCPD('AB', 2, np.random.rand(2, 2),
                               evidence=['A'], evidence_card=[2])
        self.model.add_cpds(parent_cpd, child_cpd)

        for name in ('A', 'AB'):
            self.assertEqual(self.model.get_cpds(name).variable, name)
예제 #12
0
 def test_get_cpd_for_node(self):
     """get_cpds(name) returns exactly the CPD that was added for that node."""
     diff_cpd = TabularCPD('diff', 2, np.random.rand(2, 1))
     intel_cpd = TabularCPD('intel', 2, np.random.rand(2, 1))
     grade_cpd = TabularCPD('grade', 2, np.random.rand(2, 4),
                            ['diff', 'intel'], [2, 2])

     self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
     self.graph.add_cpds(diff_cpd, intel_cpd, grade_cpd)

     by_node = [('diff', diff_cpd), ('intel', intel_cpd), ('grade', grade_cpd)]
     for node, expected_cpd in by_node:
         self.assertEqual(self.graph.get_cpds(node), expected_cpd)
예제 #13
0
def bayesnet_examples():
    """Walk through fitting, predicting with and hand-specifying a student network.

    Builds a five-node BayesianModel, fits it on the first 75% of a randomly
    generated binary data set, predicts on the remaining rows with column 'D'
    removed, and finally adds hand-written CPDs to the model. Returns nothing.
    """
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd

    student_model = BayesianModel([('D', 'G'),
                                   ('I', 'G'),
                                   ('G', 'L'),
                                   ('I', 'S')])
    # we can generate some random data.
    raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
    data = pd.DataFrame(raw_data, columns=['D', 'I', 'G', 'L', 'S'])

    # First 75% of the rows train the model, the rest are held out.
    n_train = int(data.shape[0] * 0.75)
    data_train = data[:n_train]
    student_model.fit(data_train)
    student_model.get_cpds()

    # Drop 'D' without inplace=True: the slice may be a view of `data`, and an
    # in-place drop on it triggers pandas' SettingWithCopyWarning.
    data_test = data[n_train:].drop('D', axis=1)
    student_model.predict(data_test)

    grade_cpd = TabularCPD(
        variable='G',
        variable_card=3,
        values=[[0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2]],
        evidence=['I', 'D'],
        evidence_card=[2, 2])
    difficulty_cpd = TabularCPD(
        variable='D',
        variable_card=2,
        values=[[0.6, 0.4]])
    intel_cpd = TabularCPD(
        variable='I',
        variable_card=2,
        values=[[0.7, 0.3]])
    letter_cpd = TabularCPD(
        variable='L',
        variable_card=2,
        values=[[0.1, 0.4, 0.99],
                [0.9, 0.6, 0.01]],
        evidence=['G'],
        evidence_card=[3])
    sat_cpd = TabularCPD(
        variable='S',
        variable_card=2,
        values=[[0.95, 0.2],
                [0.05, 0.8]],
        evidence=['I'],
        evidence_card=[2])
    student_model.add_cpds(grade_cpd, difficulty_cpd,
                           intel_cpd, letter_cpd,
                           sat_cpd)
예제 #14
0
 def test_remove_multiple_cpds_string(self):
     """Removing CPDs by variable name leaves only the one not named."""
     diff_cpd = TabularCPD('diff', 2, values=np.random.rand(2, 1))
     intel_cpd = TabularCPD('intel', 2, values=np.random.rand(2, 1))
     grade_cpd = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                            evidence=['diff', 'intel'],
                            evidence_card=[2, 2])

     self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
     self.graph.add_cpds(diff_cpd, intel_cpd, grade_cpd)

     # Names, not CPD objects, are passed to remove_cpds here.
     self.graph.remove_cpds('diff', 'grade')

     remaining = self.graph.get_cpds()
     self.assertListEqual(remaining, [intel_cpd])
예제 #15
0
    def test_get_parameters_missing_data(self):
        """The estimator's CPDs for fixtures m1/d1 match the expected set."""
        estimator = MaximumLikelihoodEstimator(self.m1, self.d1)

        expected_a = TabularCPD('A', 2, [[2.0 / 3], [1.0 / 3]])
        expected_c = TabularCPD('C', 2,
                                [[0.0, 0.0, 1.0, 0.5],
                                 [1.0, 1.0, 0.0, 0.5]],
                                evidence=['A', 'B'],
                                evidence_card=[2, 2])
        expected_b = TabularCPD('B', 2, [[2.0 / 3], [1.0 / 3]])
        expected = {expected_a, expected_c, expected_b}

        # Compared as sets, so the order of the returned CPDs is irrelevant.
        self.assertSetEqual(set(estimator.get_parameters()), expected)
예제 #16
0
    def test_check_model(self):
        """check_model() returns a truthy value once these three CPDs are added."""
        grade_values = np.array([[0.2, 0.3, 0.4, 0.6],
                                 [0.8, 0.7, 0.6, 0.4]])
        cpd_g = TabularCPD('g', 2, grade_values, ['d', 'i'], [2, 2])

        sat_values = np.array([[0.2, 0.3], [0.8, 0.7]])
        cpd_s = TabularCPD('s', 2, sat_values, ['i'], 2)

        letter_values = np.array([[0.2, 0.3], [0.8, 0.7]])
        cpd_l = TabularCPD('l', 2, letter_values, ['g'], 2)

        self.G.add_cpds(cpd_g, cpd_s, cpd_l)

        model_is_valid = self.G.check_model()
        self.assertTrue(model_is_valid)
예제 #17
0
 def setUp(self):
     """Create a bare five-node model (G) and a parameterised student model (G1)."""
     self.G = BayesianModel(
         [('a', 'd'), ('b', 'd'), ('d', 'e'), ('b', 'c')])
     self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])

     student_cpds = [
         TabularCPD('diff', 2, values=[[0.2], [0.8]]),
         TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]]),
         # One column per (diff, intel) combination: 2 * 3 = 6 columns.
         TabularCPD('grade', 3,
                    values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                            [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                    evidence=['diff', 'intel'],
                    evidence_card=[2, 3]),
     ]
     self.G1.add_cpds(*student_cpds)
예제 #18
0
 def setUp(self):
     """Build two small dynamic Bayesian networks and wrap each in DBNInference."""
     z0, z1 = ('Z', 0), ('Z', 1)
     x0, y0 = ('X', 0), ('Y', 0)

     # Network 1: Z(0) is the parent of X(0), Y(0) and Z(1).
     first_dbn = DynamicBayesianNetwork()
     first_dbn.add_edges_from([(z0, x0), (z0, y0), (z0, z1)])
     cpd_start_z_1 = TabularCPD(z0, 2, [[0.8, 0.2]])
     cpd_x_1 = TabularCPD(x0, 2, [[0.9, 0.6], [0.1, 0.4]], [z0], 2)
     cpd_y_1 = TabularCPD(y0, 2, [[0.7, 0.2], [0.3, 0.8]], [z0], 2)
     cpd_trans_z_1 = TabularCPD(z1, 2, [[0.9, 0.1], [0.1, 0.9]], [z0], 2)
     first_dbn.add_cpds(cpd_start_z_1, cpd_trans_z_1, cpd_x_1, cpd_y_1)
     first_dbn.initialize_initial_state()
     self.dbn_inference_1 = DBNInference(first_dbn)

     # Network 2: chain Z(0) -> X(0) -> Y(0), plus the transition Z(0) -> Z(1).
     second_dbn = DynamicBayesianNetwork()
     second_dbn.add_edges_from([(z0, x0), (x0, y0), (z0, z1)])
     cpd_start_z_2 = TabularCPD(z0, 2, [[0.5, 0.5]])
     cpd_x_2 = TabularCPD(x0, 2, [[0.6, 0.9], [0.4, 0.1]], [z0], 2)
     cpd_y_2 = TabularCPD(y0, 2, [[0.2, 0.3], [0.8, 0.7]], [x0], 2)
     cpd_z_2 = TabularCPD(z1, 2, [[0.4, 0.7], [0.6, 0.3]], [z0], 2)
     second_dbn.add_cpds(cpd_x_2, cpd_y_2, cpd_z_2, cpd_start_z_2)
     second_dbn.initialize_initial_state()
     self.dbn_inference_2 = DBNInference(second_dbn)
예제 #19
0
    def get_parameters(self):
        """
        Method used to get parameters.

        For every node of the model a TabularCPD is built from the observed
        state counts in `self.data` and then normalized. Parent state
        combinations that never occur in the data get a uniform distribution.

        Returns
        -------
        parameters: list
            List of TabularCPDs, one for each variable of the model

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        parameters = []

        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            if not parents:
                # .loc replaces DataFrame.ix, which was removed in pandas 1.0.
                state_counts = self.data.loc[:, node].value_counts()
                # value_counts() orders by frequency; re-sort by state.
                state_counts = state_counts.reindex(sorted(state_counts.index))
                cpd = TabularCPD(node, self.node_card[node],
                                 state_counts.values[:, np.newaxis])
                cpd.normalize()
                parameters.append(cpd)
            else:
                parent_card = np.array([self.node_card[parent] for parent in parents])
                var_card = self.node_card[node]

                # Rows: states of `node`; columns: parent state combinations.
                values = self.data.groupby([node] + parents).size().unstack(parents).fillna(0)
                if len(values.columns) != np.prod(parent_card):
                    # some columns are missing if for some states of the parents no data was observed.
                    # reindex to add missing columns and fill in uniform (conditional) probabilities:
                    full_index = pd.MultiIndex.from_product([range(card) for card in parent_card], names=parents)
                    values = values.reindex(columns=full_index).fillna(1.0/var_card)

                cpd = TabularCPD(node, var_card, np.array(values),
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))
                cpd.normalize()
                parameters.append(cpd)

        return parameters
예제 #20
0
    def test_add_multiple_cpds(self):
        """Add five CPDs in one call and check each is returned for its variable."""
        from pgmpy.factors import TabularCPD

        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        expected = [('d', cpd_d), ('i', cpd_i), ('g', cpd_g),
                    ('l', cpd_l), ('s', cpd_s)]

        self.G.add_cpds(*[cpd for _, cpd in expected])

        for variable, cpd in expected:
            self.assertEqual(self.G.get_cpds(variable), cpd)
예제 #21
0
    def test_reduce_cpd_statename(self):
        """reduce() on 'diff' accepts a state name or an index, in place or not.

        Each of the four combinations starts from a fresh CPD and must leave
        a CPD over ['grade', 'intel'] with the same 3x3 table.
        """
        expected_table = np.array([[0.1, 0.1, 0.1],
                                   [0.1, 0.1, 0.1],
                                   [0.8, 0.8, 0.8]])
        # (state given to reduce, whether to use the default in-place call)
        scenarios = [('high', True), (0, True), ('high', False), (0, False)]

        for state, in_place in scenarios:
            cpd = TabularCPD('grade', 3,
                             [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                              [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                              [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                             evidence=['diff', 'intel'],
                             evidence_card=[2, 3],
                             state_names=self.sn2)
            if in_place:
                cpd.reduce([('diff', state)])
            else:
                cpd = cpd.reduce([('diff', state)], inplace=False)

            self.assertEqual(cpd.variable, 'grade')
            self.assertEqual(cpd.variables, ['grade', 'intel'])
            np_test.assert_array_equal(cpd.get_cpd(), expected_table)
예제 #22
0
    def get_parameters(self):
        """
        Method used to get parameters.

        For every node of the model a TabularCPD is built from the observed
        state counts in `self.data` and then normalized.

        Returns
        -------
        parameters: list
            List containing all the parameters. For Bayesian Model it would be list of CPDs'
            for Markov Model it would be a list of factors

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        parameters = []

        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            if not parents:
                # .loc replaces DataFrame.ix, which was removed in pandas 1.0.
                state_counts = self.data.loc[:, node].value_counts()
                # value_counts() orders by frequency; re-sort by state.
                state_counts = state_counts.reindex(sorted(state_counts.index))
                cpd = TabularCPD(node, self.node_card[node],
                                 state_counts.values[:, np.newaxis])
                cpd.normalize()
                parameters.append(cpd)
            else:
                parent_card = np.array(
                    [self.node_card[parent] for parent in parents])
                var_card = self.node_card[node]
                # Rows: states of `node`; columns: observed parent state
                # combinations. Combinations absent from the data produce no
                # column here.
                values = self.data.groupby([node] + parents).size().unstack(
                    parents).fillna(0)
                cpd = TabularCPD(node,
                                 var_card,
                                 np.array(values),
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))
                cpd.normalize()
                parameters.append(cpd)

        return parameters
예제 #23
0
    def get_model(self):
        """
        Build a BayesianModel from the edges, states, parents, CPD tables and
        properties stored on this reader.

        Raises
        ------
        AttributeError
            Re-raised with a fixed message whenever an AttributeError occurs
            while building the model.

        Example
        ----------
        >>> from pgmpy.readwrite import BIFReader
        >>> reader = BIFReader("bif_test.bif")
        >>> reader.get_model()
        <pgmpy.models.BayesianModel.BayesianModel object at 0x7f20af154320>
        """
        try:
            network = BayesianModel(self.variable_edges)
            network.name = self.network_name
            network.add_nodes_from(self.variable_names)

            cpd_list = []
            # Sorted so CPDs are always added in the same order.
            for var in sorted(self.variable_cpds):
                table = self.variable_cpds[var]
                parent_cards = [len(self.variable_states[parent])
                                for parent in self.variable_parents[var]]
                cpd_list.append(
                    TabularCPD(var, len(self.variable_states[var]), table,
                               evidence=self.variable_parents[var],
                               evidence_card=parent_cards))

            network.add_cpds(*cpd_list)

            for node, node_props in self.variable_properties.items():
                for entry in node_props:
                    # Each property is a "name = value" string.
                    prop_name, prop_value = [part.strip()
                                             for part in entry.split('=')]
                    network.node[node][prop_name] = prop_value

            return network

        except AttributeError:
            raise AttributeError('First get states of variables, edges, parents and network name')
예제 #24
0
    def get_model(self):
        """Build a BayesianModel from this object's edges, CPD tables and properties."""
        network = BayesianModel(self.get_edges())
        network.name = self.network_name

        cpd_list = []
        for var, table in self.variable_CPD.items():
            parent_vars = self.variable_parents[var]
            # One cardinality per parent, taken from its list of states.
            parent_cards = [len(self.variable_states[parent])
                            for parent in parent_vars]
            cpd_list.append(
                TabularCPD(var, len(self.variable_states[var]), table,
                           evidence=parent_vars,
                           evidence_card=parent_cards))

        network.add_cpds(*cpd_list)

        for node, node_props in self.variable_property.items():
            for entry in node_props:
                # Each property is a "name = value" string.
                prop_name, prop_value = [part.strip()
                                         for part in entry.split('=')]
                network.node[node][prop_name] = prop_value

        return network
예제 #25
0
    def get_model(self):
        """
        Returns an instance of Bayesian Model.
        """
        network = BayesianModel(self.edges)
        network.name = self.model_name

        cpd_list = []
        for var, entry in self.variable_CPD.items():
            # 'CONDSET' and 'CARDINALITY' are optional; default to no evidence.
            evidence = []
            if 'CONDSET' in entry:
                evidence = entry['CONDSET']
            table = entry['DPIS']
            evidence_card = []
            if 'CARDINALITY' in entry:
                evidence_card = entry['CARDINALITY']
            n_states = len(self.variables[var]['STATES'])
            cpd_list.append(TabularCPD(var, n_states, table,
                                       evidence=evidence,
                                       evidence_card=evidence_card))

        network.add_cpds(*cpd_list)

        # Attach every variable's full property mapping to its node.
        for var, properties in self.variables.items():
            network.node[var] = properties

        return network
예제 #26
0
파일: test_BIF.py 프로젝트: cfm25/pgmpy
    def setUp(self):
        """Build the five-node 'dog-out' model and a BIFWriter wrapping it."""
        edges = [['family-out', 'dog-out'], ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'], ['dog-out', 'hear-bark']]

        cpds = {
            'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                 [0.9, 0.1, 0.3, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.3], [0.01, 0.99]]),
            'light-on': np.array([[0.6, 0.4], [0.05, 0.95]])
        }

        node_names = ['bowel-problem', 'dog-out', 'family-out',
                      'hear-bark', 'light-on']
        # Every variable has the same two states.
        states = {name: ['true', 'false'] for name in node_names}

        parents = {
            'bowel-problem': [],
            'dog-out': ['family-out', 'bowel-problem'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out']
        }

        properties = {
            'bowel-problem': ['position = (335, 99)'],
            'dog-out': ['position = (300, 195)'],
            'family-out': ['position = (257, 99)'],
            'hear-bark': ['position = (296, 268)'],
            'light-on': ['position = (218, 195)']
        }

        self.model = BayesianModel(edges)

        cpd_list = []
        for var in sorted(cpds):
            parent_cards = [len(states[parent]) for parent in parents[var]]
            cpd_list.append(TabularCPD(var, len(states[var]), cpds[var],
                                       evidence=parents[var],
                                       evidence_card=parent_cards))
        self.model.add_cpds(*cpd_list)

        for node, node_props in properties.items():
            for entry in node_props:
                # Each property is a "name = value" string.
                prop_name, prop_value = [part.strip()
                                         for part in entry.split('=')]
                self.model.node[node][prop_name] = prop_value

        self.writer = BIFWriter(model=self.model)
예제 #27
0
 def name_cpd(aid):
     """Return a uniform CPD over `num_names` states for variable 'N' + aid."""
     from pgmpy.factors import TabularCPD
     uniform_row = [1.0 / num_names] * num_names
     name_variable = 'N' + aid
     cpd = TabularCPD(variable=name_variable,
                      variable_card=num_names,
                      values=[uniform_row])
     # Tag the CPD so it can be recognised as a name variable later.
     cpd.semtype = 'name'
     return cpd
예제 #28
0
 def test_add_single_cpd(self):
     """A single added CPD is the only entry returned by get_cpds()."""
     grade_cpd = TabularCPD('grade', 2,
                            values=np.random.rand(2, 4),
                            evidence=['diff', 'intel'],
                            evidence_card=[2, 2])

     self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
     self.graph.add_cpds(grade_cpd)

     stored = self.graph.get_cpds()
     self.assertListEqual(stored, [grade_cpd])
예제 #29
0
 def score_cpd(aid1, aid2):
     """Return the score CPD for a pair, conditioned on its 'A' variable."""
     pair_suffix = aid1 + aid2
     score_variable = 'S' + pair_suffix
     match_variable = 'A' + pair_suffix
     cpd = TabularCPD(variable=score_variable,
                      variable_card=num_scores,
                      values=score_values,
                      evidence=[match_variable],
                      evidence_card=[num_same_diff])
     # Tag the CPD so it can be recognised as a score variable later.
     cpd.semtype = 'score'
     return cpd
예제 #30
0
 def score_cpd(aid1, aid2):
     """Return the score CPD for a pair, conditioned on both 'N' name variables."""
     score_variable = 'S' + aid1 + aid2
     name_variables = ['N' + aid1, 'N' + aid2]
     cpd = TabularCPD(variable=score_variable,
                      variable_card=num_scores,
                      values=score_values,
                      evidence=name_variables,
                      evidence_card=[num_names, num_names])
     # Tag the CPD so it can be recognised as a score variable later.
     cpd.semtype = 'score'
     return cpd