Exemple #1
0
    def _compute_mutual_information(self, X, pair):
        p_node = Factor(X.groupby(pair.node).size()).normalize()
        p_parents = Factor(X.groupby(list(pair.parents)).size()).normalize()
        p_nodeparents = Factor(X.groupby([*pair.parents, pair.node]).size()).normalize()

        # todo: have to get values from Factor: 'numpy.ndarray' object has no attribute '_data'
        mi = np.sum(p_nodeparents.values * np.log(p_nodeparents/(p_node*p_parents)))
        return mi
    def test_creation(self):
        """Check Factor construction: ``Factor(0)`` raises, valid input has the expected repr."""
        # The test expects construction from a bare scalar to fail.
        with self.assertRaises(Exception):
            Factor(0)

        states = {'A': ['a1', 'a0']}
        fA = Factor([0.6, 0.4], states)

        expected_repr = 'factor(A)\nA \na1    0.6\na0    0.4\ndtype: float64'
        self.assertEqual(repr(fA), expected_repr)
Exemple #3
0
    def _compute_conditional_distributions(self, X):
        P = dict()
        local_epsilon = self.epsilon * self.epsilon_split[1] / (
            self._n_columns_fit - self.degree_network)

        # first materialize noisy distributions for nodes who have a equal number of parents to the degree k.
        # earlier nodes can be inferred from these distributions without adding extra noise
        for idx, pair in enumerate(self.network_[self.degree_network:]):
            cpt_size = utils.get_size_contingency_table(
                X[[*pair.parents, pair.node]])
            if self.verbose >= 2:
                print(
                    'Learning conditional probabilities: {} - with parents {} ~ estimated size: {}'
                    .format(pair.node, pair.parents, cpt_size))
            attributes = [*pair.parents, pair.node]
            dp_joint_distribution = dp_utils.dp_joint_distribution(
                X[attributes], epsilon=local_epsilon)
            # dp_joint_distribution = utils.joint_distribution(X[attributes])
            cpt = CPT(dp_joint_distribution, conditioned_variables=[pair.node])
            # todo: use custom normalization to fill missing values with uniform
            cpt = utils.normalize_cpt(cpt, dropna=False)
            P[pair.node] = cpt
            # retain noisy joint distribution from k+1 node to infer distributions parent nodes
            if idx == 0:
                infer_from_distribution = Factor(dp_joint_distribution)
                infer_from_distribution = infer_from_distribution.sum_out(
                    pair.node)

        # for pair in self.network_[:self.k]:

        # go iteratively from node at k to root of network, sum out child nodes and get cpt.
        for pair in reversed(self.network_[:self.degree_network]):
            if pair.parents is not None:
                attributes = [*pair.parents, pair.node]
            else:
                attributes = [pair.node]
            cpt_size = utils.get_size_contingency_table(X[attributes])
            if self.verbose >= 2:
                print(
                    'Learning conditional probabilities: {} - with parents {} ~ estimated size: {}'
                    .format(pair.node, pair.parents, cpt_size))
            # infer_from_distribution = infer_from_distribution.sum_out(pair.node)
            # conditioned_var = pair.parents[-1]
            cpt = CPT(infer_from_distribution,
                      conditioned_variables=[pair.node])
            cpt = utils.normalize_cpt(cpt, dropna=False)

            P[pair.node] = cpt
            infer_from_distribution = infer_from_distribution.sum_out(
                pair.node)

        self.cpt_ = P
        return self
    def test_from_data(self):
        """Build a Factor from a CSV-backed DataFrame and check its scope and total."""
        csv_path = thomas.core.get_pkg_data('dataset_17_2.csv')
        data = pd.read_csv(csv_path, sep=';')

        columns = ['H', 'S', 'E']
        empirical = Factor.from_data(data, cols=columns)

        # Compared as sets, so the order of the scope is not checked here.
        self.assertEqual(set(empirical.scope), set(columns))
        self.assertEqual(empirical.sum(), 16)
    def test_state_order(self):
        """Test that a Factor keeps its states in order and/or its index correct.

        Regression test for GitHub issue #1.
        """
        # P(A)
        fA = Factor([0.6, 0.4], {'A': ['a1', 'a0']})

        # `assertEqual` replaces the deprecated `assertEquals` alias, which
        # no longer exists in Python 3.12+.
        self.assertEqual(fA['a1'], 0.6)
        self.assertEqual(fA['a0'], 0.4)

        # P(B|A)
        fB_A = Factor([0.2, 0.8, 0.75, 0.25], {
            'A': ['a1', 'a0'],
            'B': ['b1', 'b0']
        })

        self.assertEqual(fB_A['a1', 'b1'], 0.20)
        self.assertEqual(fB_A['a1', 'b0'], 0.80)
        self.assertEqual(fB_A['a0', 'b1'], 0.75)
        self.assertEqual(fB_A['a0', 'b0'], 0.25)
    def test_serialization_complex(self):
        """Test the dict (JSON-style) serialization round trip of a Factor."""
        # Only the second of the five sprinkler factors is exercised here.
        _, fB_A, _, _, _ = examples.get_sprinkler_factors()

        dict_repr = fB_A.as_dict()

        # Make sure the expected keys exist
        for key in ['type', 'scope', 'states', 'data']:
            self.assertIn(key, dict_repr)

        self.assertEqual(dict_repr['type'], 'Factor')

        # Rebuilding from the dict must give back the same scope and states.
        fB_A2 = Factor.from_dict(dict_repr)
        self.assertEqual(fB_A.scope, fB_A2.scope)
        self.assertEqual(fB_A.states, fB_A2.states)
    def test_multiplication_state_order(self):
        """Test factor multiplication with states ordered differently."""
        # fE has states ordered ['T', 'F']; T=1.0, F=0.0
        fE_out_of_order = Factor.from_dict({
            'type': 'Factor',
            'scope': ['E'],
            'states': {
                'E': ['T', 'F']
            },
            'data': [1.0, 0.0]
        })

        # Same values per state, but with the states listed as ['F', 'T'].
        fE = Factor.from_dict({
            'type': 'Factor',
            'scope': ['E'],
            'states': {
                'E': ['F', 'T']
            },
            'data': [0.0, 1.0]
        })

        multiplied = fE * fE_out_of_order
        # `assertAlmostEqual` replaces the deprecated `assertAlmostEquals`
        # alias, which no longer exists in Python 3.12+.
        self.assertAlmostEqual(multiplied['T'], 1.0, places=2)
        self.assertAlmostEqual(multiplied['F'], 0.0, places=2)
    def test_values(self):
        """Check that ``Factor.values`` is a numpy array."""
        factor = Factor([0.6, 0.4], {'A': ['a1', 'a0']})
        raw = factor.values

        self.assertTrue(isinstance(raw, np.ndarray))
    def test_align_index(self):
        """Test index aligning.

        Covers factors over the same variable with differently ordered states,
        factors with disjoint scopes (expected to raise
        ``error.IncompatibleScopeError``), two-variable factors where one
        variable's states are reordered, and aligning a one-variable factor
        against a two-variable factor.
        """
        fA1 = Factor.from_dict({
            'type': 'Factor',
            'scope': ['A'],
            'states': {
                'A': ['a1', 'a2', 'a3']
            },
            'data': [1.0, 2.0, 3.0]
        })

        fA2 = Factor.from_dict({
            'type': 'Factor',
            'scope': ['A'],
            'states': {
                'A': ['a3', 'a1', 'a2']
            },
            'data': [3.0, 1.0, 0.0]
        })

        fB = Factor.from_dict({
            'type': 'Factor',
            'scope': ['B'],
            'states': {
                'B': ['b3', 'b1', 'b2']
            },
            'data': [3.0, 1.0, 0.0]
        })

        aligned = fA1.align_index(fA2)

        # `assertIsInstance` / `assertEqual` replace `assertTrue(isinstance())`
        # and the deprecated `assertEquals` alias (gone in Python 3.12+).
        self.assertIsInstance(aligned, Factor)
        self.assertEqual(aligned.scope, fA1.scope)
        self.assertEqual(aligned.scope, fA2.scope)

        # The aligned factor must still carry fA1's values per state.
        self.assertEqual(aligned['a1'], 1.0)
        self.assertEqual(aligned['a2'], 2.0)
        self.assertEqual(aligned['a3'], 3.0)

        with self.assertRaises(error.IncompatibleScopeError):
            fA1.align_index(fB)

        # Same factors, but one variable has a different order.
        fB_A1 = Factor([0.2, 0.8, 0.75, 0.25], {
            'A': ['a1', 'a0'],
            'B': ['b1', 'b0']
        })

        fB_A2 = Factor([0.8, 0.2, 0.25, 0.75], {
            'A': ['a1', 'a0'],
            'B': ['b0', 'b1']
        })

        aligned = fB_A1.align_index(fB_A2)
        self.assertEqual(aligned['a1', 'b1'], 0.2)
        self.assertEqual(aligned['a1', 'b0'], 0.8)
        self.assertEqual(aligned['a0', 'b1'], 0.75)
        self.assertEqual(aligned['a0', 'b0'], 0.25)

        aligned = fB_A2.align_index(fB_A1)
        self.assertEqual(aligned['a1', 'b1'], 0.2)
        self.assertEqual(aligned['a1', 'b0'], 0.8)
        self.assertEqual(aligned['a0', 'b1'], 0.75)
        self.assertEqual(aligned['a0', 'b0'], 0.25)

        # This redefines fA1 and fA0!
        fA1 = Factor([1.0, 0.0], {'A': ['a1', 'a0']})
        fA0 = Factor([0.0, 1.0], {'A': ['a0', 'a1']})

        aligned = fA1.align_index(fB_A1)
        self.assertEqual(aligned['a1'], 1.0)
        self.assertEqual(aligned['a0'], 0.0)

        aligned = fA0.align_index(fB_A1)
        self.assertEqual(aligned['a1'], 1.0)
        self.assertEqual(aligned['a0'], 0.0)