def _compute_mutual_information(self, X, pair):
    """Return the empirical mutual information between a node and its parents.

    Computes ``I(node; parents) = sum p(x, pa) * log(p(x, pa) / (p(x) * p(pa)))``
    (natural logarithm) from the relative frequencies observed in ``X``.

    The computation is done on plain pandas Series instead of ``Factor``
    objects: the earlier Factor-based expression handed a ``Factor`` to
    ``np.log`` (reported as failing in the original TODO) and multiplied
    raw ``.values`` positionally without aligning them by label first.

    Args:
        X: DataFrame holding at least the columns ``pair.node`` and
            ``pair.parents``.
        pair: object with a ``node`` attribute (column name) and a
            ``parents`` attribute (iterable of column names).

    Returns:
        The mutual information as a numpy float (0.0 when no row is observed).
    """
    parents = list(pair.parents)

    def _relative_frequencies(columns):
        # Same as Factor(X.groupby(columns).size()).normalize(): counts / total.
        counts = X.groupby(columns).size()
        return counts / counts.sum()

    p_node = _relative_frequencies(pair.node)
    p_parents = _relative_frequencies(parents)
    p_joint = _relative_frequencies([*parents, pair.node])

    # groupby may emit unobserved combinations (e.g. for categorical columns)
    # with probability 0; by convention 0 * log(0) contributes nothing.
    p_joint = p_joint[p_joint > 0]

    # The node is the last grouping key, the parents are all keys before it.
    node_keys = p_joint.index.get_level_values(-1)
    parent_keys = p_joint.index.droplevel(-1)

    # Look up the marginal probability belonging to every joint cell by label,
    # so all three arrays share the exact same row order.
    joint = p_joint.to_numpy()
    marginal_node = p_node.reindex(node_keys).to_numpy()
    marginal_parents = p_parents.reindex(parent_keys).to_numpy()

    return np.sum(joint * np.log(joint / (marginal_node * marginal_parents)))
def test_creation(self):
    """Check that bad constructor input raises and a valid Factor reprs as expected."""
    # Constructing a Factor from a bare scalar is expected to raise.
    with self.assertRaises(Exception):
        Factor(0)

    states = {'A': ['a1', 'a0']}
    fA = Factor([0.6, 0.4], states)

    expected_repr = 'factor(A)\nA \na1 0.6\na0 0.4\ndtype: float64'
    self.assertEqual(repr(fA), expected_repr)
def _compute_conditional_distributions(self, X):
    """Learn one conditional probability table per node and store them in ``self.cpt_``.

    Works in two passes over ``self.network_``:

    1. Pairs from index ``self.degree_network`` onwards get a differentially
       private joint distribution (via ``dp_utils.dp_joint_distribution``)
       that is turned into a normalized CPT.
    2. Pairs before that index are handled in reverse order; their CPTs are
       derived from the noisy joint of the first pair of pass 1 by repeatedly
       summing out the node just handled, so no additional noise is drawn.

    Args:
        X: data indexable by a list of column names (``X[[...]]``).

    Returns:
        ``self``, with ``self.cpt_`` set to a dict mapping node -> CPT.
    """
    message = 'Learning conditional probabilities: {} - with parents {} ~ estimated size: {}'
    conditionals = {}

    # Budget reserved for this step, divided by the node count of pass 1
    # (presumably _n_columns_fit - degree_network nodes -- confirm).
    epsilon_per_node = self.epsilon * self.epsilon_split[1] / (
        self._n_columns_fit - self.degree_network)

    # Pass 1: materialize noisy distributions for the nodes from index
    # degree_network onwards.
    for position, pair in enumerate(self.network_[self.degree_network:]):
        node = pair.node
        attributes = [*pair.parents, node]

        cpt_size = utils.get_size_contingency_table(X[attributes])
        if self.verbose >= 2:
            print(message.format(node, pair.parents, cpt_size))

        noisy_joint = dp_utils.dp_joint_distribution(
            X[attributes], epsilon=epsilon_per_node)

        # todo: use custom normalization to fill missing values with uniform
        conditionals[node] = utils.normalize_cpt(
            CPT(noisy_joint, conditioned_variables=[node]), dropna=False)

        # Keep the noisy joint of the first pair (minus its own node) as the
        # source from which the earlier nodes are inferred in pass 2.
        if position == 0:
            infer_from_distribution = Factor(noisy_joint).sum_out(node)

    # Pass 2: walk back towards the root, deriving each CPT from the retained
    # distribution and then summing the handled node out of it.
    for pair in reversed(self.network_[:self.degree_network]):
        node = pair.node
        attributes = [node] if pair.parents is None else [*pair.parents, node]

        cpt_size = utils.get_size_contingency_table(X[attributes])
        if self.verbose >= 2:
            print(message.format(node, pair.parents, cpt_size))

        conditionals[node] = utils.normalize_cpt(
            CPT(infer_from_distribution, conditioned_variables=[node]),
            dropna=False)
        infer_from_distribution = infer_from_distribution.sum_out(node)

    self.cpt_ = conditionals
    return self
def test_from_data(self):
    """Build an (empirical) distribution from a packaged CSV dataset."""
    scope = ['H', 'S', 'E']

    csv_path = thomas.core.get_pkg_data('dataset_17_2.csv')
    data = pd.read_csv(csv_path, sep=';')

    factor = Factor.from_data(data, cols=scope)

    # The factor must cover exactly the requested columns (order-insensitive).
    self.assertEqual(set(factor.scope), set(scope))
    # Total of all entries; presumably the number of rows in the dataset.
    self.assertEqual(factor.sum(), 16)
def test_state_order(self):
    """Test that a Factor keeps its states in order and/or its index correct.

    Regression test for GitHub issue #1.

    Uses ``assertEqual``: the ``assertEquals`` alias is deprecated and was
    removed from ``unittest`` in Python 3.12.
    """
    # P(A): states are deliberately listed as a1 before a0.
    fA = Factor([0.6, 0.4], {'A': ['a1', 'a0']})
    self.assertEqual(fA['a1'], 0.6)
    self.assertEqual(fA['a0'], 0.4)

    # P(B|A): values are given with A as the outer and B as the inner variable.
    fB_A = Factor([0.2, 0.8, 0.75, 0.25], {
        'A': ['a1', 'a0'],
        'B': ['b1', 'b0'],
    })
    self.assertEqual(fB_A['a1', 'b1'], 0.20)
    self.assertEqual(fB_A['a1', 'b0'], 0.80)
    self.assertEqual(fB_A['a0', 'b1'], 0.75)
    self.assertEqual(fB_A['a0', 'b0'], 0.25)
def test_serialization_complex(self):
    """Test the dict (JSON) serialization round trip of a multi-variable Factor.

    Uses the specific assertions (``assertIn``, ``assertEqual``) so failures
    report the offending values; ``assertEquals`` is a deprecated alias that
    was removed from ``unittest`` in Python 3.12.
    """
    # Only the second factor is exercised; unpacking into five targets still
    # requires that exactly five factors are returned.
    _, fB_A, _, _, _ = examples.get_sprinkler_factors()

    dict_repr = fB_A.as_dict()

    # Make sure the expected keys exist
    for key in ['type', 'scope', 'states', 'data']:
        self.assertIn(key, dict_repr)

    self.assertEqual(dict_repr['type'], 'Factor')

    # Deserializing must give back the same scope and states.
    fB_A2 = Factor.from_dict(dict_repr)
    self.assertEqual(fB_A.scope, fB_A2.scope)
    self.assertEqual(fB_A.states, fB_A2.states)
def test_multiplication_state_order(self):
    """Test factor multiplication with states ordered differently.

    Uses ``assertAlmostEqual``: the ``assertAlmostEquals`` alias is
    deprecated and was removed from ``unittest`` in Python 3.12.
    """
    # States ordered ['T', 'F']; T=1.0, F=0.0
    fE_out_of_order = Factor.from_dict({
        'type': 'Factor',
        'scope': ['E'],
        'states': {'E': ['T', 'F']},
        'data': [1.0, 0.0],
    })

    # Same distribution, but states ordered ['F', 'T'].
    fE = Factor.from_dict({
        'type': 'Factor',
        'scope': ['E'],
        'states': {'E': ['F', 'T']},
        'data': [0.0, 1.0],
    })

    # The product must be matched by state label, not by position.
    multiplied = fE * fE_out_of_order
    self.assertAlmostEqual(multiplied['T'], 1.0, places=2)
    self.assertAlmostEqual(multiplied['F'], 0.0, places=2)
def test_values(self):
    """Test that ``Factor.values`` is a numpy array."""
    fA = Factor([0.6, 0.4], {'A': ['a1', 'a0']})
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)) which only says "False is not true".
    self.assertIsInstance(fA.values, np.ndarray)
def test_align_index(self):
    """Test index aligning.

    Uses ``assertEqual`` / ``assertIsInstance``: ``assertEquals`` is a
    deprecated alias that was removed from ``unittest`` in Python 3.12.
    """
    fA1 = Factor.from_dict({
        'type': 'Factor',
        'scope': ['A'],
        'states': {'A': ['a1', 'a2', 'a3']},
        'data': [1.0, 2.0, 3.0],
    })

    # Same variable as fA1, different state order.
    fA2 = Factor.from_dict({
        'type': 'Factor',
        'scope': ['A'],
        'states': {'A': ['a3', 'a1', 'a2']},
        'data': [3.0, 1.0, 0.0],
    })

    # Different variable: aligning against it must fail.
    fB = Factor.from_dict({
        'type': 'Factor',
        'scope': ['B'],
        'states': {'B': ['b3', 'b1', 'b2']},
        'data': [3.0, 1.0, 0.0],
    })

    # Aligning keeps fA1's own values; they are still addressable by label.
    aligned = fA1.align_index(fA2)
    self.assertIsInstance(aligned, Factor)
    self.assertEqual(aligned.scope, fA1.scope)
    self.assertEqual(aligned.scope, fA2.scope)
    self.assertEqual(aligned['a1'], 1.0)
    self.assertEqual(aligned['a2'], 2.0)
    self.assertEqual(aligned['a3'], 3.0)

    with self.assertRaises(error.IncompatibleScopeError):
        fA1.align_index(fB)

    # Same factors, but one variable has a different order.
    fB_A1 = Factor([0.2, 0.8, 0.75, 0.25], {
        'A': ['a1', 'a0'],
        'B': ['b1', 'b0'],
    })
    fB_A2 = Factor([0.8, 0.2, 0.25, 0.75], {
        'A': ['a1', 'a0'],
        'B': ['b0', 'b1'],
    })

    aligned = fB_A1.align_index(fB_A2)
    self.assertEqual(aligned['a1', 'b1'], 0.2)
    self.assertEqual(aligned['a1', 'b0'], 0.8)
    self.assertEqual(aligned['a0', 'b1'], 0.75)
    self.assertEqual(aligned['a0', 'b0'], 0.25)

    aligned = fB_A2.align_index(fB_A1)
    self.assertEqual(aligned['a1', 'b1'], 0.2)
    self.assertEqual(aligned['a1', 'b0'], 0.8)
    self.assertEqual(aligned['a0', 'b1'], 0.75)
    self.assertEqual(aligned['a0', 'b0'], 0.25)

    # NOTE: fA1 is rebound here (it no longer refers to the three-state
    # factor above); fA0 is a new factor with the states in reverse order.
    fA1 = Factor([1.0, 0.0], {'A': ['a1', 'a0']})
    fA0 = Factor([0.0, 1.0], {'A': ['a0', 'a1']})

    # Aligning a single-variable factor against a factor over A and B.
    aligned = fA1.align_index(fB_A1)
    self.assertEqual(aligned['a1'], 1.0)
    self.assertEqual(aligned['a0'], 0.0)

    aligned = fA0.align_index(fB_A1)
    self.assertEqual(aligned['a1'], 1.0)
    self.assertEqual(aligned['a0'], 0.0)