class BayesianDataSetTest(unittest.TestCase):
    """Exercise equation decoding on the CancerMAX1 fixture data set."""

    def setUp(self):
        """Load the fixture and attach child node 4 with parents 0-3."""
        dataset = BayesianDataSet('testData/CancerMAX1.txt')
        self.bn0 = dataset
        # Original author's note: THIS IS NOT the 20k test case.
        dataset.addChildNode(
            4,
            'No',
            [0, 1, 2, 3],
            ['False', 'False', 'False', 'Medium'],
        )
        self.child_node0 = dataset.children[4]

    def test_encoding(self):
        """Decode one encoded equation and print its state labels.

        Reference values recorded by the original author for this fixture:

        * numeric domain: 0: [0, 1, 2], 1: [0, 1], 2: [0, 1], 3: [0, 1, 2]
        * parent characteristic states: [2, 1, 1, 1]
        * string domain:
          (('OnePack', 'TwoPacks', 'False'), ('True', 'False'),
           ('True', 'False'), ('Bad', 'Medium', 'Good'),
           ('Benign', 'Malignant', 'No'))
        """
        # NOTE(review): this test only prints the decoded labels; it makes
        # no assertion, so it can fail only if decoding raises.
        dataset = self.bn0
        decoded = dataset._decode_equation([1, 0, 0, 0, 1, 0], self.child_node0)
        labels = [
            dataset.domain[column][state]
            for column, state in enumerate(decoded)
        ]
        # Single-argument form prints the same text as the Python 2
        # ``print labels`` statement.
        print(labels)
def printSomething():
    """Print, per observed parent configuration, how often the child is in state 0.

    Loads ``../data/5n/10k/Network1.txt``, attaches child node 4 with
    parents 0-3, counts every distinct parent row, and for each one (most
    frequent first) prints the encoded equation together with:

    * ``n`` - number of rows with that parent configuration and child state 0
    * ``d`` - number of rows with that parent configuration
    * ``val`` - the ratio ``n / d``

    An earlier run used ``../data/NoisyOR_100k.txt`` as the input instead.
    """
    b1 = BayesianDataSet("../data/5n/10k/Network1.txt")
    b1.addChildNode(4, 'False', [0, 1, 2, 3], ['False', 'False', 'False', 'False'])
    child_node = b1.children[4]

    rows = b1.data

    def project(columns):
        # One tuple per data row, restricted to the requested columns.
        return tuple(
            tuple(rows[r][c] for c in columns)
            for r in range(len(rows))
        )

    parent_rows = project([0, 1, 2, 3])
    parent_child_counts = Counter(project([0, 1, 2, 3, 4]))

    # most_common() with no argument sorts items by count, descending.
    ranked_parent_counts = Counter(parent_rows).most_common()

    # Encode every configuration before printing anything.
    # NOTE(review): ``equation`` is a tuple; if ``parent_char_states`` is
    # stored as a list this inequality is always true and nothing is
    # filtered out - confirm the attribute's type.
    encoded_counts = []
    for equation, count in ranked_parent_counts:
        if equation != child_node.parent_char_states:
            encoded_counts.append((b1._encode_equation(equation, child_node), count))

    for eq, denom in encoded_counts:
        # 0 at the end because state 0 is True
        observed = b1._decode_equation(tuple(eq), child_node) + (0,)
        nom = parent_child_counts[observed]
        summary = "n:%s d:%s, val:%s" % (nom, denom, float(nom) / denom)
        # Same text as the Python 2 statement ``print eq, summary``.
        print("%s %s" % (eq, summary))