def test_get_next(self):
    """Check the folds handed out by ``KFold.get_next()``.

    Ten samples are split with ``KFold(2, data, classes)``.  The test
    verifies that:

    * every fold yields 8 training items and 2 test items (for both the
      data and the class lists),
    * exactly 5 folds are produced before ``has_next()`` turns false,
    * the first, second and fifth folds contain the expected items.
    """
    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    classes = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    # Pass 1: fold sizes and total number of folds.
    counter = 0
    kfold = KFold(2, data, classes)
    while kfold.has_next():
        train_d1, train_c1, test_d1, test_c1 = kfold.get_next()
        # assertEqual replaces the assertEquals alias, which was deprecated
        # in Python 3.2 and removed in Python 3.12.
        self.assertEqual(8, len(train_d1))
        self.assertEqual(8, len(train_c1))
        self.assertEqual(2, len(test_d1))
        self.assertEqual(2, len(test_c1))
        counter += 1
    self.assertEqual(5, counter)

    # Pass 2: fold contents, using a fresh splitter over the same input.
    kfold = KFold(2, data, classes)

    # Fold 1 holds out the first two samples.
    train_d1, train_c1, test_d1, test_c1 = kfold.get_next()
    self.assertTrue(lists_are_equal(train_d1, [3, 4, 5, 6, 7, 8, 9, 10]))
    self.assertTrue(lists_are_equal(
        train_c1, ['c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']))
    self.assertTrue(lists_are_equal(test_d1, [1, 2]))
    self.assertTrue(lists_are_equal(test_c1, ['a', 'b']))

    # Fold 2 holds out the next two samples.
    train_d1, train_c1, test_d1, test_c1 = kfold.get_next()
    self.assertTrue(lists_are_equal(train_d1, [1, 2, 5, 6, 7, 8, 9, 10]))
    self.assertTrue(lists_are_equal(
        train_c1, ['a', 'b', 'e', 'f', 'g', 'h', 'i', 'j']))
    self.assertTrue(lists_are_equal(test_d1, [3, 4]))
    self.assertTrue(lists_are_equal(test_c1, ['c', 'd']))

    # Folds 3 and 4 are consumed without inspection (their sizes were
    # already checked in pass 1) so that the next call returns fold 5.
    for _ in range(2):
        train_d1, train_c1, test_d1, test_c1 = kfold.get_next()

    # Fold 5 holds out the last two samples.
    train_d1, train_c1, test_d1, test_c1 = kfold.get_next()
    self.assertTrue(lists_are_equal(train_d1, [1, 2, 3, 4, 5, 6, 7, 8]))
    self.assertTrue(lists_are_equal(
        train_c1, ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']))
    self.assertTrue(lists_are_equal(test_d1, [9, 10]))
    self.assertTrue(lists_are_equal(test_c1, ['i', 'j']))
# Reorder the samples and their class labels following the index order in
# `srt`; `new_data` is extended in place, `new_classes` is built fresh.
new_classes = []
for index in srt:
    new_data.append(data[index])
    new_classes.append(classes[index])

# setup the network: each node is initialised against the full node list
for node in nodes:
    BN.setup_node(node, nodes)

# initial confusion matrix, addressed as confusion[actual][guess]
confusion = {
    actual: {guessed: 0 for guessed in ('0', '1')}
    for actual in ('0', '1')
}

# do k-fold validation
total_count = 0
kfold = KFold(100, new_data, new_classes)
while kfold.has_next():
    # NOTE(review): get_next() is unpacked into two values and the same
    # fold is passed to the classifier as reference data and as the rows
    # being scored -- confirm this matches KFold's return shape and the
    # intended train/test separation.
    dat, cls = kfold.get_next()
    correct_count = 0
    for i, row in enumerate(dat):
        guess = BNClassifier.classify(row, nodes, dat, cls, ['0', '1'])
        actual = cls[i]
        if guess == actual:
            correct_count += 1
        # Every prediction is tallied, right or wrong.
        confusion[actual][guess] += 1
    # Accumulate the per-fold hits into the overall total.
    total_count += correct_count