def test_get_next(self):
    """Check the folds KFold(2, data, classes) produces for ten samples.

    ``get_next()`` is unpacked as
    ``(train_data, train_classes, test_data, test_classes)``.  The test
    verifies that:

    * iterating until ``has_next()`` is false yields exactly five folds,
      each with 8 training and 2 test samples;
    * the first, second and fifth folds hold out ``[1, 2]``, ``[3, 4]``
      and ``[9, 10]`` respectively, training on the remaining samples.
    """
    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    classes = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    # Pass 1: count the folds and check the size of every split.
    counter = 0
    kfold = KFold(2, data, classes)
    while kfold.has_next():
        # get_next() is unpacked into four values everywhere in this test;
        # a two-name unpack here would raise ValueError.
        train_d1, _train_c1, test_d1, _test_c1 = kfold.get_next()
        self.assertEqual(8, len(train_d1))
        self.assertEqual(2, len(test_d1))
        counter += 1
    self.assertEqual(5, counter)

    # Pass 2: check the actual contents of individual folds.
    kfold = KFold(2, data, classes)

    # Fold 1 holds out the first two samples.
    train_d1, _train_c1, test_d1, _test_c1 = kfold.get_next()
    self.assertListEqual(train_d1, [3, 4, 5, 6, 7, 8, 9, 10])
    self.assertListEqual(test_d1, [1, 2])

    # Fold 2 holds out the next two samples.
    train_d1, _train_c1, test_d1, _test_c1 = kfold.get_next()
    self.assertListEqual(train_d1, [1, 2, 5, 6, 7, 8, 9, 10])
    self.assertEqual(test_d1, [3, 4])

    # Skip folds 3 and 4, then capture fold 5 so the assertions below
    # examine the last fold rather than the stale values from fold 2.
    kfold.get_next()
    kfold.get_next()
    train_d1, _train_c1, test_d1, _test_c1 = kfold.get_next()
    self.assertListEqual(train_d1, [1, 2, 3, 4, 5, 6, 7, 8])
    self.assertListEqual(test_d1, [9, 10])
def test_get_next(self):
    """Verify fold count, split sizes and fold contents of KFold.

    Uses ten samples with a fold size of 2.  ``get_next()`` is unpacked as
    ``(train_data, train_classes, test_data, test_classes)``; only the data
    lists are asserted on.
    """
    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    classes = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    # Exhaust the iterator: five folds, each an 8/2 train/test split.
    counter = 0
    kfold = KFold(2, data, classes)
    while kfold.has_next():
        # Four-name unpack, consistent with the explicit calls below; the
        # two-name form would fail with "too many values to unpack".
        train_d1, _train_c1, test_d1, _test_c1 = kfold.get_next()
        self.assertEqual(8, len(train_d1))
        self.assertEqual(2, len(test_d1))
        counter += 1
    self.assertEqual(5, counter)

    # Fresh iterator to inspect the contents of specific folds.
    kfold = KFold(2, data, classes)

    # First fold: samples 1-2 are held out.
    train_d1, _train_c1, test_d1, _test_c1 = kfold.get_next()
    self.assertListEqual(train_d1, [3, 4, 5, 6, 7, 8, 9, 10])
    self.assertListEqual(test_d1, [1, 2])

    # Second fold: samples 3-4 are held out.
    train_d1, _train_c1, test_d1, _test_c1 = kfold.get_next()
    self.assertListEqual(train_d1, [1, 2, 5, 6, 7, 8, 9, 10])
    self.assertEqual(test_d1, [3, 4])

    # Advance past the third and fourth folds, then keep the fifth so the
    # final assertions check it instead of the second fold's leftovers.
    kfold.get_next()
    kfold.get_next()
    train_d1, _train_c1, test_d1, _test_c1 = kfold.get_next()
    self.assertListEqual(train_d1, [1, 2, 3, 4, 5, 6, 7, 8])
    self.assertListEqual(test_d1, [9, 10])
def _load_inputs(network, sample):
    """Write each value of *sample* into ``network.neurons[0]`` by position."""
    for index, val in enumerate(sample):
        Network.set_inputs(index, val, network.neurons[0])


def test_network(data, classes, nodes, weights,
                 fold_size=15, epochs=1000, learning_rate=0.04):
    """Cross-validate a freshly built Network on every KFold split.

    For each fold a new ``Network(nodes, weights)`` is trained on the
    training split for ``epochs`` passes and then evaluated on the test
    split.

    Args:
        data: samples; each sample is an iterable of input values.
        classes: class labels, passed to KFold alongside ``data``.
        nodes: forwarded unchanged to ``Network``.
        weights: forwarded unchanged to ``Network``.
        fold_size: first argument given to ``KFold`` (default 15).
        epochs: number of passes over each training split (default 1000).
        learning_rate: value assigned to ``network.learning_rate``
            (default 0.04).

    Returns:
        A tuple ``(correct, total, confusion_matrix)`` where ``correct`` is
        the number of test samples whose guessed class vector equalled the
        actual one, ``total`` is the number of test samples evaluated, and
        ``confusion_matrix[guess][actual]`` counts each (guess, actual)
        pair, keyed by the keys of ``t_maker.mapping``.
    """
    # Start with a zero count for every (guess, actual) combination.
    labels = list(t_maker.mapping.keys())
    confusion_matrix = {
        guess_key: {actual_key: 0 for actual_key in labels}
        for guess_key in labels
    }

    correct = 0
    total = 0
    kfold_counter = 1
    kfold = KFold(fold_size, data, classes)
    while kfold.has_next():
        # Single-argument call form prints the same text on Python 2 and 3.
        print("Starting KFold " + str(kfold_counter))
        network = Network(nodes, weights)
        network.learning_rate = learning_rate
        train_d, train_c, test_d, test_c = kfold.get_next()

        # Update the network for a fixed number of passes (no convergence
        # check is performed).
        for _ in range(epochs):
            for sample, label in zip(train_d, train_c):
                _load_inputs(network, sample)
                t_val = t_maker.get(label)
                network.feed_forward()
                network.set_errors(t_val)
                network.update_weights()

        # Evaluate the trained network on the held-out samples.
        for sample, label in zip(test_d, test_c):
            _load_inputs(network, sample)
            network.feed_forward()
            actual = t_maker.get(label)
            guess = Network.get_class_vec_from_output(network.neurons[-1])
            guess_key = t_maker.vec_to_val(guess)
            actual_key = t_maker.vec_to_val(actual)
            confusion_matrix[guess_key][actual_key] += 1
            if t_vec_maker.t_vecs_equal(actual, guess):
                correct += 1
            total += 1

        kfold_counter += 1

    # Report how many samples were really tested instead of assuming 150.
    return correct, total, confusion_matrix
# Output accumulators; they are only initialised in this part of the script.
ngram_csv = []
unseens_csv = []

# Sweep every (classification_number, ngram_size) combination.
for class_number in (1, 5, 10, 20):
    for ngram_size in (2, 3, 4, 5):
        # Fold step is one tenth of len(words), rounded down.
        fold_step = len(words) // 10
        kfold = KFold(words, step=fold_step)

        # Running totals across all folds of this configuration.
        total_correct = total_tested = total_unseen = 0

        while kfold.has_next():
            train, test = kfold.get_next()

            # Only handle folds whose test split has at least fold_step items.
            if len(test) >= fold_step:
                ng = NGram(
                    train,
                    ngram_size=ngram_size,
                    classification_number=class_number,
                )
                ng.generate_counts()

                # Per-fold cursor and tallies; counter_skip starts
                # ngram_size - 1 positions ahead of counter.
                counter = 0
                counter_skip = counter + (ngram_size - 1)
                total = correct = unseens = 0
unseens_csv = [] for class_number in [1,5,10,20]: for ngram_size in [2,3,4,5]: fold_step = len(words)//10 kfold = KFold(words, step=fold_step) total_correct = 0 total_tested = 0 total_unseen = 0 while kfold.has_next(): train, test = kfold.get_next() if fold_step <= len(test): ng = NGram(train, ngram_size=ngram_size, classification_number = class_number) ng.generate_counts() counter = 0 counter_skip = counter+ngram_size-1 total = 0 correct = 0 unseens = 0 while counter_skip < len(test):