def test_get_next(self):
    """KFold(2, ...) over 10 items should yield five 8/2 train/test folds.

    First pass counts the folds via has_next(); second pass checks the
    concrete membership of the first, second and final folds.
    """
    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    classes = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
    counter = 0
    kfold = KFold(2, data, classes)
    while kfold.has_next():
        train_d1, test_d1 = kfold.get_next()
        # assertEquals is a deprecated alias of assertEqual (removed in
        # Python 3.12); use the canonical name.
        self.assertEqual(8, len(train_d1))
        self.assertEqual(2, len(test_d1))
        counter += 1
    self.assertEqual(5, counter)

    kfold = KFold(2, data, classes)
    # NOTE(review): get_next() is unpacked into 4 values here but into 2
    # everywhere else in this test -- confirm KFold.get_next's arity.
    train_d1, train_c1, test_d1, test_c1 = kfold.get_next()
    self.assertListEqual(train_d1, [3, 4, 5, 6, 7, 8, 9, 10])
    self.assertListEqual(test_d1, [1, 2])
    train_d1, test_d1 = kfold.get_next()
    self.assertListEqual(train_d1, [1, 2, 5, 6, 7, 8, 9, 10])
    self.assertEqual(test_d1, [3, 4])
    # advance past folds 3 and 4 ...
    kfold.get_next()
    kfold.get_next()
    # BUG FIX: the original discarded this final return value, so the
    # assertions below checked stale fold-2 data and could never pass.
    # Unpack the last fold so they actually test it.
    train_d1, test_d1 = kfold.get_next()
    self.assertListEqual(train_d1, [1, 2, 3, 4, 5, 6, 7, 8])
    self.assertListEqual(test_d1, [9, 10])
def test_network(data, classes, nodes, weights):
    """Run 15-fold cross-validation of a Network over data/classes.

    Trains a fresh Network per fold (1000 epochs, learning rate 0.04),
    then scores it on the held-out split.

    Returns:
        (correct_count, total, confusion_matrix) where
        confusion_matrix[guess][actual] counts predictions.
        NOTE(review): total is hard-coded to 150 -- presumably
        len(data); confirm against callers before relying on it.
    """
    # guess -> actual -> count, keyed by every class label t_maker knows
    confusion_matrix = {}
    for key1 in t_maker.mapping.keys():
        confusion_matrix[key1] = {key2: 0 for key2 in t_maker.mapping.keys()}
    correct = 0
    kfold_counter = 1
    kfold = KFold(15, data, classes)
    while kfold.has_next():
        # BUG FIX: `print "..."` is Python-2-only syntax (a SyntaxError on
        # Python 3); the function-call form behaves identically on both.
        print("Starting KFold " + str(kfold_counter))
        network = Network(nodes, weights)
        network.learning_rate = 0.04
        train_d, train_c, test_d, test_c = kfold.get_next()
        # update network until it levels out: 1000 passes over the fold
        for _ in range(1000):
            for sample, cls in zip(train_d, train_c):
                for idx, val in enumerate(sample):
                    Network.set_inputs(idx, val, network.neurons[0])
                t_val = t_maker.get(cls)
                network.feed_forward()
                network.set_errors(t_val)
                network.update_weights()
        # evaluate on the held-out split
        for sample, cls in zip(test_d, test_c):
            for idx, val in enumerate(sample):
                Network.set_inputs(idx, val, network.neurons[0])
            network.feed_forward()
            actual = t_maker.get(cls)
            guess = Network.get_class_vec_from_output(network.neurons[-1])
            are_equal = t_vec_maker.t_vecs_equal(actual, guess)
            confusion_matrix[t_maker.vec_to_val(guess)][t_maker.vec_to_val(actual)] += 1
            if are_equal:
                correct += 1
        kfold_counter += 1
    return correct, 150, confusion_matrix
def training_kfold(fold=2, model_fn=VGG):
    """Train `model_fn` with `fold`-fold cross-validation on the image sets.

    For each fold: build the model, fit it with val_loss checkpointing,
    save the model, predict on the validation split, undo the label
    scaling, and dump predictions/targets to .npy files and a CSV report.

    Args:
        fold: number of cross-validation folds.
        model_fn: model factory taking (train, label, vali_train, vali_label);
            its __name__ is used in the output file names.
    """
    (train, label) = CNN_Regression.load_kfold(
        path=["./Images/", "./Images2/", "./Images3/"],
        channel=3, new_size=(img_width, img_height))
    # Scale labels into [-1, 1]; the scaler is kept so predictions can be
    # mapped back to the original label units below.
    scaler_val = preprocessing.MaxAbsScaler()
    label = scaler_val.fit_transform(label.reshape(-1, 1))
    label = label.flatten()
    # BUG FIX: the original re-assigned `fold = 2` here, silently ignoring
    # the caller's `fold` argument; honor the parameter instead.
    kf = KFold(train, label, fold, 3)
    for i in range(fold):
        # NOTE(review): `train`/`label` are rebound to this fold's training
        # split; kf presumably retains the full data set -- confirm.
        train, label, vali_train, vali_label = kf.getItem(i)
        model = model_fn(train, label, vali_train, vali_label)
        model_checkpoint = ModelCheckpoint(
            './modelWights/weights' + model_fn.__name__ + '.h5',
            monitor='val_loss', save_best_only=True)
        model.fit(train, label, batch_size=data_batch_size, epochs=200,
                  validation_data=(vali_train, vali_label),
                  callbacks=[model_checkpoint])
        model.save('./modelWights/regression_model' + model_fn.__name__ + '.h5')
        y_pred = model.predict(vali_train, batch_size=data_batch_size, verbose=1)
        vali_label = vali_label.reshape(-1, 1)
        y_pred = y_pred.reshape(-1, 1)
        # undo the MaxAbs scaling so results are in the original label units
        vali_label = scaler_val.inverse_transform(vali_label)
        y_pred = scaler_val.inverse_transform(y_pred)
        vali_label = vali_label.flatten()
        y_pred = y_pred.flatten()
        np.save('y_pred.npy', y_pred)
        np.save('vali_label_transformed.npy', vali_label)
        CNN_Regression.save_result(
            './result/regress_' + model_fn.__name__ + str(time.time()) + '.csv',
            y_pred, vali_label)
def kfold_gen(fold = 2, path=["./Images/","./Images2/","./Images3/","./Images4/","./Images5/"], is_scale=True): (train,label) = CNN_Regression.load_kfold(path=path, channel=3, new_size = (img_width, img_height)) # scaler = preprocessing.MaxAbsScaler() # train = scaler.fit_transform(train) scaler_val = None if is_scale: scaler_val = preprocessing.MaxAbsScaler() label = scaler_val.fit_transform(label.reshape(-1, 1)) label = label.flatten() # train, vali_train, label, vali_label = train_test_split(train,label, test_size=0.3) kf = KFold(train, label, fold, 3) # kf = MultiClassficationBootstraping(train, label, fold ) print(path) if len(path) > 1 or path == './Images/': kf.get_fold_list(start = 2, end = 2+2*len(path), label=100) else: kf.get_fold_list(start = 0, end = 2*len(path), label=100) # print(list(kf.split(train))) for i in range(0, fold): #yield train, label, _, _ = kf.getItem(i) train, label, vali_train, vali_label = kf.getItem(i) yield [train, label, vali_train, vali_label, scaler_val]
# pre-process PP = PreProcess.PreProcess(data, n_buckets=5) data = PP.fit(data) testset = PP.fit(testset) # cross-validation best_depth = 3 best_f1 = 0 for d in [10, 20, 30, 40, 50, float('inf')]: tmp = [] dt = DecisionTree.DecisionTree(max_depth=d) kfold = KFold.KFold(n_splits=5) for kf in kfold.split(data): train = [data[i] for i in kf[0]] train_label = [data_labels[i] for i in kf[0]] test = [data[i] for i in kf[1]] test_label = [data_labels[i] for i in kf[1]] dt.fit(train, train_label) predict_tmp = dt.predict(test) tmp.append(Stat.F1_Score(predict_tmp, test_label)) if np.mean(tmp) > best_f1: best_f1 = np.mean(tmp) best_depth = d
    # --- tail of load_kfold: the function header (and the `x = []`
    # accumulator this code extends) lies above the visible region ---
    y = []
    for i in range(len(path)):
        # collect images and labels from each directory and extend the pools
        pathlist = os.listdir(path[i])
        x_train, y_train = img_collect(pathlist,path[i], channel = channel, new_size = new_size)
        x += x_train
        y += y_train
        print(y_train)
    idx = [index for index in range(len(y))]
    print(len(idx))
    # y, idx = zip(*sorted(zip(y,idx)))
    x = np.array(x)
    # train = x[idx]
    train = x
    label = np.array(y)
    if channel == 1:
        # add a trailing channel axis so single-channel images are (h, w, 1)
        train = train.reshape(train.shape+(1,))
    return (train,label)


if __name__ == "__main__":
    # smoke run: 10-fold cross-validation feeding each split to CNN
    (train,label) = load_kfold()
    kf = KFold(train,label,10,3)
    for i in range(0,10):
        train,label,vali_train,vali_label = kf.getItem(i)
        CNN(train,label,vali_train,vali_label)
        # CNN(train,label,vali_train,vali_label)
        # model = keras.models.load_model('regression_model.h5')
        # y_pred = model.predict(vali_train)
        # save_result('predict.csv',y_pred,vali_label)
from KFold import *
from ngram import *
from list_window import *
from random import shuffle

# Load and normalise the Shakespeare corpus into a flat word list.
# NOTE(review): DirectoryReader/Utilities come from a star import above
# or from outside the visible region -- confirm their origin.
lines = DirectoryReader.get_lines_from_file("files/shakespeare.txt")
words = Utilities.prepare_text(lines)

# Accumulators for CSV report rows (populated past the visible region;
# this chunk is truncated mid-loop below).
ngram_csv = []
unseens_csv = []

# Grid search: classification_number x ngram size, each combination
# scored with 10-fold cross-validation over the word list.
for class_number in [1, 5, 10, 20]:
    for ngram_size in [2, 3, 4, 5]:
        fold_step = len(words) // 10  # one tenth of the corpus per fold
        kfold = KFold(words, step=fold_step)
        total_correct = 0
        total_tested = 0
        total_unseen = 0
        while kfold.has_next():
            train, test = kfold.get_next()
            # only evaluate folds of full size (skip a short trailing fold)
            if fold_step <= len(test):
                ng = NGram(train, ngram_size=ngram_size, classification_number=class_number)
                ng.generate_counts()
from random import shuffle

# Load and normalise the Shakespeare corpus into a flat word list.
# NOTE(review): DirectoryReader/Utilities/KFold/NGram are brought into
# scope outside the visible region -- this chunk is truncated at both ends.
lines = DirectoryReader.get_lines_from_file("files/shakespeare.txt")
words = Utilities.prepare_text(lines)

# Accumulators for CSV report rows (populated past the visible region).
ngram_csv = []
unseens_csv = []

# Grid search: classification number x ngram size, each combination
# scored with 10-fold cross-validation over the word list.
for class_number in [1,5,10,20]:
    for ngram_size in [2,3,4,5]:
        fold_step = len(words)//10  # one tenth of the corpus per fold
        kfold = KFold(words, step=fold_step)
        total_correct = 0
        total_tested = 0
        total_unseen = 0
        while kfold.has_next():
            train, test = kfold.get_next()
            # only evaluate folds of full size (skip a short trailing fold)
            if fold_step <= len(test):
                ng = NGram(train, ngram_size=ngram_size, classification_number = class_number)
                ng.generate_counts()
                counter = 0
                # offset of the predicted word within the current window
                counter_skip = counter+ngram_size-1
def test_get_next(self):
    """KFold(2, ...) over 10 items should yield five 8/2 train/test folds.

    First pass counts the folds via has_next(); second pass checks the
    concrete membership of the first, second and final folds.
    """
    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    classes = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
    counter = 0
    kfold = KFold(2, data, classes)
    while kfold.has_next():
        train_d1, test_d1 = kfold.get_next()
        # assertEquals is a deprecated alias of assertEqual (removed in
        # Python 3.12); use the canonical name.
        self.assertEqual(8, len(train_d1))
        self.assertEqual(2, len(test_d1))
        counter += 1
    self.assertEqual(5, counter)

    kfold = KFold(2, data, classes)
    # NOTE(review): get_next() is unpacked into 4 values here but into 2
    # everywhere else in this test -- confirm KFold.get_next's arity.
    train_d1, train_c1, test_d1, test_c1 = kfold.get_next()
    self.assertListEqual(train_d1, [3, 4, 5, 6, 7, 8, 9, 10])
    self.assertListEqual(test_d1, [1, 2])
    train_d1, test_d1 = kfold.get_next()
    self.assertListEqual(train_d1, [1, 2, 5, 6, 7, 8, 9, 10])
    self.assertEqual(test_d1, [3, 4])
    # advance past folds 3 and 4 ...
    kfold.get_next()
    kfold.get_next()
    # BUG FIX: the original discarded this final return value, so the
    # assertions below checked stale fold-2 data and could never pass.
    # Unpack the last fold so they actually test it.
    train_d1, test_d1 = kfold.get_next()
    self.assertListEqual(train_d1, [1, 2, 3, 4, 5, 6, 7, 8])
    self.assertListEqual(test_d1, [9, 10])