Exemple #1
0
def test_get_next(self):
    """Exercise KFold iteration: fold sizes, fold count, and fold contents.

    Uses 10 items with a fold size of 2, so 5 folds are expected.
    Fixed: deprecated ``assertEquals`` alias (removed in Python 3.12)
    replaced with ``assertEqual``.
    """
    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    classes = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    counter = 0
    kfold = KFold(2, data, classes)
    while kfold.has_next():
        train_d1, test_d1 = kfold.get_next()

        # Each fold: 8 training items, 2 held-out test items.
        self.assertEqual(8, len(train_d1))
        self.assertEqual(2, len(test_d1))
        counter += 1

    self.assertEqual(5, counter)

    kfold = KFold(2, data, classes)

    # NOTE(review): get_next() is unpacked into 4 values here but into 2
    # values elsewhere in this test — one of the call sites is likely
    # wrong; verify against the KFold implementation.
    train_d1, train_c1, test_d1, test_c1 = kfold.get_next()
    self.assertListEqual(train_d1, [3, 4, 5, 6, 7, 8, 9, 10])
    self.assertListEqual(test_d1, [1, 2])

    train_d1, test_d1 = kfold.get_next()
    self.assertListEqual(train_d1, [1, 2, 5, 6, 7, 8, 9, 10])
    self.assertEqual(test_d1, [3, 4])

    # Advance through the remaining three folds.
    kfold.get_next()
    kfold.get_next()
    kfold.get_next()

    # NOTE(review): train_d1/test_d1 were last assigned at the second fold;
    # these asserts only hold if get_next() mutates the returned lists in
    # place — TODO confirm.
    self.assertListEqual(train_d1, [1, 2, 3, 4, 5, 6, 7, 8])
    self.assertListEqual(test_d1, [9, 10])
def test_network(data, classes, nodes, weights):
    """Train and evaluate a Network under 15-fold cross-validation.

    Returns (correct, total, confusion_matrix), where confusion_matrix maps
    guessed class -> actual class -> count. Relies on the module-level
    ``t_maker`` / ``t_vec_maker`` helpers.

    Fixed: Python 2 ``print`` statement replaced with a ``print()`` call
    (identical output, and valid under Python 3).
    """
    # Confusion matrix initialised to zero for every (guess, actual) pair.
    confusion_matrix = {}
    for key1 in t_maker.mapping.keys():
        confusion_matrix[key1] = {}
        for key2 in t_maker.mapping.keys():
            confusion_matrix[key1][key2] = 0

    correct = 0
    kfold_counter = 1
    kfold = KFold(15, data, classes)
    while kfold.has_next():

        print("Starting KFold " + str(kfold_counter))

        network = Network(nodes, weights)
        network.learning_rate = 0.04

        train_d, train_c, test_d, test_c = kfold.get_next()

        # Train for a fixed 1000 epochs over the training fold.
        for i in range(0, 1000):
            for j in range(0, len(train_d)):

                # Feed each feature of the sample into the input layer.
                counter = 0
                for val in train_d[j]:
                    Network.set_inputs(counter, val, network.neurons[0])
                    counter += 1

                t_val = t_maker.get(train_c[j])
                network.feed_forward()
                network.set_errors(t_val)
                network.update_weights()

        # Evaluate on the held-out fold.
        for i in range(0, len(test_d)):

            counter = 0
            for val in test_d[i]:
                Network.set_inputs(counter, val, network.neurons[0])
                counter += 1

            network.feed_forward()
            actual = t_maker.get(test_c[i])
            guess = Network.get_class_vec_from_output(network.neurons[-1])
            are_equal = t_vec_maker.t_vecs_equal(actual, guess)

            confusion_matrix[t_maker.vec_to_val(guess)][t_maker.vec_to_val(actual)] += 1

            if are_equal:
                correct += 1

        kfold_counter += 1

    # NOTE(review): 150 is a hard-coded total sample count — presumably
    # len(data); confirm before reusing with other datasets.
    return correct, 150, confusion_matrix
Exemple #3
0
def training_kfold(fold=2, model_fn=VGG):
    """Train ``model_fn`` under k-fold cross-validation on the image dataset.

    Args:
        fold: number of folds. Fixed: a hard-coded ``fold = 2`` inside the
            body previously clobbered this argument, making it dead.
        model_fn: factory returning a compiled Keras model; its ``__name__``
            is used in the checkpoint / result file names.

    Side effects: writes model weights, prediction arrays (.npy) and a
    result CSV per fold.
    """
    (train, label) = CNN_Regression.load_kfold(
        path=["./Images/", "./Images2/", "./Images3/"],
        channel=3,
        new_size=(img_width, img_height))

    # scaler = preprocessing.MaxAbsScaler()
    # train = scaler.fit_transform(train)

    # Scale regression targets to [-1, 1]; inverted below for reporting.
    scaler_val = preprocessing.MaxAbsScaler()
    label = scaler_val.fit_transform(label.reshape(-1, 1))
    label = label.flatten()

    # Fixed: removed `fold = 2`, which silently overrode the parameter.
    kf = KFold(train, label, fold, 3)
    for i in range(0, fold):
        train, label, vali_train, vali_label = kf.getItem(i)
        model = model_fn(train, label, vali_train, vali_label)
        # Keep only the best weights (by validation loss) for this fold.
        model_checkpoint = ModelCheckpoint('./modelWights/weights' +
                                           model_fn.__name__ + '.h5',
                                           monitor='val_loss',
                                           save_best_only=True)
        history = model.fit(train,
                            label,
                            batch_size=data_batch_size,
                            epochs=200,
                            validation_data=(vali_train, vali_label),
                            callbacks=[model_checkpoint])

        model.save('./modelWights/regression_model' + model_fn.__name__ +
                   '.h5')

        y_pred = model.predict(vali_train,
                               batch_size=data_batch_size,
                               verbose=1)

        # Undo the label scaling so predictions are in the original units.
        vali_label = vali_label.reshape(-1, 1)
        y_pred = y_pred.reshape(-1, 1)

        vali_label = scaler_val.inverse_transform(vali_label)
        y_pred = scaler_val.inverse_transform(y_pred)
        vali_label = vali_label.flatten()
        y_pred = y_pred.flatten()
        np.save('y_pred.npy', y_pred)
        np.save('vali_label_transformed.npy', vali_label)

        CNN_Regression.save_result(
            './result/regress_' + model_fn.__name__ + str(time.time()) +
            '.csv', y_pred, vali_label)
def kfold_gen(fold = 2, path=["./Images/","./Images2/","./Images3/","./Images4/","./Images5/"], is_scale=True):
    # Generator: yields one [train, label, vali_train, vali_label, scaler_val]
    # list per fold. `scaler_val` is None when is_scale is False, so callers
    # can invert the label scaling.
    # NOTE(review): the mutable list default for `path` is shared across
    # calls — harmless only while it is never mutated here.

    (train,label) = CNN_Regression.load_kfold(path=path, channel=3, new_size = (img_width, img_height))
    # scaler = preprocessing.MaxAbsScaler()
    # train = scaler.fit_transform(train)
    scaler_val = None
    if is_scale:
        # Scale labels to [-1, 1]; the fitted scaler is yielded to the caller.
        scaler_val = preprocessing.MaxAbsScaler()
        label = scaler_val.fit_transform(label.reshape(-1, 1))
        label = label.flatten()

    # train, vali_train, label, vali_label = train_test_split(train,label, test_size=0.3)
    kf = KFold(train, label, fold, 3)
    # kf = MultiClassficationBootstraping(train, label, fold )
    print(path)
    # NOTE(review): `path` is a list here, so `path == './Images/'` can never
    # be true — presumably `path == ['./Images/']` (or `path[0]`) was
    # intended; confirm before relying on the else branch ever running.
    if len(path) > 1 or path == './Images/':
        kf.get_fold_list(start = 2, end = 2+2*len(path), label=100)
    else:
        kf.get_fold_list(start = 0, end = 2*len(path), label=100)
    # print(list(kf.split(train)))
    for i in range(0, fold):
        #yield  train, label, _, _ = kf.getItem(i)
        train, label, vali_train, vali_label = kf.getItem(i)
        yield [train, label, vali_train, vali_label, scaler_val]
Exemple #5
0
# pre-process

# Bucketise features into 5 buckets; the same PreProcess instance is applied
# to both the training data and the test set.
# NOTE(review): PP.fit is also called on the test set — if fit() re-estimates
# bucket boundaries this differs from a pure transform of the train buckets;
# confirm PreProcess.fit's semantics.
PP = PreProcess.PreProcess(data, n_buckets=5)
data = PP.fit(data)
testset = PP.fit(testset)

# cross-validation

# Search max_depth by 5-fold cross-validated F1 score.
best_depth = 3
best_f1 = 0
for d in [10, 20, 30, 40, 50, float('inf')]:
    fold_scores = []

    dt = DecisionTree.DecisionTree(max_depth=d)
    kfold = KFold.KFold(n_splits=5)

    # kf[0] holds training indices, kf[1] test indices for this fold.
    for kf in kfold.split(data):
        train = [data[i] for i in kf[0]]
        train_label = [data_labels[i] for i in kf[0]]
        test = [data[i] for i in kf[1]]
        test_label = [data_labels[i] for i in kf[1]]

        dt.fit(train, train_label)
        predict_tmp = dt.predict(test)
        fold_scores.append(Stat.F1_Score(predict_tmp, test_label))

    # Hoisted: compute the mean F1 once instead of twice.
    mean_f1 = np.mean(fold_scores)
    if mean_f1 > best_f1:
        best_f1 = mean_f1
        best_depth = d
    y = []
    # NOTE(review): this is the tail of a function whose header is not
    # visible in this chunk — presumably load_kfold (called below with no
    # arguments); `x`, `path`, `channel` and `new_size` are defined earlier.
    for i in range(len(path)):
        pathlist = os.listdir(path[i])
        # img_collect returns per-directory image arrays and labels.
        x_train, y_train = img_collect(pathlist,path[i], channel = channel, new_size = new_size)
        x += x_train
        y += y_train
        print(y_train)
    idx = [index for index in range(len(y))]
    print(len(idx))
    # y, idx = zip(*sorted(zip(y,idx)))
    x = np.array(x)
    # train = x[idx]
    train = x
    label = np.array(y)
    # Grayscale images get an explicit trailing channel axis for Keras.
    if channel == 1: train = train.reshape(train.shape+(1,))

    return (train,label)


if __name__ == "__main__":
    # Load the full dataset, then train one CNN per fold of a 10-fold split.
    train, label = load_kfold()
    kf = KFold(train, label, 10, 3)
    for fold_idx in range(10):
        train, label, vali_train, vali_label = kf.getItem(fold_idx)
        CNN(train, label, vali_train, vali_label)
    # CNN(train,label,vali_train,vali_label)
    # model = keras.models.load_model('regression_model.h5')
    # y_pred = model.predict(vali_train)
    # save_result('predict.csv',y_pred,vali_label)

Exemple #7
0
from KFold import *
from ngram import *
from list_window import *
from random import shuffle

# Run an n-gram classification experiment over Shakespeare text for several
# classification-number / n-gram-size combinations.
# NOTE(review): this snippet appears truncated — the accumulators
# (total_correct, total_tested, total_unseen) and the csv lists are never
# updated or reported in the visible code.
lines = DirectoryReader.get_lines_from_file("files/shakespeare.txt")
words = Utilities.prepare_text(lines)

ngram_csv = []
unseens_csv = []

for class_number in [1, 5, 10, 20]:
    for ngram_size in [2, 3, 4, 5]:

        # 10-fold split: each fold holds roughly a tenth of the words.
        fold_step = len(words) // 10
        kfold = KFold(words, step=fold_step)

        total_correct = 0
        total_tested = 0
        total_unseen = 0
        while kfold.has_next():

            train, test = kfold.get_next()

            # Only evaluate full-size folds; short trailing folds are skipped.
            if fold_step <= len(test):

                # Build the n-gram model from the training fold.
                ng = NGram(train,
                           ngram_size=ngram_size,
                           classification_number=class_number)
                ng.generate_counts()
Exemple #8
0
from random import shuffle


# Second copy of the n-gram experiment (same setup as the previous snippet).
# NOTE(review): also truncated — the loop body below ends after setting up
# counters; the evaluation code is not visible here.
lines = DirectoryReader.get_lines_from_file("files/shakespeare.txt")
words = Utilities.prepare_text(lines)

ngram_csv = []
unseens_csv = []



for class_number in [1,5,10,20]:
    for ngram_size in [2,3,4,5]:

        # 10-fold split: each fold holds roughly a tenth of the words.
        fold_step = len(words)//10
        kfold = KFold(words, step=fold_step)

        total_correct = 0
        total_tested = 0
        total_unseen = 0
        while kfold.has_next():

            train, test = kfold.get_next()

            # Only evaluate full-size folds; short trailing folds are skipped.
            if fold_step <= len(test):

                ng = NGram(train, ngram_size=ngram_size, classification_number = class_number)
                ng.generate_counts()

                # counter_skip points at the last word of the first n-gram
                # window — presumably used to align test iteration below;
                # the remainder is not visible here.
                counter = 0
                counter_skip = counter+ngram_size-1
Exemple #9
0
def test_get_next(self):
    """Exercise KFold iteration: fold sizes, fold count, and fold contents.

    Duplicate of the earlier test; same fix applied here: the deprecated
    ``assertEquals`` alias (removed in Python 3.12) is replaced with
    ``assertEqual``.
    """
    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    classes = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

    counter = 0
    kfold = KFold(2, data, classes)
    while kfold.has_next():
        train_d1, test_d1 = kfold.get_next()

        # Each fold: 8 training items, 2 held-out test items.
        self.assertEqual(8, len(train_d1))
        self.assertEqual(2, len(test_d1))
        counter += 1

    self.assertEqual(5, counter)

    kfold = KFold(2, data, classes)

    # NOTE(review): get_next() is unpacked into 4 values here but into 2
    # values elsewhere in this test — one of the call sites is likely
    # wrong; verify against the KFold implementation.
    train_d1, train_c1, test_d1, test_c1 = kfold.get_next()
    self.assertListEqual(train_d1, [3, 4, 5, 6, 7, 8, 9, 10])
    self.assertListEqual(test_d1, [1, 2])

    train_d1, test_d1 = kfold.get_next()
    self.assertListEqual(train_d1, [1, 2, 5, 6, 7, 8, 9, 10])
    self.assertEqual(test_d1, [3, 4])

    # Advance through the remaining three folds.
    kfold.get_next()
    kfold.get_next()
    kfold.get_next()

    # NOTE(review): train_d1/test_d1 were last assigned at the second fold;
    # these asserts only hold if get_next() mutates the returned lists in
    # place — TODO confirm.
    self.assertListEqual(train_d1, [1, 2, 3, 4, 5, 6, 7, 8])
    self.assertListEqual(test_d1, [9, 10])