Beispiel #1
0
def main():
    """Train an Xception-based classifier from image directories and save it.

    The data is read from ``<parent of cwd>/datasets/small_721`` through
    ``ImageDataGenerator.flow_from_directory``.  Training runs in two phases
    (a warm-up with the base model frozen, then fine tuning after
    ``ModelHandler.setFineTune``), followed by evaluation on the test
    directory, a 2x2 confusion matrix, and saving the model as an ``.h5``
    file under ``<parent of cwd>/outputs/models``.

    Relies on names that are not defined inside this function and must exist
    at module level: ``target_size``, ``batch_size``, ``input_size``,
    ``channel`` and ``set_epochs``.

    NOTE(review): ``fit_generator`` / ``evaluate_generator`` /
    ``predict_generator`` are deprecated in recent TensorFlow releases in
    favour of ``fit`` / ``evaluate`` / ``predict`` -- confirm against the
    installed Keras version before migrating.
    """
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)
    data_src = os.path.join(sub_prj, "datasets", "small_721")
    print("\ndata source: ", data_src)

    # Toggle between the augmented and the plain training directory.
    use_da_data = True
    train_dir = os.path.join(data_src,
                             "train_with_aug" if use_da_data else "train")
    validation_dir = os.path.join(data_src, "validation")
    test_dir = os.path.join(data_src, "test")

    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)

    # data load ----------
    data_gen = ImageDataGenerator(rescale=1. / 255)

    train_generator = data_gen.flow_from_directory(train_dir,
                                                   target_size=target_size,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   class_mode='categorical')

    validation_generator = data_gen.flow_from_directory(
        validation_dir,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical')

    # shuffle=False keeps the test batches in a fixed order so that the
    # predictions and the labels collected below line up.
    test_generator = data_gen.flow_from_directory(test_dir,
                                                  target_size=target_size,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  class_mode='categorical')

    # Pull one batch only to report the batch shapes.
    data_checker, label_checker = next(train_generator)
    print("train data shape (in batch): ", data_checker.shape)
    print("train label shape (in batch): ", label_checker.shape)

    # build model ----------
    mh = ModelHandler(input_size, channel)

    # Keep a handle on base_model so its weights can be unfrozen later.
    base_model = mh.buildXceptionBase()
    base_model.trainable = False

    model = mh.addChead(base_model)
    model.summary()

    # instance EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    # Integer division: a trailing partial batch is not counted as a step.
    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    start = time.time()

    # warm up (base model frozen) -----
    print("\nwarm up sequence .....")
    model.summary()
    model.fit_generator(train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=set_epochs,
                        validation_data=validation_generator,
                        validation_steps=validation_steps,
                        callbacks=[es],
                        verbose=1)

    # fine tuning -----
    print("\nfine tuning.....")
    mh.setFineTune(base_model, model, 108)
    model.summary()

    model.fit_generator(train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=set_epochs,
                        validation_data=validation_generator,
                        validation_steps=validation_steps,
                        callbacks=[es],
                        verbose=1)
    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    print("\nevaluate sequence...")
    test_steps = test_generator.n // batch_size
    eval_res = model.evaluate_generator(test_generator,
                                        steps=test_steps,
                                        verbose=1)

    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # confusion matrix -----
    print("\nconfusion matrix")
    # Rewind the iterator so predictions start from the first test batch.
    test_generator.reset()
    pred = model.predict_generator(test_generator, steps=test_steps, verbose=3)

    # Rewind again and collect the one-hot labels of the same batches,
    # stacking them once instead of re-allocating on every iteration.
    test_generator.reset()
    test_label = np.vstack(
        [next(test_generator)[1] for _ in range(test_steps)])

    idx_label = np.argmax(test_label, axis=-1)  # one-hot => class index
    idx_pred = np.argmax(pred, axis=-1)  # class probabilities => best class

    # Pin both class labels so the matrix is always 2x2, even when one class
    # is missing from the evaluated batches; the unpacking below needs
    # exactly four cells (binary classification).
    cm = confusion_matrix(idx_label, idx_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    print("  | T  | F ")
    print("--+----+---")
    print("N | {} | {}".format(tn, fn))
    print("--+----+---")
    print("P | {} | {}".format(tp, fp))

    # save model -----
    save_location = os.path.join(sub_prj, "outputs", "models")
    # The output directory may not exist on a fresh checkout.
    os.makedirs(save_location, exist_ok=True)
    file_name = ("auged_xception_model.h5"
                 if use_da_data else "xception_model.h5")
    save_file = os.path.join(save_location, file_name)
    model.save(save_file)
    print("\nmodel has saved in", save_file)
Beispiel #2
0
def main(N, LEARN_PATH, DATA_MODE, EPOCHS=60, FINE_TUNE_AT=81):
    """Train a VGG16-based cat/dog classifier on one sample set.

    Images are loaded into memory with ``inputDataCreator`` from
    ``LEARN_PATH/sample_<N>/{train,validation,test}``.  Training runs a
    warm-up phase with the base model frozen, then a second phase after
    ``ModelHandler.setFineTune``.  The model is then used to predict and
    evaluate on the test data.

    Args:
        N: Sample number; selects the ``sample_<N>`` directory.
        LEARN_PATH: Directory that contains the ``sample_<N>`` folders.
        DATA_MODE: Not used by this function.
            NOTE(review): the augmented-data switch below is hard-coded to
            False; this argument was presumably meant to drive it -- confirm
            with the caller.
        EPOCHS: Maximum number of epochs for each training phase.
        FINE_TUNE_AT: Forwarded to ``ModelHandler.setFineTune``.

    Returns:
        dict with the keys ``last_loss``, ``last_acc``, ``last_val_loss``,
        ``last_val_acc`` (final epoch of the second training phase),
        ``n_confuse`` (number of misclassified test samples), ``eval_loss``,
        ``eval_acc`` and ``elapsed_time`` (training time in seconds).
    """
    sample_dir = os.path.join(LEARN_PATH, "sample_{}".format(N))

    use_da_data = False
    train_dir = os.path.join(sample_dir,
                             "train_with_aug" if use_da_data else "train")
    validation_dir = os.path.join(sample_dir, "validation")
    test_dir = os.path.join(sample_dir, "test")

    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    train_data, train_label = inputDataCreator(train_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)

    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         224,
                                                         normalize=True,
                                                         one_hot=True)

    test_data, test_label = inputDataCreator(test_dir,
                                             224,
                                             normalize=True,
                                             one_hot=True)

    print("\ntrain data shape: ", train_data.shape)
    print("train label shape: ", train_label.shape)
    print("\nvalidation data shape: ", validation_data.shape)
    print("validation label shape: ", validation_label.shape)

    input_size = train_data.shape[1]
    channel = train_data.shape[3]
    batch_size = 10
    print("set epochs: ", EPOCHS)

    # build model ----------
    mh = ModelHandler(input_size, channel)

    # Keep a handle on base_model so its weights can be unfrozen later.
    base_model = mh.buildVgg16Base()
    base_model.trainable = False

    model = mh.addChead(base_model)
    model.summary()

    # early stopping
    es = keras.callbacks.EarlyStopping(monitor='val_loss',
                                       patience=5,
                                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    start = time.time()

    # warm up (base model frozen) -----
    print("\nwarm up sequence .....")
    model.summary()
    _history = model.fit(train_data,
                         train_label,
                         batch_size=batch_size,
                         epochs=EPOCHS,
                         validation_data=(validation_data, validation_label),
                         callbacks=[es],
                         verbose=2)

    mh.setFineTune(base_model, model, FINE_TUNE_AT)
    model.summary()

    history = model.fit(train_data,
                        train_label,
                        batch_size=batch_size,
                        epochs=EPOCHS,
                        validation_data=(validation_data, validation_label),
                        callbacks=[es],
                        verbose=2)

    elapsed_time = time.time() - start
    # Print the same value that is stored in the result dict.
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    # Only the second (fine-tuning) phase is reported.
    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']

    print("\npredict sequence...")
    pred = model.predict(test_data,
                         batch_size=batch_size,
                         verbose=1)

    # Turn the one-hot test labels into names; column 0 is 'cat' and
    # column 1 is 'dog'.  Rows matching neither are skipped.
    label_name_list = []
    for one_hot_row in test_label:
        if one_hot_row[0] == 1:
            label_name_list.append('cat')
        elif one_hot_row[1] == 1:
            label_name_list.append('dog')

    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    # 'collect' holds True where the predicted class matches the label.
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    confuse = df_pred[~df_pred['collect']].index.tolist()
    collect = df_pred[df_pred['collect']].index.tolist()

    print(df_pred)
    print("\nwrong recognized indeices are ", confuse)
    print("  wrong recognized amount is ", len(confuse))
    print("\ncollect recognized indeices are ", collect)
    print("  collect recognized amount is ", len(collect))
    print("\nwrong rate: ", 100*len(confuse)/len(test_label), " %")

    print("\nevaluate sequence...")

    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=batch_size,
                              verbose=1)

    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # ----------
    save_dict = {
        'last_loss': losses[-1],
        'last_acc': accs[-1],
        'last_val_loss': val_losses[-1],
        'last_val_acc': val_accs[-1],
        'n_confuse': len(confuse),
        'eval_loss': eval_res[0],
        'eval_acc': eval_res[1],
        'elapsed_time': elapsed_time,
    }

    print(save_dict)

    # Drop the heavy objects before clearing the Keras session.
    del model
    del _history, history

    keras.backend.clear_session()
    gc.collect()

    return save_dict
Beispiel #3
0
def main(N,
         LEARN_PATH,
         MODE,
         BUILD_MODEL,
         EPOCHS=60,
         BATCH_SIZE=20,
         FINE_TUNE_AT=81):
    """Run 5-fold stratified cross-validation and export the results as CSV.

    All images under ``LEARN_PATH/natural`` are loaded and split with
    ``StratifiedKFold(n_splits=5)``.  In each fold the non-test part is
    split 3:1 into train/validation with ``dataSplit``, a model is trained,
    and it is then used to predict and evaluate on the fold's test part.
    The per-fold metrics are collected column-wise in a DataFrame and
    written to ``<parent of cwd>/flog/sample_<N>_<MODE>_<BUILD_MODEL>_result.csv``.

    Args:
        N: Sample number; only used in the CSV file name.
        LEARN_PATH: Directory containing ``natural`` (and ``auged`` when
            ``MODE == 'auged'``).
        MODE: When equal to ``'auged'``, augmented data from
            ``LEARN_PATH/auged`` is appended to the training data and the
            epoch budget is halved.
        BUILD_MODEL: ``'mymodel'`` trains ``ModelHandler.buildMyModel()``
            directly; ``'tlearn'`` does a 10-epoch warm-up on a frozen
            MobileNet base followed by fine tuning.
        EPOCHS: Maximum epochs for the main training phase.
        BATCH_SIZE: Batch size used for training.
        FINE_TUNE_AT: Forwarded to ``ModelHandler.setFineTune``.

    Raises:
        ValueError: If ``BUILD_MODEL`` is not one of the supported values.
    """
    # Fail early: an unknown value would otherwise only surface after data
    # loading, as an unbound `history`/`model` name.
    if BUILD_MODEL not in ('mymodel', 'tlearn'):
        raise ValueError(
            "BUILD_MODEL must be 'mymodel' or 'tlearn', got {!r}".format(
                BUILD_MODEL))

    # Labels are loaded as class indices (not one-hot) because
    # StratifiedKFold.split is given them directly as the target.
    total_data, total_label = inputDataCreator(os.path.join(
        LEARN_PATH, "natural"),
                                               224,
                                               normalize=True)

    print("\ntotal_data shape: ", total_data.shape)
    print("total_label shape: ", total_label.shape)

    epochs = EPOCHS
    if MODE == 'auged':
        auged_dir = os.path.join(LEARN_PATH, "auged")
        # Augmented runs use half the epoch budget.
        epochs = EPOCHS // 2

        total_auged_data, total_auged_label = inputDataCreator(auged_dir,
                                                               224,
                                                               normalize=True,
                                                               one_hot=True)
        print("\n  total auged_data : ", total_auged_data.shape)

    input_size = total_data.shape[1]
    channel = total_data.shape[3]

    mh = ModelHandler(input_size, channel)

    skf = StratifiedKFold(n_splits=5)

    # The log directory does not depend on the fold, so create it once.
    # The working directory is queried here because no `cwd` is defined in
    # this function.
    log_dir = os.path.join(os.path.dirname(os.getcwd()), "flog")
    os.makedirs(log_dir, exist_ok=True)

    for k, (traval_idx, test_idx) in enumerate(
            skf.split(total_data, total_label)):
        print("\nK-Fold Cross-Validation k:{} ==========".format(k))

        print("\ntrain indices: \n", traval_idx)
        print("\ntest indices: \n", test_idx)

        test_data = total_data[test_idx]
        test_label = total_label[test_idx]

        print("-----*-----*-----")

        traval_data = total_data[traval_idx]
        traval_label = total_label[traval_idx]

        # Class indices => one-hot rows (two classes).
        traval_label = np.identity(2)[traval_label.astype(np.int8)]
        test_label = np.identity(2)[test_label.astype(np.int8)]

        train_data, train_label, validation_data, validation_label, _, _ = dataSplit(
            traval_data,
            traval_label,
            train_rate=3 / 4,
            validation_rate=1 / 4,
            test_rate=0)

        if MODE == 'auged':
            print("\nadd auged data to train_data...")

            # NOTE(review): this indexes the augmented set with indices
            # computed on the natural set, which assumes both are stored in
            # the same order and size -- confirm against inputDataCreator.
            auged_traval_data = total_auged_data[traval_idx]
            auged_traval_label = total_auged_label[traval_idx]

            auged_train_data, auged_train_label, _, _, _, _ = dataSplit(
                auged_traval_data,
                auged_traval_label,
                train_rate=3 / 4,
                validation_rate=1 / 4,
                test_rate=0)

            print("  append auged data: ", auged_train_data.shape)
            print("\n  concatnate auged data with native data...")
            train_data = np.vstack((train_data, auged_train_data))
            train_label = np.vstack((train_label, auged_train_label))
            print("    Done.")

        print("\ntrain data shape: ", train_data.shape)
        print("train label shape: ", train_label.shape)
        print("\nvalidation data shape: ", validation_data.shape)
        print("validation label shape: ", validation_label.shape)
        print("\ntest data shape: ", test_data.shape)
        print("test label shape: ", test_label.shape)

        es = EarlyStopping(monitor='val_loss',
                           patience=5,
                           verbose=1,
                           restore_best_weights=True,
                           mode='auto')

        print("set epochs: ", epochs)

        if BUILD_MODEL == 'mymodel':
            model = mh.buildMyModel()

            # normal train ----------
            print("\ntraining sequence start .....")
            start = time.time()

            history = model.fit(train_data,
                                train_label,
                                batch_size=BATCH_SIZE,
                                epochs=epochs,
                                validation_data=(validation_data,
                                                 validation_label),
                                callbacks=[es],
                                verbose=2)
            elapsed_time = time.time() - start

        else:  # BUILD_MODEL == 'tlearn' (validated above)
            # Keep a handle on base_model so its weights can be unfrozen
            # later.
            base_model = mh.buildMnv1Base()
            base_model.trainable = False

            model = mh.addChead(base_model)

            print("\ntraining sequence start .....")
            start = time.time()

            # warm up (base model frozen) -----
            # The warm-up history is not needed afterwards, so it is not
            # kept.
            print("\nwarm up sequence .....")
            model.summary()
            model.fit(train_data,
                      train_label,
                      batch_size=BATCH_SIZE,
                      epochs=10,
                      validation_data=(validation_data, validation_label),
                      callbacks=[es],
                      verbose=2)

            # fine tuning -----
            print("\nfine tuning.....")
            mh.setFineTune(base_model, model, FINE_TUNE_AT)
            model.summary()

            history = model.fit(train_data,
                                train_label,
                                batch_size=BATCH_SIZE,
                                epochs=epochs,
                                validation_data=(validation_data,
                                                 validation_label),
                                callbacks=[es],
                                verbose=2)
            elapsed_time = time.time() - start

        # training end
        accs = history.history['accuracy']
        losses = history.history['loss']
        val_accs = history.history['val_accuracy']
        val_losses = history.history['val_loss']

        print("\npredict sequence...")
        pred = model.predict(test_data, batch_size=10, verbose=2)

        # Turn the one-hot test labels into names; column 0 is 'cat' and
        # column 1 is 'dog'.  Rows matching neither are skipped.
        label_name_list = []
        for one_hot_row in test_label:
            if one_hot_row[0] == 1:
                label_name_list.append('cat')
            elif one_hot_row[1] == 1:
                label_name_list.append('dog')

        df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
        df_pred['class'] = df_pred.idxmax(axis=1)
        df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
        # 'collect' holds True where the predicted class matches the label.
        df_pred['collect'] = (df_pred['class'] == df_pred['label'])

        confuse = df_pred[~df_pred['collect']].index.tolist()
        collect = df_pred[df_pred['collect']].index.tolist()

        print(df_pred)
        print("\nwrong recognized indeices are ", confuse)
        print("  wrong recognized amount is ", len(confuse))
        print("\ncollect recognized indeices are ", collect)
        print("  collect recognized amount is ", len(collect))
        print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

        print("\nevaluate sequence...")

        eval_res = model.evaluate(test_data,
                                  test_label,
                                  batch_size=10,
                                  verbose=2)

        print("result loss: ", eval_res[0])
        print("result score: ", eval_res[1])

        # ----------
        save_dict = {
            'last_loss': losses[-1],
            'last_acc': accs[-1],
            'last_val_loss': val_losses[-1],
            'last_val_acc': val_accs[-1],
            'n_confuse': len(confuse),
            'eval_loss': eval_res[0],
            'eval_acc': eval_res[1],
            'elapsed_time': elapsed_time,
        }

        print(save_dict)

        # One column per fold, one row per metric.
        if k == 0:
            df_result = pd.DataFrame(list(save_dict.values()),
                                     index=list(save_dict.keys()))
        else:
            df_result[k] = pd.Series(save_dict)
        print(df_result)

        # undefine ----------
        # Release the per-fold arrays and the model before the next fold.
        del traval_data, traval_label

        if MODE == 'auged':
            del auged_traval_data, auged_traval_label
            del auged_train_data, auged_train_label

        del train_data, train_label
        del validation_data, validation_label
        del test_data, test_label

        del model
        del history

        # clear session against OOM Error
        keras.backend.clear_session()
        gc.collect()

    csv_file = os.path.join(
        log_dir, "sample_{}_{}_{}_result.csv".format(N, MODE, BUILD_MODEL))
    df_result.to_csv(csv_file)

    print("\nexport {}  as CSV.".format(csv_file))
Beispiel #4
0
def main():
    """Train a MobileNet-based binary classifier with class weighting.

    The data is read from ``<parent of cwd>/datasets`` through
    ``ImageDataGenerator.flow_from_directory``.  Training runs in two phases
    (a warm-up with the base model frozen, then fine tuning after
    ``ModelHandler.setFineTune``), both with the same ``class_weight``.
    Afterwards the model is evaluated on the test directory and a 2x2
    confusion matrix, precision and recall are printed.

    Relies on names that are not defined inside this function and must exist
    at module level: ``target_size``, ``batch_size``, ``input_size``,
    ``channel`` and ``set_epochs``.

    NOTE(review): ``fit_generator`` / ``evaluate_generator`` /
    ``predict_generator`` are deprecated in recent TensorFlow releases in
    favour of ``fit`` / ``evaluate`` / ``predict`` -- confirm against the
    installed Keras version before migrating.
    """
    cwd = os.getcwd()
    prj_root = os.path.dirname(cwd)

    data_dir = os.path.join(prj_root, "datasets")

    use_da_data = False
    increase_val = False
    print(
        "\nmode: Use Augmented data: {} | increase validation data: {}".format(
            use_da_data, increase_val))

    # Directory selection:
    #   increase_val=False -> original train data (optionally with
    #                         augmentation) + original validation data
    #   increase_val=True  -> reduced train data (optionally with
    #                         augmentation) + enlarged validation data
    if increase_val:
        train_name = "red_train_with_aug" if use_da_data else "red_train"
        validation_name = "validation"
    else:
        train_name = "train_with_aug" if use_da_data else "train"
        validation_name = "val"
    train_dir = os.path.join(data_dir, train_name)
    validation_dir = os.path.join(data_dir, validation_name)
    test_dir = os.path.join(data_dir, "test")

    print("\ntrain_dir: ", train_dir)
    print("validation_dir: ", validation_dir)

    # data load ----------
    data_gen = ImageDataGenerator(rescale=1. / 255)

    train_generator = data_gen.flow_from_directory(train_dir,
                                                   target_size=target_size,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   class_mode='categorical')

    validation_generator = data_gen.flow_from_directory(
        validation_dir,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical')

    # shuffle=False keeps the test batches in a fixed order so that the
    # predictions and the labels collected below line up.
    test_generator = data_gen.flow_from_directory(test_dir,
                                                  target_size=target_size,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  class_mode='categorical')

    # Pull one batch only to report the batch shapes.
    data_checker, label_checker = next(train_generator)
    print("train data shape (in batch): ", data_checker.shape)
    print("train label shape (in batch): ", label_checker.shape)

    # build model ----------
    mh = ModelHandler(input_size, channel)

    # Keep a handle on base_model so its weights can be unfrozen later.
    base_model = mh.buildMnv1Base()
    base_model.trainable = False

    model = mh.addChead(base_model)
    model.summary()

    # instance EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    # Integer division: a trailing partial batch is not counted as a step.
    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    # Training-set class balance recorded by the author:
    #                 | rate
    # Normal   : 1341 | 0.26
    # Pneumonia: 3875 | 0.75
    #     total: 5216 | 1.0
    # Pneumonia / Normal = 2.889..
    # Class 1 is down-weighted in both training phases.
    # NOTE(review): presumably index 1 is the Pneumonia class -- confirm
    # with train_generator.class_indices.
    class_weight = {0: 1.0, 1: 0.4}

    start = time.time()

    # warm up (base model frozen) -----
    print("\nwarm up sequence .....")
    model.summary()
    model.fit_generator(train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=set_epochs,
                        validation_data=validation_generator,
                        validation_steps=validation_steps,
                        callbacks=[es],
                        class_weight=class_weight,
                        verbose=1)

    # fine tuning -----
    print("\nfine tuning.....")
    mh.setFineTune(base_model, model, 81)
    model.summary()

    model.fit_generator(train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=set_epochs,
                        validation_data=validation_generator,
                        validation_steps=validation_steps,
                        callbacks=[es],
                        class_weight=class_weight,
                        verbose=1)

    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    print("\nevaluate sequence...")
    test_steps = test_generator.n // batch_size
    eval_res = model.evaluate_generator(test_generator,
                                        steps=test_steps,
                                        verbose=1)

    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # confusion matrix -----
    print("\nconfusion matrix")
    # Rewind the iterator so predictions start from the first test batch.
    test_generator.reset()
    pred = model.predict_generator(test_generator, steps=test_steps, verbose=3)

    # Rewind again and collect the one-hot labels of the same batches,
    # stacking them once instead of re-allocating on every iteration.
    test_generator.reset()
    test_label = np.vstack(
        [next(test_generator)[1] for _ in range(test_steps)])

    idx_label = np.argmax(test_label, axis=-1)  # one-hot => class index
    idx_pred = np.argmax(pred, axis=-1)  # class probabilities => best class

    # Pin both class labels so the matrix is always 2x2, even when one class
    # is missing from the evaluated batches; the unpacking below needs
    # exactly four cells (binary classification).
    cm = confusion_matrix(idx_label, idx_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    print("  | T  | F ")
    print("--+----+---")
    print("N | {} | {}".format(tn, fn))
    print("--+----+---")
    print("P | {} | {}".format(tp, fp))

    # Precision: undefined (NaN) when nothing was predicted positive.
    predicted_positive = tp + fp
    precision = (tp / predicted_positive
                 if predicted_positive else float("nan"))
    print("Precision of the model is {}".format(precision))

    # Recall: undefined (NaN) when there are no actual positives.
    actual_positive = tp + fn
    recall = tp / actual_positive if actual_positive else float("nan")
    print("Recall of the model is {}".format(recall))