def main():
    # Note: target_size, batch_size, set_epochs, input_size and channel are
    # module-level settings defined elsewhere in this script.
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)
    data_dir = os.path.join(sub_prj, "datasets")
    data_src = os.path.join(data_dir, "small_721")
    print("\ndata source: ", data_src)

    use_da_data = True
    if use_da_data:
        train_dir = os.path.join(data_src, "train_with_aug")
    else:
        train_dir = os.path.join(data_src, "train")
    validation_dir = os.path.join(data_src, "validation")
    test_dir = os.path.join(data_src, "test")

    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    # print("test_dir: ", test_dir)

    # data load ----------
    data_gen = ImageDataGenerator(rescale=1. / 255)

    train_generator = data_gen.flow_from_directory(train_dir,
                                                   target_size=target_size,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   class_mode='categorical')
    validation_generator = data_gen.flow_from_directory(validation_dir,
                                                        target_size=target_size,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        class_mode='categorical')
    test_generator = data_gen.flow_from_directory(test_dir,
                                                  target_size=target_size,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  class_mode='categorical')

    data_checker, label_checker = next(train_generator)
    print("train data shape (in batch): ", data_checker.shape)
    print("train label shape (in batch): ", label_checker.shape)
    # print("validation data shape:", validation_data.shape)
    # print("validation label shape:", validation_label.shape)
    # print("test data shape:", test_data.shape)
    # print("test label shape:", test_label.shape)

    # build model ----------
    mh = ModelHandler(input_size, channel)

    # keep a reference to base_model so its weights can be unfrozen later
    base_model = mh.buildXceptionBase()
    base_model.trainable = False

    model = mh.addChead(base_model)
    model.summary()

    # instantiate EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    start = time.time()
    print("\ntraining sequence start .....")

    # warm-up: train only the new classification head -----
    print("\nwarm up sequence .....")
    model.summary()
    _history = model.fit_generator(train_generator,
                                   steps_per_epoch=steps_per_epoch,
                                   epochs=set_epochs,
                                   validation_data=validation_generator,
                                   validation_steps=validation_steps,
                                   callbacks=[es],
                                   verbose=1)

    # fine tuning -----
    print("\nfine tuning.....")
    mh.setFineTune(base_model, model, 108)
    model.summary()

    history = model.fit_generator(train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=set_epochs,
                                  validation_data=validation_generator,
                                  validation_steps=validation_steps,
                                  callbacks=[es],
                                  verbose=1)

    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    print("\nevaluate sequence...")
    test_steps = test_generator.n // batch_size
    eval_res = model.evaluate_generator(test_generator,
                                        steps=test_steps,
                                        verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # confusion matrix -----
    print("\nconfusion matrix")
    pred = model.predict_generator(test_generator,
                                   steps=test_steps,
                                   verbose=3)

    # rebuild the test labels in generator order (shuffle=False above)
    test_label = []
    for i in range(test_steps):
        _, tmp_tl = next(test_generator)
        if i == 0:
            test_label = tmp_tl
        else:
            test_label = np.vstack((test_label, tmp_tl))

    idx_label = np.argmax(test_label, axis=-1)  # one-hot => class index
    idx_pred = np.argmax(pred, axis=-1)  # class probabilities => most likely class

    cm = confusion_matrix(idx_label, idx_pred)

    # Calculate Precision and Recall
    tn, fp, fn, tp = cm.ravel()
    print("  | T  | F ")
    print("--+----+---")
    print("N | {} | {}".format(tn, fn))
    print("--+----+---")
    print("P | {} | {}".format(tp, fp))

    # precision:
    # precision = tp / (tp + fp)
    # print("Precision of the model is {}".format(precision))

    # recall:
    # recall = tp / (tp + fn)
    # print("Recall of the model is {}".format(recall))

    # save model -----
    save_location = os.path.join(sub_prj, "outputs", "models")
    if use_da_data:
        save_file = os.path.join(save_location, "auged_xception_model.h5")
    else:
        save_file = os.path.join(save_location, "xception_model.h5")
    model.save(save_file)
    print("\nmodel has been saved in", save_file)
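# ----------------------------------------------------------------------
# The main() functions in this section all rely on a ModelHandler class
# defined elsewhere in the repo. The sketch below is only a guess at what
# the methods used above (buildXceptionBase / addChead / setFineTune)
# might look like, assuming a standard Keras transfer-learning recipe;
# the head architecture, optimizer and learning rates are assumptions,
# not the repo's actual implementation. The VGG16 / MobileNetV1 builders
# used by the later scripts would be analogous.
# ----------------------------------------------------------------------
from tensorflow.keras import Model, layers
from tensorflow.keras.applications import Xception
from tensorflow.keras.optimizers import Adam


class ModelHandler:
    def __init__(self, input_size, channel):
        self.input_shape = (input_size, input_size, channel)

    def buildXceptionBase(self):
        # ImageNet-pretrained convolutional base, classifier stripped
        return Xception(include_top=False,
                        weights='imagenet',
                        input_shape=self.input_shape)

    def addChead(self, base_model):
        # attach a small classification head on top of the (frozen) base
        x = layers.GlobalAveragePooling2D()(base_model.output)
        x = layers.Dense(256, activation='relu')(x)
        output = layers.Dense(2, activation='softmax')(x)
        model = Model(base_model.input, output)
        model.compile(optimizer=Adam(),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    def setFineTune(self, base_model, model, fine_tune_at):
        # unfreeze the base from layer `fine_tune_at` onward, then
        # recompile with a small learning rate for gentle fine tuning
        base_model.trainable = True
        for layer in base_model.layers[:fine_tune_at]:
            layer.trainable = False
        model.compile(optimizer=Adam(1e-5),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])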
def main(N, LEARN_PATH, DATA_MODE, EPOCHS=60, FINE_TUNE_AT=81):
    sample_dir = os.path.join(LEARN_PATH, "sample_{}".format(N))

    # DATA_MODE is currently unused; use_da_data is hardcoded here
    use_da_data = False
    if use_da_data:
        train_dir = os.path.join(sample_dir, "train_with_aug")
    else:
        train_dir = os.path.join(sample_dir, "train")
    validation_dir = os.path.join(sample_dir, "validation")
    test_dir = os.path.join(sample_dir, "test")

    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    train_data, train_label = inputDataCreator(train_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)
    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         224,
                                                         normalize=True,
                                                         one_hot=True)
    test_data, test_label = inputDataCreator(test_dir,
                                             224,
                                             normalize=True,
                                             one_hot=True)

    print("\ntrain data shape: ", train_data.shape)
    print("train label shape: ", train_label.shape)
    print("\nvalidation data shape: ", validation_data.shape)
    print("validation label shape: ", validation_label.shape)

    input_size = train_data.shape[1]
    channel = train_data.shape[3]
    batch_size = 10
    print("set epochs: ", EPOCHS)

    # build model ----------
    mh = ModelHandler(input_size, channel)

    # keep a reference to base_model so its weights can be unfrozen later
    # base_model = mh.buildMnv1Base()
    base_model = mh.buildVgg16Base()
    base_model.trainable = False

    model = mh.addChead(base_model)
    model.summary()

    # early stopping
    es = keras.callbacks.EarlyStopping(monitor='val_loss',
                                       patience=5,
                                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    start = time.time()

    # warm-up: train only the new classification head -----
    print("\nwarm up sequence .....")
    model.summary()
    _history = model.fit(train_data,
                         train_label,
                         batch_size=batch_size,
                         epochs=EPOCHS,
                         validation_data=(validation_data, validation_label),
                         callbacks=[es],
                         verbose=2)

    # fine tuning -----
    mh.setFineTune(base_model, model, FINE_TUNE_AT)
    model.summary()

    history = model.fit(train_data,
                        train_label,
                        batch_size=batch_size,
                        epochs=EPOCHS,
                        validation_data=(validation_data, validation_label),
                        callbacks=[es],
                        verbose=2)

    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']

    print("\npredict sequence...")
    pred = model.predict(test_data, batch_size=10, verbose=1)

    label_name_list = []
    for i in range(len(test_label)):
        if test_label[i][0] == 1:
            label_name_list.append('cat')
        elif test_label[i][1] == 1:
            label_name_list.append('dog')

    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['correct'] = (df_pred['class'] == df_pred['label'])

    confuse = df_pred[df_pred['correct'] == False].index.tolist()
    correct = df_pred[df_pred['correct'] == True].index.tolist()

    print(df_pred)
    print("\nwrongly recognized indices are ", confuse)
    print("  wrongly recognized amount is ", len(confuse))
    print("\ncorrectly recognized indices are ", correct)
    print("  correctly recognized amount is ", len(correct))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    print("\nevaluate sequence...")
    eval_res = model.evaluate(test_data, test_label, batch_size=10, verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # ----------
    save_dict = {}
    save_dict['last_loss'] = losses[-1]
    save_dict['last_acc'] = accs[-1]
    save_dict['last_val_loss'] = val_losses[-1]
    save_dict['last_val_acc'] = val_accs[-1]
    save_dict['n_confuse'] = len(confuse)
    save_dict['eval_loss'] = eval_res[0]
    save_dict['eval_acc'] = eval_res[1]
    save_dict['elapsed_time'] = elapsed_time

    print(save_dict)

    # free the heavyweight objects before the next run
    # del train_data, train_label, validation_data, validation_label, test_data, test_label
    del model
    del _history, history
    # del pred

    keras.backend.clear_session()
    gc.collect()

    return save_dict
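# ----------------------------------------------------------------------
# inputDataCreator() is also defined elsewhere in the repo. A plausible
# minimal sketch, assuming it loads one subdirectory per class into a
# single array and honors the normalize / one_hot flags seen in the
# calls above -- all details here are assumptions for illustration.
# ----------------------------------------------------------------------
import os

import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array, load_img


def inputDataCreator(target_dir, input_size, normalize=False, one_hot=False):
    """Load every image under target_dir (one subdirectory per class)
    and return (data, label) arrays; labels follow sorted directory order."""
    data, labels = [], []
    class_names = sorted(os.listdir(target_dir))
    for class_idx, class_name in enumerate(class_names):
        class_dir = os.path.join(target_dir, class_name)
        for fname in sorted(os.listdir(class_dir)):
            img = load_img(os.path.join(class_dir, fname),
                           target_size=(input_size, input_size))
            data.append(img_to_array(img))
            labels.append(class_idx)
    data = np.asarray(data, dtype=np.float32)
    labels = np.asarray(labels, dtype=np.float32)
    if normalize:
        data /= 255.0  # scale pixel values to [0, 1]
    if one_hot:
        labels = np.identity(len(class_names))[labels.astype(np.int8)]
    return data, labels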
def main(N, LEARN_PATH, MODE, BUILD_MODEL, EPOCHS=60, BATCH_SIZE=20, FINE_TUNE_AT=81):
    # one_hot is deliberately off here: StratifiedKFold below needs
    # integer labels; one-hot conversion happens after splitting
    total_data, total_label = inputDataCreator(os.path.join(LEARN_PATH, "natural"),
                                               224,
                                               normalize=True)

    print("\ntotal_data shape: ", total_data.shape)
    print("total_label shape: ", total_label.shape)

    if MODE == 'auged':
        auged_dir = os.path.join(LEARN_PATH, "auged")
        EPOCHS = EPOCHS // 2
        total_auged_data, total_auged_label = inputDataCreator(auged_dir,
                                                               224,
                                                               normalize=True,
                                                               one_hot=True)
        print("\n total auged_data: ", total_auged_data.shape)

    input_size = total_data.shape[1]
    channel = total_data.shape[3]
    mh = ModelHandler(input_size, channel)

    skf = StratifiedKFold(n_splits=5)

    k = 0
    for traval_idx, test_idx in skf.split(total_data, total_label):
        print("\nK-Fold Cross-Validation k:{} ==========".format(k))
        print("\ntrain indices: \n", traval_idx)
        print("\ntest indices: \n", test_idx)

        test_data = total_data[test_idx]
        test_label = total_label[test_idx]
        print("-----*-----*-----")

        traval_data = total_data[traval_idx]
        traval_label = total_label[traval_idx]
        # print(traval_data.shape)
        # print(traval_label.shape)

        # integer labels => one-hot
        traval_label = np.identity(2)[traval_label.astype(np.int8)]
        test_label = np.identity(2)[test_label.astype(np.int8)]

        train_data, train_label, validation_data, validation_label, _, _ = dataSplit(
            traval_data,
            traval_label,
            train_rate=3 / 4,
            validation_rate=1 / 4,
            test_rate=0)

        if MODE == 'auged':
            print("\nadd auged data to train_data...")
            auged_traval_data = total_auged_data[traval_idx]
            auged_traval_label = total_auged_label[traval_idx]
            auged_train_data, auged_train_label, _, _, _, _ = dataSplit(
                auged_traval_data,
                auged_traval_label,
                train_rate=3 / 4,
                validation_rate=1 / 4,
                test_rate=0)
            print("  append auged data: ", auged_train_data.shape)
            print("\n  concatenate auged data with native data...")
            train_data = np.vstack((train_data, auged_train_data))
            train_label = np.vstack((train_label, auged_train_label))
            print("  Done.")

        print("\ntrain data shape: ", train_data.shape)
        print("train label shape: ", train_label.shape)
        print("\nvalidation data shape: ", validation_data.shape)
        print("validation label shape: ", validation_label.shape)
        print("\ntest data shape: ", test_data.shape)
        print("test label shape: ", test_label.shape)

        es = EarlyStopping(monitor='val_loss',
                           patience=5,
                           verbose=1,
                           restore_best_weights=True,
                           mode='auto')

        print("set epochs: ", EPOCHS)

        if BUILD_MODEL == 'mymodel':
            model = mh.buildMyModel()

            # normal train ----------
            print("\ntraining sequence start .....")
            start = time.time()

            history = model.fit(train_data,
                                train_label,
                                batch_size=BATCH_SIZE,
                                epochs=EPOCHS,
                                validation_data=(validation_data, validation_label),
                                callbacks=[es],
                                verbose=2)
            elapsed_time = time.time() - start
        elif BUILD_MODEL == 'tlearn':
            # keep a reference to base_model so its weights can be unfrozen later
            base_model = mh.buildMnv1Base()
            base_model.trainable = False

            model = mh.addChead(base_model)

            print("\ntraining sequence start .....")
            start = time.time()

            # warm-up: train only the new classification head -----
            print("\nwarm up sequence .....")
            model.summary()
            _history = model.fit(train_data,
                                 train_label,
                                 batch_size=BATCH_SIZE,
                                 epochs=10,
                                 validation_data=(validation_data, validation_label),
                                 callbacks=[es],
                                 verbose=2)

            # fine tuning -----
            print("\nfine tuning.....")
            mh.setFineTune(base_model, model, FINE_TUNE_AT)
            model.summary()

            history = model.fit(train_data,
                                train_label,
                                batch_size=BATCH_SIZE,
                                epochs=EPOCHS,
                                validation_data=(validation_data, validation_label),
                                callbacks=[es],
                                verbose=2)
            elapsed_time = time.time() - start

        # training end
        accs = history.history['accuracy']
        losses = history.history['loss']
        val_accs = history.history['val_accuracy']
        val_losses = history.history['val_loss']

        log_dir = os.path.join(os.path.dirname(os.getcwd()), "flog")
        os.makedirs(log_dir, exist_ok=True)
        """
        child_log_dir = os.path.join(log_dir, "{}_{}_{}".format(MODE, BUILD_MODEL, no))
        os.makedirs(child_log_dir, exist_ok=True)

        # save model & weights
        model_file = os.path.join(child_log_dir, "{}_{}_{}_model.h5".format(MODE, BUILD_MODEL, no))
        model.save(model_file)

        # save history
        history_file = os.path.join(child_log_dir, "{}_{}_{}_history.pkl".format(MODE, BUILD_MODEL, no))
        with open(history_file, 'wb') as p:
            pickle.dump(history.history, p)

        print("\nexport logs in ", child_log_dir)
        """

        print("\npredict sequence...")
        pred = model.predict(test_data, batch_size=10, verbose=2)

        label_name_list = []
        for i in range(len(test_label)):
            if test_label[i][0] == 1:
                label_name_list.append('cat')
            elif test_label[i][1] == 1:
                label_name_list.append('dog')

        df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
        df_pred['class'] = df_pred.idxmax(axis=1)
        df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
        df_pred['correct'] = (df_pred['class'] == df_pred['label'])

        confuse = df_pred[df_pred['correct'] == False].index.tolist()
        correct = df_pred[df_pred['correct'] == True].index.tolist()

        print(df_pred)
        print("\nwrongly recognized indices are ", confuse)
        print("  wrongly recognized amount is ", len(confuse))
        print("\ncorrectly recognized indices are ", correct)
        print("  correctly recognized amount is ", len(correct))
        print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

        print("\nevaluate sequence...")
        eval_res = model.evaluate(test_data, test_label, batch_size=10, verbose=2)
        print("result loss: ", eval_res[0])
        print("result score: ", eval_res[1])

        # ----------
        save_dict = {}
        save_dict['last_loss'] = losses[-1]
        save_dict['last_acc'] = accs[-1]
        save_dict['last_val_loss'] = val_losses[-1]
        save_dict['last_val_acc'] = val_accs[-1]
        save_dict['n_confuse'] = len(confuse)
        save_dict['eval_loss'] = eval_res[0]
        save_dict['eval_acc'] = eval_res[1]
        save_dict['elapsed_time'] = elapsed_time
        print(save_dict)

        if k == 0:
            df_result = pd.Series(save_dict).to_frame(k)
        else:
            df_result[k] = pd.Series(save_dict)
        print(df_result)

        # undefine ----------
        # del total_data, total_label
        del traval_data, traval_label
        if MODE == 'auged':
            # del total_auged_data, total_auged_label
            del auged_traval_data, auged_traval_label
            del auged_train_data, auged_train_label
        del train_data, train_label
        del validation_data, validation_label
        del test_data, test_label
        del model
        if BUILD_MODEL == 'tlearn':
            del _history  # only defined on the transfer-learning path
        del history

        # clear session against OOM Error
        keras.backend.clear_session()
        gc.collect()

        k += 1

    csv_file = os.path.join(
        log_dir, "sample_{}_{}_{}_result.csv".format(N, MODE, BUILD_MODEL))
    df_result.to_csv(csv_file)
    print("\nexport {} as CSV.".format(csv_file))
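# ----------------------------------------------------------------------
# dataSplit() is another repo-local helper. From the call sites above it
# takes (data, label) plus train/validation/test rates and returns six
# arrays; this ordered-slicing sketch is an assumption about its
# behavior, not the actual implementation.
# ----------------------------------------------------------------------
import numpy as np


def dataSplit(data, label, train_rate=0.6, validation_rate=0.2, test_rate=0.2):
    n = len(data)
    n_train = int(n * train_rate)
    n_val = int(n * validation_rate)

    train_data, train_label = data[:n_train], label[:n_train]
    validation_data = data[n_train:n_train + n_val]
    validation_label = label[n_train:n_train + n_val]
    test_data, test_label = data[n_train + n_val:], label[n_train + n_val:]

    return (train_data, train_label,
            validation_data, validation_label,
            test_data, test_label)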
def main():
    cwd = os.getcwd()
    prj_root = os.path.dirname(cwd)
    data_dir = os.path.join(prj_root, "datasets")

    use_da_data = False
    increase_val = False
    print("\nmode: Use Augmented data: {} | increase validation data: {}".format(
        use_da_data, increase_val))

    # First define original train_data only as train_dir
    train_dir = os.path.join(data_dir, "train")
    if (use_da_data == True) and (increase_val == False):
        # with augmented data (no validation increase)
        train_dir = os.path.join(data_dir, "train_with_aug")
    validation_dir = os.path.join(data_dir, "val")  # original validation data

    # pair of decreased train_data and increased validation data
    if increase_val:
        train_dir = os.path.join(data_dir, "red_train")
        if use_da_data:
            train_dir = os.path.join(data_dir, "red_train_with_aug")
        validation_dir = os.path.join(data_dir, "validation")

    test_dir = os.path.join(data_dir, "test")

    print("\ntrain_dir: ", train_dir)
    print("validation_dir: ", validation_dir)

    # data load ----------
    data_gen = ImageDataGenerator(rescale=1. / 255)

    train_generator = data_gen.flow_from_directory(train_dir,
                                                   target_size=target_size,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   class_mode='categorical')
    validation_generator = data_gen.flow_from_directory(validation_dir,
                                                        target_size=target_size,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        class_mode='categorical')
    test_generator = data_gen.flow_from_directory(test_dir,
                                                  target_size=target_size,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  class_mode='categorical')

    data_checker, label_checker = next(train_generator)
    print("train data shape (in batch): ", data_checker.shape)
    print("train label shape (in batch): ", label_checker.shape)
    # print("validation data shape:", validation_data.shape)
    # print("validation label shape:", validation_label.shape)
    # print("test data shape:", test_data.shape)
    # print("test label shape:", test_label.shape)

    # build model ----------
    mh = ModelHandler(input_size, channel)

    # keep a reference to base_model so its weights can be unfrozen later
    base_model = mh.buildMnv1Base()
    base_model.trainable = False

    model = mh.addChead(base_model)
    model.summary()

    # instantiate EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    start = time.time()
    print("\ntraining sequence start .....")

    # warm-up: train only the new classification head -----
    print("\nwarm up sequence .....")
    model.summary()

    # class balance of the training set:
    #                    | rate
    #   Normal   : 1341  | 0.26
    #   Pneumonia: 3875  | 0.74
    #   total    : 5216  | 1.0
    #   Pneumonia / Normal = 2.889..
    # => down-weight the majority (Pneumonia) class via class_weight
    _history = model.fit_generator(train_generator,
                                   steps_per_epoch=steps_per_epoch,
                                   epochs=set_epochs,
                                   validation_data=validation_generator,
                                   validation_steps=validation_steps,
                                   callbacks=[es],
                                   class_weight={0: 1.0, 1: 0.4},
                                   verbose=1)

    # fine tuning -----
    print("\nfine tuning.....")
    mh.setFineTune(base_model, model, 81)
    model.summary()

    history = model.fit_generator(train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=set_epochs,
                                  validation_data=validation_generator,
                                  validation_steps=validation_steps,
                                  callbacks=[es],
                                  class_weight={0: 1.0, 1: 0.4},
                                  verbose=1)

    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    print("\nevaluate sequence...")
    test_steps = test_generator.n // batch_size
    eval_res = model.evaluate_generator(test_generator,
                                        steps=test_steps,
                                        verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # confusion matrix -----
    print("\nconfusion matrix")
    pred = model.predict_generator(test_generator,
                                   steps=test_steps,
                                   verbose=3)

    # rebuild the test labels in generator order (shuffle=False above)
    test_label = []
    for i in range(test_steps):
        _, tmp_tl = next(test_generator)
        if i == 0:
            test_label = tmp_tl
        else:
            test_label = np.vstack((test_label, tmp_tl))

    idx_label = np.argmax(test_label, axis=-1)  # one-hot => class index
    idx_pred = np.argmax(pred, axis=-1)  # class probabilities => most likely class

    cm = confusion_matrix(idx_label, idx_pred)

    # Calculate Precision and Recall
    # (rows of this hand-made table are the predicted class,
    #  columns are whether the prediction was correct)
    tn, fp, fn, tp = cm.ravel()
    print("  | T  | F ")
    print("--+----+---")
    print("N | {} | {}".format(tn, fn))
    print("--+----+---")
    print("P | {} | {}".format(tp, fp))

    # precision:
    precision = tp / (tp + fp)
    print("Precision of the model is {}".format(precision))

    # recall:
    recall = tp / (tp + fn)
    print("Recall of the model is {}".format(recall))
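# ----------------------------------------------------------------------
# Sanity check for the hand-rolled table above: for binary labels,
# sklearn's confusion_matrix(y_true, y_pred).ravel() returns the cells
# in (tn, fp, fn, tp) order. A tiny self-contained example:
# ----------------------------------------------------------------------
import numpy as np
from sklearn.metrics import confusion_matrix

y_true = np.array([0, 0, 0, 1, 1, 1])
y_pred = np.array([0, 1, 0, 1, 1, 0])

tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print(tn, fp, fn, tp)        # -> 2 1 1 2

precision = tp / (tp + fp)   # 2 / 3
recall = tp / (tp + fn)      # 2 / 3
print(precision, recall)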