def train(aug_no, model_mode='mymodel', set_epochs=10, do_es=False):
    # cwd and validation_dir are assumed to be defined at module level
    train_dir = os.path.join(cwd, "da_concat_{}".format(aug_no))

    train_data, train_label = inputDataCreator(train_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)
    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         224,
                                                         normalize=True,
                                                         one_hot=True)

    print("train data shape : ", train_data.shape)
    print("train label shape : ", train_label.shape)

    INPUT_SIZE = train_data.shape[1]
    print("INPUT_SIZE: ", INPUT_SIZE)
    CHANNEL = train_data.shape[3]
    print("set channel : ", CHANNEL)
    batch_size = 10
    print("set batch_size : ", batch_size)

    mh = ModelHandler(INPUT_SIZE, CHANNEL)
    if model_mode == 'mymodel':
        model = mh.buildMyModel()
    elif model_mode == 'tlearn':
        model = mh.buildTlearnModel(base='mnv1')
    model.summary()

    if do_es:
        es = EarlyStopping(monitor='val_loss',
                           patience=5,
                           verbose=1,
                           mode='auto')
        es = [es]
    else:
        es = None

    history = model.fit(train_data,
                        train_label,
                        batch_size=batch_size,
                        epochs=set_epochs,
                        validation_data=(validation_data, validation_label),
                        callbacks=es,
                        verbose=1)

    # make log dir -----
    if do_es:
        log_dir = os.path.join(cwd, 'log_with_es')
    else:
        log_dir = os.path.join(cwd, 'log')
    os.makedirs(log_dir, exist_ok=True)
    child_log_dir = os.path.join(log_dir, '{}_{}'.format(aug_no, model_mode))
    os.makedirs(child_log_dir, exist_ok=True)

    # save model & weights
    model_file = os.path.join(child_log_dir,
                              '{}_{}_model.h5'.format(aug_no, model_mode))
    model.save(model_file)

    # save history
    history_file = os.path.join(child_log_dir,
                                '{}_{}_history.pkl'.format(aug_no, model_mode))
    with open(history_file, 'wb') as p:
        pickle.dump(history.history, p)

    print("export logs in ", child_log_dir)
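
# Companion sketch for the history pickle written by train() above: reloading
# it later for inspection or plotting. The file path below is hypothetical
# (aug_no=0, model_mode='mymodel'); point it at an actual run's output.
import os
import pickle

history_file = os.path.join("log", "0_mymodel", "0_mymodel_history.pkl")
with open(history_file, 'rb') as p:
    hist = pickle.load(p)
print(hist.keys())  # e.g. dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
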
def main():
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)
    sub_prj_root = os.path.dirname(sub_prj)
    prj_root = os.path.dirname(sub_prj_root)

    data_dir = os.path.join(prj_root, "datasets")
    # data_src = os.path.join(data_dir, "small_721")
    data_src = os.path.join(data_dir, "cdev_origin")
    print("\ndata source: ", data_src)

    """
    use_da_data = False
    increase_val = False
    print("\nmode: Use Augmented data: {} | increase validation data: {}".format(use_da_data, increase_val))

    # First define original train_data only as train_dir
    train_dir = os.path.join(data_dir, "train")
    if (use_da_data == True) and (increase_val == False):
        # with_augmented data (no validation increase)
        train_dir = os.path.join(data_dir, "train_with_aug")
    validation_dir = os.path.join(data_dir, "val")  # original validation data

    # pair of decreased train_data and increased validation data
    if (increase_val == True):
        train_dir = os.path.join(data_dir, "red_train")
        if (use_da_data == True):
            train_dir = os.path.join(data_dir, "red_train_with_aug")
        validation_dir = os.path.join(data_dir, "validation")
    """

    print("\ncreate train data")
    total_data, total_label = inputDataCreator(data_src,
                                               224,
                                               normalize=True,
                                               one_hot=True)
    train_data, train_label, validation_data, validation_label, test_data, test_label = dataSplit(
        total_data, total_label)
    print(train_data.shape)
    print(validation_data.shape)
    print(test_data.shape)
    print(test_label)

    mh = ModelHandler(224, 3)
    model = mh.buildTlearnModel(base='mnv1')
    model.summary()

    print("\ntraining sequence started...")
    start = time.time()
    history = model.fit(train_data,
                        train_label,
                        batch_size=10,
                        epochs=30,
                        validation_data=(validation_data, validation_label),
                        verbose=1)
    elapsed_time = time.time() - start
    print(" total elapsed time: {} [sec]".format(elapsed_time))

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']
    print("last val_acc: ", val_accs[len(val_accs) - 1])

    # save some result score, model & weights ----------
    now = datetime.datetime.now()
    log_dir = os.path.join(sub_prj, "outputs")
    child_log_dir = os.path.join(log_dir, "{0:%Y%m%d}".format(now))
    os.makedirs(child_log_dir, exist_ok=True)  # result.csv is written here later

    save_location = os.path.join(log_dir, "models")
    os.makedirs(save_location, exist_ok=True)  # model.save() needs an existing directory
    save_file = os.path.join(save_location, "model.h5")
    model.save(save_file)
    print("\nmodel has been saved in", save_file)

    print("\npredict sequence...")
    pred = model.predict(test_data, batch_size=10, verbose=1)

    # confusion matrix -----
    #           Predict
    #             0  |  1
    #         --+----+-----
    #         0 | TN | FP
    #   label --+----+-----
    #         1 | FN | TP
    idx_label = np.argmax(test_label, axis=-1)  # one_hot => index
    idx_pred = np.argmax(pred, axis=-1)  # per-class probability => class with the highest value
    cm = confusion_matrix(idx_label, idx_pred)

    # Calculate Precision and Recall
    tn, fp, fn, tp = cm.ravel()
    print("  | T  | F ")
    print("--+----+---")
    print("N | {} | {}".format(tn, fn))
    print("--+----+---")
    print("P | {} | {}".format(tp, fp))

    # precision:
    # precision = tp / (tp + fp)
    # print("Precision of the model is {}".format(precision))

    # recall:
    # recall = tp / (tp + fn)
    # print("Recall of the model is {}".format(recall))

    label_name_list = []
    for i in range(len(test_label)):
        if test_label[i][0] == 1:
            label_name_list.append('cat')
        elif test_label[i][1] == 1:
            label_name_list.append('dog')

    # print("result: ", pred)
    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])
    confuse = df_pred[df_pred['collect'] == False].index.tolist()
    collect = df_pred[df_pred['collect'] == True].index.tolist()
    print(df_pred)
    print("\nwrongly recognized indices are ", confuse)
    print("  wrongly recognized amount is ", len(confuse))
    print("\ncorrectly recognized indices are ", collect)
    print("  correctly recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    print("\nevaluate sequence...")
    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=10,
                              verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # save history
    save_dict = {}
    save_dict['last_loss'] = losses[len(losses) - 1]
    save_dict['last_acc'] = accs[len(accs) - 1]
    save_dict['last_val_loss'] = val_losses[len(val_losses) - 1]
    save_dict['last_val_acc'] = val_accs[len(val_accs) - 1]
    save_dict['n_confuse'] = len(confuse)
    save_dict['eval_loss'] = eval_res[0]
    save_dict['eval_acc'] = eval_res[1]
    save_dict['elapsed_time'] = elapsed_time
    print(save_dict)

    df_result = pd.DataFrame(save_dict.values(), index=save_dict.keys())
    csv_file = os.path.join(child_log_dir, "result.csv")
    df_result.to_csv(csv_file)
    print("\nexport history in ", csv_file)
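
# Tiny self-contained demo of the df_pred pattern used above: with only the
# probability columns present, idxmax(axis=1) returns the column name (class)
# with the highest value per row. The probabilities here are made up.
import pandas as pd

df = pd.DataFrame([[0.9, 0.1], [0.3, 0.7]], columns=['cat', 'dog'])
df['class'] = df.idxmax(axis=1)
print(df['class'].tolist())  # ['cat', 'dog']
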
def main(N, LEARN_PATH, MODE, BUILD_MODEL, EPOCHS=60, BATCH_SIZE=20, FINE_TUNE_AT=81):
    total_data, total_label = inputDataCreator(os.path.join(LEARN_PATH, "natural"),
                                               224,
                                               normalize=True)  # one_hot=True
    print("\ntotal_data shape: ", total_data.shape)
    print("total_label shape: ", total_label.shape)

    if MODE == 'auged':
        auged_dir = os.path.join(LEARN_PATH, "auged")
        EPOCHS = EPOCHS // 2
        total_auged_data, total_auged_label = inputDataCreator(auged_dir,
                                                               224,
                                                               normalize=True,
                                                               one_hot=True)
        print("\n total auged_data : ", total_auged_data.shape)

    input_size = total_data.shape[1]
    channel = total_data.shape[3]
    mh = ModelHandler(input_size, channel)

    skf = StratifiedKFold(n_splits=5)
    k = 0
    for traval_idx, test_idx in skf.split(total_data, total_label):
        print("\nK-Fold Cross-Validation k:{} ==========".format(k))
        print("\ntrain indices: \n", traval_idx)
        print("\ntest indices: \n", test_idx)

        test_data = total_data[test_idx]
        test_label = total_label[test_idx]
        print("-----*-----*-----")

        traval_data = total_data[traval_idx]
        traval_label = total_label[traval_idx]
        # print(traval_data.shape)
        # print(traval_label.shape)

        # one-hot encode the integer labels via an identity-matrix lookup
        traval_label = np.identity(2)[traval_label.astype(np.int8)]
        test_label = np.identity(2)[test_label.astype(np.int8)]

        train_data, train_label, validation_data, validation_label, _, _ = dataSplit(
            traval_data,
            traval_label,
            train_rate=3 / 4,
            validation_rate=1 / 4,
            test_rate=0)

        if MODE == 'auged':
            print("\nadd auged data to train_data...")
            auged_traval_data = total_auged_data[traval_idx]
            auged_traval_label = total_auged_label[traval_idx]
            auged_train_data, auged_train_label, _, _, _, _ = dataSplit(
                auged_traval_data,
                auged_traval_label,
                train_rate=3 / 4,
                validation_rate=1 / 4,
                test_rate=0)
            print("  append auged data: ", auged_train_data.shape)

            print("\n  concatenate auged data with native data...")
            train_data = np.vstack((train_data, auged_train_data))
            train_label = np.vstack((train_label, auged_train_label))
            print("  Done.")

        print("\ntrain data shape: ", train_data.shape)
        print("train label shape: ", train_label.shape)
        print("\nvalidation data shape: ", validation_data.shape)
        print("validation label shape: ", validation_label.shape)
        print("\ntest data shape: ", test_data.shape)
        print("test label shape: ", test_label.shape)

        es = EarlyStopping(monitor='val_loss',
                           patience=5,
                           verbose=1,
                           restore_best_weights=True,
                           mode='auto')

        print("set epochs: ", EPOCHS)

        if BUILD_MODEL == 'mymodel':
            model = mh.buildMyModel()

            # normal train ----------
            print("\ntraining sequence start .....")
            start = time.time()
            history = model.fit(train_data,
                                train_label,
                                BATCH_SIZE,
                                epochs=EPOCHS,
                                validation_data=(validation_data, validation_label),
                                callbacks=[es],
                                verbose=2)
            elapsed_time = time.time() - start
        elif BUILD_MODEL == 'tlearn':
            # define base_model separately so its weights can be unfrozen later
            base_model = mh.buildMnv1Base()
            base_model.trainable = False

            model = mh.addChead(base_model)

            print("\ntraining sequence start .....")
            start = time.time()

            # warm up -----
            print("\nwarm up sequence .....")
            model.summary()
            _history = model.fit(train_data,
                                 train_label,
                                 BATCH_SIZE,
                                 epochs=10,
                                 validation_data=(validation_data, validation_label),
                                 callbacks=[es],
                                 verbose=2)

            # fine tuning -----
            print("\nfine tuning.....")
            mh.setFineTune(base_model, model, FINE_TUNE_AT)
            model.summary()

            history = model.fit(train_data,
                                train_label,
                                BATCH_SIZE,
                                epochs=EPOCHS,
                                validation_data=(validation_data, validation_label),
                                callbacks=[es],
                                verbose=2)
            elapsed_time = time.time() - start

        # training end
        accs = history.history['accuracy']
        losses = history.history['loss']
        val_accs = history.history['val_accuracy']
        val_losses = history.history['val_loss']

        # cwd is assumed to be defined at module level (cwd = os.getcwd())
        log_dir = os.path.join(os.path.dirname(cwd), "flog")
        os.makedirs(log_dir, exist_ok=True)

        """
        child_log_dir = os.path.join(log_dir, "{}_{}_{}".format(MODE, BUILD_MODEL, no))
        os.makedirs(child_log_dir, exist_ok=True)

        # save model & weights
        model_file = os.path.join(child_log_dir, "{}_{}_{}_model.h5".format(MODE, BUILD_MODEL, no))
        model.save(model_file)

        # save history
        history_file = os.path.join(child_log_dir, "{}_{}_{}_history.pkl".format(MODE, BUILD_MODEL, no))
        with open(history_file, 'wb') as p:
            pickle.dump(history.history, p)

        print("\nexport logs in ", child_log_dir)
        """

        print("\npredict sequence...")
        pred = model.predict(test_data, batch_size=10, verbose=2)

        label_name_list = []
        for i in range(len(test_label)):
            if test_label[i][0] == 1:
                label_name_list.append('cat')
            elif test_label[i][1] == 1:
                label_name_list.append('dog')

        df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
        df_pred['class'] = df_pred.idxmax(axis=1)
        df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
        df_pred['collect'] = (df_pred['class'] == df_pred['label'])

        confuse = df_pred[df_pred['collect'] == False].index.tolist()
        collect = df_pred[df_pred['collect'] == True].index.tolist()
        print(df_pred)
        print("\nwrongly recognized indices are ", confuse)
        print("  wrongly recognized amount is ", len(confuse))
        print("\ncorrectly recognized indices are ", collect)
        print("  correctly recognized amount is ", len(collect))
        print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

        print("\nevaluate sequence...")
        eval_res = model.evaluate(test_data,
                                  test_label,
                                  batch_size=10,
                                  verbose=2)
        print("result loss: ", eval_res[0])
        print("result score: ", eval_res[1])

        # ----------
        save_dict = {}
        save_dict['last_loss'] = losses[len(losses) - 1]
        save_dict['last_acc'] = accs[len(accs) - 1]
        save_dict['last_val_loss'] = val_losses[len(val_losses) - 1]
        save_dict['last_val_acc'] = val_accs[len(val_accs) - 1]
        save_dict['n_confuse'] = len(confuse)
        save_dict['eval_loss'] = eval_res[0]
        save_dict['eval_acc'] = eval_res[1]
        save_dict['elapsed_time'] = elapsed_time
        print(save_dict)

        if k == 0:
            df_result = pd.DataFrame(save_dict.values(), index=save_dict.keys())
        else:
            series = pd.Series(save_dict)
            df_result[k] = series
        print(df_result)

        # undefine ----------
        # del total_data, total_label
        del traval_data, traval_label
        if MODE == 'auged':
            # del total_auged_data, total_auged_label
            del auged_traval_data, auged_traval_label
            del auged_train_data, auged_train_label
        del train_data, train_label
        del validation_data, validation_label
        del test_data, test_label
        del model
        if BUILD_MODEL == 'tlearn':
            del _history  # _history only exists in the transfer-learning branch
        del history

        # clear session against OOM Error
        keras.backend.clear_session()
        gc.collect()

        k += 1

    csv_file = os.path.join(log_dir,
                            "sample_{}_{}_{}_result.csv".format(N, MODE, BUILD_MODEL))
    df_result.to_csv(csv_file)
    print("\nexport {} as CSV.".format(csv_file))
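
# Minimal sketch of the per-fold result table built above: fold 0 creates the
# DataFrame from the metrics dict, and each later fold is attached as a new
# column keyed by k. The metric values below are hypothetical.
import pandas as pd

df_result = None
for k in range(3):
    save_dict = {'eval_acc': 0.90 + 0.01 * k, 'n_confuse': 5 - k}
    if k == 0:
        df_result = pd.DataFrame(list(save_dict.values()), index=save_dict.keys())
    else:
        df_result[k] = pd.Series(save_dict)
print(df_result)  # one column per fold, one row per metric
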
def main(log_dir):
    cwd = os.getcwd()
    cnn_dir = os.path.dirname(cwd)
    data_dir = os.path.join(cnn_dir, "dogs_vs_cats_smaller")
    test_dir = os.path.join(data_dir, "test")
    print("test dir is in ... ", test_dir)

    test_data, test_label = inputDataCreator(test_dir,
                                             224,
                                             normalize=True,
                                             one_hot=True)
    print("test data's shape: ", test_data.shape)
    print("test label's shape: ", test_label.shape)
    print("test label: \n", test_label)

    # get model file -----
    print("set log_dir: ", log_dir)
    child_log_list = os.listdir(log_dir)
    print("\nfind logs below -----")
    for i, child in enumerate(child_log_list):
        print(i, " | ", child)
    print("\nPlease choose one child_log by index ...")
    selected_child_log_idx = input(">>> ")

    selected_child_log_dir = child_log_list[int(selected_child_log_idx)]
    child_log_dir = os.path.join(log_dir, selected_child_log_dir)
    print("\nuse log at ", child_log_dir, "\n")
    # print("this directory contains : ", os.listdir(child_log_dir))
    # log list [history.pkl, model&weights.h5, log]
    child_log_list = os.listdir(child_log_dir)

    for f in child_log_list:
        if "model.h5" in f:
            model_file = os.path.join(child_log_dir, f)
            print("Use saved model : ", model_file)

    model = load_model(model_file, compile=True)
    model.summary()

    # prediction -----
    pred_result = model.predict(test_data, batch_size=10, verbose=1)

    # convert: class 0 -> cat / class 1 -> dog
    labels_class = []
    for i in range(len(test_label)):
        if test_label[i][0] == 1:
            labels_class.append('cat')
        elif test_label[i][1] == 1:
            labels_class.append('dog')

    # tabulate the prediction results
    print(pred_result)
    pred = pd.DataFrame(pred_result, columns=['cat', 'dog'])
    pred['class'] = pred.idxmax(axis=1)
    pred['label'] = labels_class
    pred['collect'] = (pred['class'] == pred['label'])

    confuse = pred[pred['collect'] == False].index.tolist()
    collect = pred[pred['collect'] == True].index.tolist()

    print(pred)
    print("\nwrongly recognized indices are ", confuse)
    print("  wrongly recognized amount is ", len(confuse))
    print("\ncorrectly recognized indices are ", collect)
    print("  correctly recognized amount is ", len(collect))
    print("\nwrong rate : ", 100 * len(confuse) / len(test_label), "%")

    print("\ncheck sequence ...")
    score = model.evaluate(test_data, test_label, batch_size=10, verbose=1)
    print("test accuracy: ", score[1])
    print("test wrong rate must be (1-accuracy): ", 1.0 - score[1])

    plt.figure(figsize=(12, 6))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.01, top=0.95)
    plt.subplots_adjust(hspace=0.5)
    # plt.title("Confusion pictures #={}".format(len(confuse)))

    n_row = 8
    n_col = 8
    for i in range(2):
        if i == 0:
            j = 0
            for idx in confuse:
                plt.subplot(n_row, n_col, 1 + j)
                plt.imshow(test_data[idx])
                plt.axis(False)
                plt.title("[{0}] p:{1}".format(idx, pred['class'][idx]))
                j += 1
        else:
            mod = j % n_row  # cells already used in the last row
            # skip ahead to the start of the next full row: n_col - remainder
            nl = j + (n_col - mod)  # newline
            for k, idx in enumerate(collect):
                plt.subplot(n_row, n_col, 1 + nl + k)
                plt.imshow(test_data[idx])
                plt.axis(False)
                plt.title("[{0}] p:{1}".format(idx, pred['class'][idx]))

    # timestamp for the exported image filename
    now = datetime.datetime.now()
    img_file_place = os.path.join(
        child_log_dir,
        "{0}_AllPics_{1:%y%m%d}_{2:%H%M}.png".format(selected_child_log_dir, now, now))
    plt.savefig(img_file_place)

    print("\nexport pictures in: ", child_log_dir, "\n")
def train(LEARN_PATH, INPUT_SIZE, CHANNEL, BATCH_SIZE, EPOCHS):
    # log_dir = os.path.join(cwd, "mini_log")
    # os.makedirs(log_dir, exist_ok=True)

    # train_dir = os.path.join(LEARN_PATH, "train")
    train_dir = os.path.join(LEARN_PATH, "train_with_aug")
    validation_dir = os.path.join(LEARN_PATH, "validation")
    test_dir = os.path.join(LEARN_PATH, "test")

    print("\ncreate train data")
    train_data, train_label = inputDataCreator(train_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)
    # debug_lv is assumed to be a module-level verbosity setting (0 = quiet)
    if debug_lv > 0:
        print("train_data: ", train_data.shape)
        print("train_label: ", train_label.shape)
        if debug_lv > 1:
            print(train_data[0])
            print(train_label)

    print("\ncreate validation data")
    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         224,
                                                         normalize=True,
                                                         one_hot=True)
    if debug_lv > 0:
        print("validation_data: ", validation_data.shape)
        print("validation_label: ", validation_label.shape)
        if debug_lv > 1:
            print(validation_data[0])
            print(validation_label)

    print("\ncreate test data")
    test_data, test_label = inputDataCreator(test_dir,
                                             224,
                                             normalize=True,
                                             one_hot=True)
    if debug_lv > 0:
        print("test_data: ", test_data.shape)
        print("test_label: ", test_label.shape)
        if debug_lv > 1:
            print(test_data[0])
            print(test_label)

    mh = ModelHandler(INPUT_SIZE, CHANNEL)
    model = mh.buildTlearnModel(base='mnv1')
    model.summary()

    history = model.fit(train_data,
                        train_label,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=(validation_data, validation_label),
                        verbose=2)

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']
    print("last val_acc: ", val_accs[len(val_accs) - 1])

    print("\npredict sequence...")
    pred = model.predict(test_data,
                         batch_size=BATCH_SIZE,
                         verbose=2)

    label_name_list = []
    for i in range(len(test_label)):
        if test_label[i][0] == 1:
            label_name_list.append('cat')
        elif test_label[i][1] == 1:
            label_name_list.append('dog')

    # print("result: ", pred)
    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    confuse = df_pred[df_pred['collect'] == False].index.tolist()
    collect = df_pred[df_pred['collect'] == True].index.tolist()
    print(df_pred)
    print("\nwrongly recognized indices are ", confuse)
    print("  wrongly recognized amount is ", len(confuse))
    print("\ncorrectly recognized indices are ", collect)
    print("  correctly recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    print("\nevaluate sequence...")
    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=BATCH_SIZE,
                              verbose=2)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # ----------
    save_dict = {}
    save_dict['last_loss'] = losses[len(losses) - 1]
    save_dict['last_acc'] = accs[len(accs) - 1]
    save_dict['last_val_loss'] = val_losses[len(val_losses) - 1]
    save_dict['last_val_acc'] = val_accs[len(val_accs) - 1]
    save_dict['n_confuse'] = len(confuse)
    save_dict['eval_loss'] = eval_res[0]
    save_dict['eval_acc'] = eval_res[1]
    print(save_dict)
def main():
    cwd = os.getcwd()
    log_dir = os.path.join(cwd, "log")
    os.makedirs(log_dir, exist_ok=True)

    data_dir = os.path.join(cwd, "experiment_0")
    print("\ncreate data....")
    total_data, total_label = inputDataCreator(data_dir,
                                               224,
                                               normalize=True,
                                               # one_hot=True
                                               )
    print("\ntotal_data shape: ", total_data.shape)
    print("total_label shape: ", total_label.shape)

    input_size = total_data.shape[1]
    channel = total_data.shape[3]
    mh = ModelHandler(input_size, channel)

    skf = StratifiedKFold(n_splits=10)
    k = 0
    for traval_idx, test_idx in skf.split(total_data, total_label):
        print("\nK-Fold Cross-Validation k:{} ==========".format(k))
        print("\ntrain indices: \n", traval_idx)
        print("\ntest indices: \n", test_idx)

        test_data = total_data[test_idx]
        test_label = total_label[test_idx]
        print("-----*-----*-----")

        traval_data = total_data[traval_idx]
        traval_label = total_label[traval_idx]
        # print(traval_data.shape)
        # print(traval_label.shape)

        # one-hot encode the integer labels via an identity-matrix lookup
        traval_label = np.identity(2)[traval_label.astype(np.int8)]
        test_label = np.identity(2)[test_label.astype(np.int8)]

        train_data, train_label, validation_data, validation_label, _, _ = dataSplit(
            traval_data,
            traval_label,
            train_rate=2 / 3,
            validation_rate=1 / 3,
            test_rate=0)

        print("train_data shape: ", train_data.shape)
        print("train_label shape: ", train_label.shape)
        print("validation_data shape: ", validation_data.shape)
        print("validation_label shape: ", validation_label.shape)
        print("test_data shape: ", test_data.shape)
        print("test_label shape: ", test_label.shape)
        print("*…*…*…*…*…*…*…*…*…*…*…*…*…*…*…*")

        model = mh.buildTlearnModel(base='mnv1')
        model.summary()

        history = model.fit(train_data,
                            train_label,
                            batch_size=10,
                            epochs=30,
                            validation_data=(validation_data, validation_label),
                            verbose=1)

        accs = history.history['accuracy']
        losses = history.history['loss']
        val_accs = history.history['val_accuracy']
        val_losses = history.history['val_loss']
        print("last val_acc: ", val_accs[len(val_accs) - 1])

        print("\npredict sequence...")
        pred = model.predict(test_data,
                             batch_size=10,
                             verbose=1)

        label_name_list = []
        for i in range(len(test_label)):
            if test_label[i][0] == 1:
                label_name_list.append('cat')
            elif test_label[i][1] == 1:
                label_name_list.append('dog')

        # print("result: ", pred)
        df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
        df_pred['class'] = df_pred.idxmax(axis=1)
        df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
        df_pred['collect'] = (df_pred['class'] == df_pred['label'])

        confuse = df_pred[df_pred['collect'] == False].index.tolist()
        collect = df_pred[df_pred['collect'] == True].index.tolist()
        print(df_pred)
        print("\nwrongly recognized indices are ", confuse)
        print("  wrongly recognized amount is ", len(confuse))
        print("\ncorrectly recognized indices are ", collect)
        print("  correctly recognized amount is ", len(collect))
        print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

        print("\nevaluate sequence...")
        eval_res = model.evaluate(test_data,
                                  test_label,
                                  batch_size=10,
                                  verbose=1)
        print("result loss: ", eval_res[0])
        print("result score: ", eval_res[1])

        # ----------
        save_dict = {}
        save_dict['last_loss'] = losses[len(losses) - 1]
        save_dict['last_acc'] = accs[len(accs) - 1]
        save_dict['last_val_loss'] = val_losses[len(val_losses) - 1]
        save_dict['last_val_acc'] = val_accs[len(val_accs) - 1]
        save_dict['n_confuse'] = len(confuse)
        save_dict['eval_loss'] = eval_res[0]
        save_dict['eval_acc'] = eval_res[1]
        print(save_dict)

        if k == 0:
            df_result = pd.DataFrame(save_dict.values(), index=save_dict.keys())
        else:
            series = pd.Series(save_dict)
            df_result[k] = series
        print(df_result)
        k += 1

    csv_file = "./result.csv"
    df_result.to_csv(csv_file)
    print("\nexport {} as CSV.".format(csv_file))
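
# Quick demo of the one-hot trick used in the K-fold scripts above: indexing
# a 2x2 identity matrix with integer class labels returns one-hot rows.
# The label array here is made-up demo data.
import numpy as np

labels = np.array([0, 1, 1, 0])
one_hot = np.identity(2)[labels.astype(np.int8)]
print(one_hot)
# [[1. 0.]
#  [0. 1.]
#  [0. 1.]
#  [1. 0.]]
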
def main(N, LEARN_PATH, DATA_MODE, EPOCHS=60, FINE_TUNE_AT=81):
    sample_dir = os.path.join(LEARN_PATH, "sample_{}".format(N))

    use_da_data = False
    if use_da_data:
        train_dir = os.path.join(sample_dir, "train_with_aug")
    else:
        train_dir = os.path.join(sample_dir, "train")
    validation_dir = os.path.join(sample_dir, "validation")
    test_dir = os.path.join(sample_dir, "test")
    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    train_data, train_label = inputDataCreator(train_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)
    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         224,
                                                         normalize=True,
                                                         one_hot=True)
    test_data, test_label = inputDataCreator(test_dir,
                                             224,
                                             normalize=True,
                                             one_hot=True)
    print("\ntrain data shape: ", train_data.shape)
    print("train label shape: ", train_label.shape)
    print("\nvalidation data shape: ", validation_data.shape)
    print("validation label shape: ", validation_label.shape)

    input_size = train_data.shape[1]
    channel = train_data.shape[3]
    batch_size = 10
    print("set epochs: ", EPOCHS)

    # build model ----------
    mh = ModelHandler(input_size, channel)

    # define base_model separately so its weights can be unfrozen later
    # base_model = mh.buildMnv1Base()
    base_model = mh.buildVgg16Base()
    base_model.trainable = False

    model = mh.addChead(base_model)
    model.summary()

    """
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       mode='auto',
                       restore)
    """
    # early stopping
    es = keras.callbacks.EarlyStopping(monitor='val_loss',
                                       patience=5,
                                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    start = time.time()

    # warm up -----
    print("\nwarm up sequence .....")
    model.summary()
    _history = model.fit(train_data,
                         train_label,
                         batch_size,
                         epochs=EPOCHS,
                         validation_data=(validation_data, validation_label),
                         callbacks=[es],
                         verbose=2)

    mh.setFineTune(base_model, model, FINE_TUNE_AT)
    model.summary()

    history = model.fit(train_data,
                        train_label,
                        batch_size,
                        epochs=EPOCHS,
                        validation_data=(validation_data, validation_label),
                        callbacks=[es],
                        verbose=2)
    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']

    print("\npredict sequence...")
    pred = model.predict(test_data, batch_size=10, verbose=1)

    label_name_list = []
    for i in range(len(test_label)):
        if test_label[i][0] == 1:
            label_name_list.append('cat')
        elif test_label[i][1] == 1:
            label_name_list.append('dog')

    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    confuse = df_pred[df_pred['collect'] == False].index.tolist()
    collect = df_pred[df_pred['collect'] == True].index.tolist()
    print(df_pred)
    print("\nwrongly recognized indices are ", confuse)
    print("  wrongly recognized amount is ", len(confuse))
    print("\ncorrectly recognized indices are ", collect)
    print("  correctly recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    print("\nevaluate sequence...")
    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=10,
                              verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # ----------
    save_dict = {}
    save_dict['last_loss'] = losses[len(losses) - 1]
    save_dict['last_acc'] = accs[len(accs) - 1]
    save_dict['last_val_loss'] = val_losses[len(val_losses) - 1]
    save_dict['last_val_acc'] = val_accs[len(val_accs) - 1]
    save_dict['n_confuse'] = len(confuse)
    save_dict['eval_loss'] = eval_res[0]
    save_dict['eval_acc'] = eval_res[1]
    save_dict['elapsed_time'] = elapsed_time
    print(save_dict)

    # try undefining the heavy objects
    # del train_data, train_label, validation_data, validation_label, test_data, test_label
    del model
    del _history, history
    # del pred

    keras.backend.clear_session()
    gc.collect()

    return save_dict
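
# A minimal sketch of the warm-up-then-fine-tune pattern that ModelHandler's
# setFineTune implements (ModelHandler itself is not shown in this snippet).
# Assumptions: TF2-style keras; MobileNet stands in for the project's base;
# weights=None keeps the example offline. FINE_TUNE_AT matches the cut point
# these scripts pass in.
from tensorflow import keras

base = keras.applications.MobileNet(include_top=False,
                                    input_shape=(224, 224, 3),
                                    pooling="avg",
                                    weights=None)
base.trainable = False  # warm-up phase: train only the new head

model = keras.Sequential([base, keras.layers.Dense(2, activation="softmax")])
model.compile(optimizer="adam", loss="categorical_crossentropy")
# ... warm-up model.fit(...) would run here ...

FINE_TUNE_AT = 81
base.trainable = True
for layer in base.layers[:FINE_TUNE_AT]:
    layer.trainable = False  # keep the lower layers frozen
# recompile with a small learning rate before continuing training
model.compile(optimizer=keras.optimizers.Adam(1e-5),
              loss="categorical_crossentropy")
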
# method of a data-handler class (the class definition is not shown in this snippet)
def img2array(self, TARGET_DIR, INPUT_SIZE, NORMALIZE=False):
    data, label = inputDataCreator(TARGET_DIR,
                                   INPUT_SIZE,
                                   normalize=NORMALIZE)
    return data, label
def main():
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)
    sub_prj_root = os.path.dirname(sub_prj)
    prj_root = os.path.dirname(sub_prj_root)

    data_dir = os.path.join(prj_root, "datasets")
    data_src = os.path.join(data_dir, "small_721")
    print("\ndata source: ", data_src)

    # assumed run settings: these names are used below but were not defined
    # in this snippet; the values match the ones used elsewhere in the project
    input_size = 224
    channel = 3
    batch_size = 10
    set_epochs = 60

    use_da_data = False
    if use_da_data:
        train_dir = os.path.join(data_src, "train_with_aug")
    else:
        train_dir = os.path.join(data_src, "train")
    validation_dir = os.path.join(data_src, "validation")
    test_dir = os.path.join(data_src, "test")
    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    train_data, train_label = inputDataCreator(train_dir,
                                               input_size,
                                               normalize=True,
                                               one_hot=True)
    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         input_size,
                                                         normalize=True,
                                                         one_hot=True)
    test_data, test_label = inputDataCreator(test_dir,
                                             input_size,
                                             normalize=True,
                                             one_hot=True)

    """
    total_data, total_label = inputDataCreator(data_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)
    train_data, train_label, validation_data, validation_label, test_data, test_label = dataSplit(total_data, total_label)
    """

    print("train data shape (in batch): ", train_data.shape)
    print("train label shape (in batch): ", train_label.shape)
    # print("validation data shape:", validation_data.shape)
    # print("validation label shape:", validation_label.shape)
    # print("test data shape:", test_data.shape)
    # print("test label shape:", test_label.shape)

    # build model ----------
    mh = ModelHandler(input_size, channel)
    model = mh.buildMyModel()
    model.summary()

    # instance EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       # monitor='val_accuracy',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    start = time.time()
    history = model.fit(train_data,
                        train_label,
                        batch_size=batch_size,
                        epochs=set_epochs,
                        validation_data=(validation_data, validation_label),
                        callbacks=[es],
                        verbose=1)
    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    # evaluate ----------
    print("\nevaluate sequence...")
    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']
    print("last val_acc: ", val_accs[len(val_accs) - 1])

    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=10,
                              verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # logging and detail outputs -----
    # make log directory
    log_dir = os.path.join(sub_prj, "outputs", "logs")
    os.makedirs(log_dir, exist_ok=True)
    model_log_dir = os.path.join(sub_prj, "outputs", "models")
    os.makedirs(model_log_dir, exist_ok=True)

    now = datetime.datetime.now()
    child_log_dir = os.path.join(log_dir, "{0:%Y%m%d}".format(now))
    os.makedirs(child_log_dir, exist_ok=True)
    child_model_log_dir = os.path.join(model_log_dir, "{0:%Y%m%d}".format(now))
    os.makedirs(child_model_log_dir, exist_ok=True)

    # save model & weights
    model_file = os.path.join(child_model_log_dir, "model.h5")
    model.save(model_file)
    print("\nexport model in ", child_model_log_dir)

    print("\npredict sequence...")
    pred = model.predict(test_data, batch_size=batch_size, verbose=1)

    label_name_list = []
    for i in range(len(test_label)):
        if test_label[i][0] == 1:
            label_name_list.append('cat')
        elif test_label[i][1] == 1:
            label_name_list.append('dog')

    # print("result: ", pred)
    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    confuse = df_pred[df_pred['collect'] == False].index.tolist()
    collect = df_pred[df_pred['collect'] == True].index.tolist()
    print(df_pred)
    print("\nwrongly recognized indices are ", confuse)
    print("  wrongly recognized amount is ", len(confuse))
    print("\ncorrectly recognized indices are ", collect)
    print("  correctly recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    # save history
    save_dict = {}
    save_dict['last_loss'] = losses[len(losses) - 1]
    save_dict['last_acc'] = accs[len(accs) - 1]
    save_dict['last_val_loss'] = val_losses[len(val_losses) - 1]
    save_dict['last_val_acc'] = val_accs[len(val_accs) - 1]
    save_dict['n_confuse'] = len(confuse)
    save_dict['eval_loss'] = eval_res[0]
    save_dict['eval_acc'] = eval_res[1]
    save_dict['elapsed_time'] = elapsed_time
    print(save_dict)

    df_result = pd.DataFrame(save_dict.values(), index=save_dict.keys())
    csv_file = os.path.join(child_log_dir, "result.csv")
    df_result.to_csv(csv_file)
    print("\nexport history in ", csv_file)
def main(data_mode, model_mode, no, set_epochs=60, do_es=False):
    batch_size = 10

    if data_mode == 'native':
        # cwd is assumed to be defined at module level (cwd = os.getcwd())
        data_dir = os.path.join(cwd, "experiment_{}".format(no))
        total_data, total_label = inputDataCreator(data_dir,
                                                   224,
                                                   normalize=True,
                                                   one_hot=True)
        train_data, train_label, validation_data, validation_label, test_data, test_label = dataSplit(
            total_data, total_label)
        """
        print("\ntrain data shape: ", train_data.shape)
        print("train label shape: ", train_label.shape)
        print("\nvalidation data shape: ", validation_data.shape)
        print("validation label shape: ", validation_label.shape)
        print("\ntest data shape: ", test_data.shape)
        print("test label shape: ", test_label.shape)
        """

        datagen = ImageDataGenerator()
        train_generator = datagen.flow(train_data,
                                       train_label,
                                       # target_size=(224, 224),
                                       batch_size=batch_size,
                                       shuffle=True)
        validation_generator = datagen.flow(validation_data,
                                            validation_label,
                                            # target_size=(224, 224),
                                            batch_size=batch_size,
                                            shuffle=True)
        # keep the test set unshuffled so predictions line up with test_label
        test_generator = datagen.flow(test_data,
                                      test_label,
                                      # target_size=(224, 224),
                                      batch_size=batch_size,
                                      shuffle=False)
    elif data_mode == 'auged':
        set_epochs = int(set_epochs / 2)
        data_dir = os.path.join(cwd, "concat_experiment_{}".format(no))
        train_dir = os.path.join(data_dir, "train")
        validation_dir = os.path.join(data_dir, "validation")
        test_dir = os.path.join(data_dir, "test")

        datagen = ImageDataGenerator(rescale=1 / 255.0)
        train_generator = datagen.flow_from_directory(train_dir,
                                                      target_size=(224, 224),
                                                      batch_size=batch_size,
                                                      shuffle=True,
                                                      class_mode='categorical')
        validation_generator = datagen.flow_from_directory(validation_dir,
                                                           target_size=(224, 224),
                                                           batch_size=batch_size,
                                                           shuffle=True,
                                                           class_mode='categorical')
        # keep the test set unshuffled so the labels collected from this
        # generator later match the prediction order
        test_generator = datagen.flow_from_directory(test_dir,
                                                     target_size=(224, 224),
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     class_mode='categorical')

    data_sample, label_sample = next(train_generator)
    input_size = data_sample.shape[1]
    channel = data_sample.shape[3]
    print("input_size: {} | channel: {}".format(input_size, channel))
    print("set epochs: ", set_epochs)

    mh = ModelHandler(input_size, channel)
    if model_mode == 'mymodel':
        model = mh.buildMyModel()
    elif model_mode == 'tlearn':
        model = mh.buildTlearnModel(base='mnv1')
    model.summary()

    if do_es:
        es = EarlyStopping(monitor='val_loss',
                           patience=5,
                           verbose=1,
                           mode='auto')
        es = [es]
    else:
        es = None

    print("\ntraining sequence start .....")
    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    start = time.time()
    history = model.fit_generator(train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=set_epochs,
                                  validation_data=validation_generator,
                                  validation_steps=validation_steps,
                                  callbacks=es,  # the EarlyStopping list built above (or None)
                                  verbose=1)
    elapsed_time = time.time() - start

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']

    if do_es:
        log_dir = os.path.join(cwd, "log_with_es")
    else:
        log_dir = os.path.join(cwd, "log")
    os.makedirs(log_dir, exist_ok=True)

    """
    child_log_dir = os.path.join(log_dir, "{}_{}_{}".format(data_mode, model_mode, no))
    os.makedirs(child_log_dir, exist_ok=True)

    # save model & weights
    model_file = os.path.join(child_log_dir, "{}_{}_{}_model.h5".format(data_mode, model_mode, no))
    model.save(model_file)

    # save history
    history_file = os.path.join(child_log_dir, "{}_{}_{}_history.pkl".format(data_mode, model_mode, no))
    with open(history_file, 'wb') as p:
        pickle.dump(history.history, p)

    print("\nexport logs in ", child_log_dir)
    """
p) print("\nexport logs in ", child_log_dir) """ print("\npredict sequence...") pred = model.predict_generator(test_generator, verbose=1) label_name_list = [] if data_mode == 'auged': test_steps = test_generator.n // batch_size for i in range(test_steps): _, tmp = next(test_generator) if i == 0: test_label = tmp else: test_label = np.vstack((test_label, tmp)) for i in range(len(test_label)): if test_label[i][0] == 1: label_name_list.append('cat') elif test_label[i][1] == 1: label_name_list.append('dog') df_pred = pd.DataFrame(pred, columns=['cat', 'dog']) df_pred['class'] = df_pred.idxmax(axis=1) df_pred['label'] = pd.DataFrame(label_name_list, columns=['label']) df_pred['collect'] = (df_pred['class'] == df_pred['label']) confuse = df_pred[df_pred['collect'] == False].index.tolist() collect = df_pred[df_pred['collect'] == True].index.tolist() print(df_pred) print("\nwrong recognized indeices are ", confuse) print(" wrong recognized amount is ", len(confuse)) print("\ncollect recognized indeices are ", collect) print(" collect recognized amount is ", len(collect)) print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %") print("\nevaluate sequence...") eval_res = model.evaluate(test_generator, verbose=1) print("result loss: ", eval_res[0]) print("result score: ", eval_res[1]) # ---------- save_dict = {} save_dict['last_loss'] = losses[len(losses) - 1] save_dict['last_acc'] = accs[len(accs) - 1] save_dict['last_val_loss'] = val_losses[len(val_losses) - 1] save_dict['last_val_acc'] = val_accs[len(val_accs) - 1] save_dict['n_confuse'] = len(confuse) save_dict['eval_loss'] = eval_res[0] save_dict['eval_acc'] = eval_res[1] save_dict['elapsed_time'] = elapsed_time print(save_dict) # undefine validable ---------- del datagen del train_generator, validation_generator, test_generator # due to CPU memory --------- # del train_data, train_label # del validation_data, validation_label # del test_data, test_label # del set_epochs # due to GPU memory --------- # del mh, del model del history # del accs, losses, val_accs, val_losses # del pred, df_pred, label_name_list # del confuse, collect # del eval_res keras.backend.clear_session() gc.collect() return save_dict
def main(data_mode, model_mode, no, set_epochs=60, do_es=False):
    cwd = os.getcwd()

    data_dir = os.path.join(cwd, "experiment_{}".format(no))
    total_data, total_label = inputDataCreator(data_dir,
                                               224,
                                               normalize=True,
                                               # one_hot=True
                                               )
    print("\ntotal_data shape: ", total_data.shape)
    print("total_label shape: ", total_label.shape)

    if data_mode == 'auged':
        base_dir, data_dir_name = os.path.split(data_dir)
        data_dir_name = "auged_" + data_dir_name
        auged_dir = os.path.join(base_dir, data_dir_name)
        set_epochs = int(set_epochs / 2)
        total_auged_data, total_auged_label = inputDataCreator(auged_dir,
                                                               224,
                                                               normalize=True,
                                                               one_hot=True)
        print("\n total auged_data : ", total_auged_data.shape)

    input_size = total_data.shape[1]
    channel = total_data.shape[3]
    mh = ModelHandler(input_size, channel)

    skf = StratifiedKFold(n_splits=10)
    k = 0
    for traval_idx, test_idx in skf.split(total_data, total_label):
        print("\nK-Fold Cross-Validation k:{} ==========".format(k))
        print("\ntrain indices: \n", traval_idx)
        print("\ntest indices: \n", test_idx)

        test_data = total_data[test_idx]
        test_label = total_label[test_idx]
        print("-----*-----*-----")

        traval_data = total_data[traval_idx]
        traval_label = total_label[traval_idx]
        # print(traval_data.shape)
        # print(traval_label.shape)

        # one-hot encode the integer labels via an identity-matrix lookup
        traval_label = np.identity(2)[traval_label.astype(np.int8)]
        test_label = np.identity(2)[test_label.astype(np.int8)]

        train_data, train_label, validation_data, validation_label, _, _ = dataSplit(
            traval_data,
            traval_label,
            train_rate=2 / 3,
            validation_rate=1 / 3,
            test_rate=0)

        if data_mode == 'auged':
            print("\nadd auged data to train_data...")
            auged_traval_data = total_auged_data[traval_idx]
            auged_traval_label = total_auged_label[traval_idx]
            auged_train_data, auged_train_label, _, _, _, _ = dataSplit(
                auged_traval_data,
                auged_traval_label,
                train_rate=2 / 3,
                validation_rate=1 / 3,
                test_rate=0)
            print("  append auged data: ", auged_train_data.shape)

            print("\n  concatenate auged data with native data...")
            train_data = np.vstack((train_data, auged_train_data))
            train_label = np.vstack((train_label, auged_train_label))
            print("  Done.")

        print("\ntrain data shape: ", train_data.shape)
        print("train label shape: ", train_label.shape)
        print("\nvalidation data shape: ", validation_data.shape)
        print("validation label shape: ", validation_label.shape)
        print("\ntest data shape: ", test_data.shape)
        print("test label shape: ", test_label.shape)

        if model_mode == 'mymodel':
            model = mh.buildMyModel()
        elif model_mode == 'tlearn':
            model = mh.buildTlearnModel(base='mnv1')
        model.summary()

        if do_es:
            es = EarlyStopping(monitor='val_loss',
                               patience=5,
                               verbose=1,
                               mode='auto')
            es = [es]
        else:
            es = None

        batch_size = 10
        print("set epochs: ", set_epochs)

        print("\ntraining sequence start .....")
        start = time.time()
        history = model.fit(train_data,
                            train_label,
                            batch_size,
                            epochs=set_epochs,
                            validation_data=(validation_data, validation_label),
                            callbacks=es,
                            verbose=1)
        elapsed_time = time.time() - start

        accs = history.history['accuracy']
        losses = history.history['loss']
        val_accs = history.history['val_accuracy']
        val_losses = history.history['val_loss']

        if do_es:
            log_dir = os.path.join(cwd, "rlog_with_es")
        else:
            log_dir = os.path.join(cwd, "rlog")
        os.makedirs(log_dir, exist_ok=True)
        child_log_dir = os.path.join(log_dir,
                                     "{}_{}_{}".format(data_mode, model_mode, no))
        os.makedirs(child_log_dir, exist_ok=True)

        """
        # save model & weights
        model_file = os.path.join(child_log_dir, "{}_{}_{}_model.h5".format(data_mode, model_mode, no))
        model.save(model_file)

        # save history
        history_file = os.path.join(child_log_dir, "{}_{}_{}_history.pkl".format(data_mode, model_mode, no))
        with open(history_file, 'wb') as p:
            pickle.dump(history.history, p)

        print("\nexport logs in ", child_log_dir)
        """
        print("\npredict sequence...")
        pred = model.predict(test_data, batch_size=10, verbose=1)

        label_name_list = []
        for i in range(len(test_label)):
            if test_label[i][0] == 1:
                label_name_list.append('cat')
            elif test_label[i][1] == 1:
                label_name_list.append('dog')

        df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
        df_pred['class'] = df_pred.idxmax(axis=1)
        df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
        df_pred['collect'] = (df_pred['class'] == df_pred['label'])

        confuse = df_pred[df_pred['collect'] == False].index.tolist()
        collect = df_pred[df_pred['collect'] == True].index.tolist()
        print(df_pred)
        print("\nwrongly recognized indices are ", confuse)
        print("  wrongly recognized amount is ", len(confuse))
        print("\ncorrectly recognized indices are ", collect)
        print("  correctly recognized amount is ", len(collect))
        print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

        print("\nevaluate sequence...")
        eval_res = model.evaluate(test_data,
                                  test_label,
                                  batch_size=10,
                                  verbose=1)
        print("result loss: ", eval_res[0])
        print("result score: ", eval_res[1])

        # ----------
        save_dict = {}
        save_dict['last_loss'] = losses[len(losses) - 1]
        save_dict['last_acc'] = accs[len(accs) - 1]
        save_dict['last_val_loss'] = val_losses[len(val_losses) - 1]
        save_dict['last_val_acc'] = val_accs[len(val_accs) - 1]
        save_dict['n_confuse'] = len(confuse)
        save_dict['eval_loss'] = eval_res[0]
        save_dict['eval_acc'] = eval_res[1]
        save_dict['elapsed_time'] = elapsed_time
        print(save_dict)

        if k == 0:
            df_result = pd.DataFrame(save_dict.values(), index=save_dict.keys())
        else:
            series = pd.Series(save_dict)
            df_result[k] = series
        print(df_result)

        # undefine ----------
        # del total_data, total_label
        del traval_data, traval_label
        if data_mode == 'auged':
            # del total_auged_data, total_auged_label
            del auged_traval_data, auged_traval_label
            del auged_train_data, auged_train_label
        del train_data, train_label
        del validation_data, validation_label
        del test_data, test_label
        del model
        del history
        del pred
        del df_pred, label_name_list, confuse, collect
        del eval_res
        del accs, losses, val_accs, val_losses

        # clear session against OOM Error
        keras.backend.clear_session()
        gc.collect()

        k += 1

    csv_file = os.path.join(child_log_dir,
                            "{}_{}_result.csv".format(data_mode, model_mode))
    df_result.to_csv(csv_file)
    print("\nexport {} as CSV.".format(csv_file))

    # delete the remaining variables at the end of this program -----
    del cwd, data_dir
    del total_data, total_label
    del save_dict, df_result
    if data_mode == 'auged':
        del total_auged_data, total_auged_label
        del base_dir, data_dir_name, auged_dir
    gc.collect()
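
# Self-contained sketch of the per-fold memory hygiene used above: delete the
# Python references, clear the Keras session, then force garbage collection,
# so repeated fits in one process do not accumulate graph state. The tiny
# Dense model and random data are stand-ins, not project code.
import gc
import numpy as np
from tensorflow import keras

x = np.random.rand(16, 4).astype("float32")
y = np.random.randint(0, 2, size=(16, 1))

for fold in range(2):
    model = keras.Sequential([
        keras.layers.Dense(8, activation="relu", input_shape=(4,)),
        keras.layers.Dense(1, activation="sigmoid"),
    ])
    model.compile(optimizer="adam", loss="binary_crossentropy")
    model.fit(x, y, epochs=1, verbose=0)
    del model
    keras.backend.clear_session()  # drop the global graph/session state
    gc.collect()
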
def main(N, LEARN_PATH, DATA_MODE, BUILD_MODEL, EPOCHS=60):
    sample_dir = os.path.join(LEARN_PATH, "sample_{}".format(N))

    use_da_data = False
    if use_da_data:
        train_dir = os.path.join(sample_dir, "train_with_aug")
    else:
        train_dir = os.path.join(sample_dir, "train")
    validation_dir = os.path.join(sample_dir, "validation")
    test_dir = os.path.join(sample_dir, "test")
    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    train_data, train_label = inputDataCreator(train_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)
    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         224,
                                                         normalize=True,
                                                         one_hot=True)
    test_data, test_label = inputDataCreator(test_dir,
                                             224,
                                             normalize=True,
                                             one_hot=True)
    print("\ntrain data shape: ", train_data.shape)
    print("train label shape: ", train_label.shape)
    print("\nvalidation data shape: ", validation_data.shape)
    print("validation label shape: ", validation_label.shape)

    input_size = train_data.shape[1]
    channel = train_data.shape[3]
    batch_size = 10
    print("set epochs: ", EPOCHS)

    mh = ModelHandler(input_size, channel)
    if BUILD_MODEL == 'mymodel':
        model = mh.buildMyModel()
    elif BUILD_MODEL == 'tlearn':
        model = mh.buildTlearnModel(base='mnv1')
    model.summary()

    """
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       mode='auto',
                       restore)
    """
    # early stopping
    es = keras.callbacks.EarlyStopping(monitor='val_loss',
                                       patience=5,
                                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    start = time.time()
    history = model.fit(train_data,
                        train_label,
                        batch_size,
                        epochs=EPOCHS,
                        validation_data=(validation_data, validation_label),
                        callbacks=[es],
                        verbose=2)
    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']

    """
    # logging and detail outputs -----
    # make log directory
    log_dir = os.path.join(sub_prj, "outputs", "logs")
    os.makedirs(log_dir, exist_ok=True)
    model_log_dir = os.path.join(sub_prj, "outputs", "models")
    os.makedirs(model_log_dir, exist_ok=True)

    now = datetime.datetime.now()
    child_log_dir = os.path.join(log_dir, "{0:%Y%m%d}".format(now))
    os.makedirs(child_log_dir, exist_ok=True)
    child_model_log_dir = os.path.join(model_log_dir, "{0:%Y%m%d}".format(now))
    os.makedirs(child_model_log_dir, exist_ok=True)
    """
    """
    if do_es:
        log_dir = os.path.join(cwd, "log_with_es")
    else:
        log_dir = os.path.join(cwd, "log")
    os.makedirs(log_dir, exist_ok=True)
    """
    """
    child_log_dir = os.path.join(log_dir, "{}_{}_{}".format(DATA_MODE, BUILD_MODEL, no))
    os.makedirs(child_log_dir, exist_ok=True)

    # save model & weights
    model_file = os.path.join(child_log_dir, "{}_{}_{}_model.h5".format(DATA_MODE, BUILD_MODEL, no))
    model.save(model_file)

    # save history
    history_file = os.path.join(child_log_dir, "{}_{}_{}_history.pkl".format(DATA_MODE, BUILD_MODEL, no))
    with open(history_file, 'wb') as p:
        pickle.dump(history.history, p)

    print("\nexport logs in ", child_log_dir)
    """

    print("\npredict sequence...")
    pred = model.predict(test_data, batch_size=10, verbose=1)

    label_name_list = []
    for i in range(len(test_label)):
        if test_label[i][0] == 1:
            label_name_list.append('cat')
        elif test_label[i][1] == 1:
            label_name_list.append('dog')

    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])
    confuse = df_pred[df_pred['collect'] == False].index.tolist()
    collect = df_pred[df_pred['collect'] == True].index.tolist()
    print(df_pred)
    print("\nwrongly recognized indices are ", confuse)
    print("  wrongly recognized amount is ", len(confuse))
    print("\ncorrectly recognized indices are ", collect)
    print("  correctly recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    print("\nevaluate sequence...")
    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=10,
                              verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # ----------
    save_dict = {}
    save_dict['last_loss'] = losses[len(losses) - 1]
    save_dict['last_acc'] = accs[len(accs) - 1]
    save_dict['last_val_loss'] = val_losses[len(val_losses) - 1]
    save_dict['last_val_acc'] = val_accs[len(val_accs) - 1]
    save_dict['n_confuse'] = len(confuse)
    save_dict['eval_loss'] = eval_res[0]
    save_dict['eval_acc'] = eval_res[1]
    save_dict['elapsed_time'] = elapsed_time
    print(save_dict)

    # try undefining the heavy objects
    # del train_data, train_label, validation_data, validation_label, test_data, test_label
    del model
    del history
    # del pred

    keras.backend.clear_session()
    gc.collect()

    return save_dict
def main(LEARN_PATH, INPUT_SIZE, CHANNEL, BATCH_SIZE, EPOCHS):
    target_size = (INPUT_SIZE, INPUT_SIZE)
    input_shape = (INPUT_SIZE, INPUT_SIZE, CHANNEL)

    data_src = LEARN_PATH
    print("\ndata source: ", data_src)

    use_da_data = False
    if use_da_data:
        train_dir = os.path.join(data_src, "train_with_aug")
    else:
        train_dir = os.path.join(data_src, "train")
    validation_dir = os.path.join(data_src, "validation")
    test_dir = os.path.join(data_src, "test")
    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    train_data, train_label = inputDataCreator(train_dir,
                                               INPUT_SIZE,
                                               normalize=True,
                                               one_hot=True)
    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         INPUT_SIZE,
                                                         normalize=True,
                                                         one_hot=True)
    test_data, test_label = inputDataCreator(test_dir,
                                             INPUT_SIZE,
                                             normalize=True,
                                             one_hot=True)

    """
    total_data, total_label = inputDataCreator(data_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)
    train_data, train_label, validation_data, validation_label, test_data, test_label = dataSplit(total_data, total_label)
    """

    print("train data shape (in batch): ", train_data.shape)
    print("train label shape (in batch): ", train_label.shape)
    # print("validation data shape:", validation_data.shape)
    # print("validation label shape:", validation_label.shape)
    # print("test data shape:", test_data.shape)
    # print("test label shape:", test_label.shape)

    # build model ----------
    mh = ModelHandler(INPUT_SIZE, CHANNEL)
    model = mh.buildMyModel()
    model.summary()

    # instance EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       # monitor='val_accuracy',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    start = time.time()
    history = model.fit(train_data,
                        train_label,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=(validation_data, validation_label),
                        callbacks=[es],
                        verbose=2)
    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']
    print("last val_acc: ", val_accs[len(val_accs) - 1])

    # evaluate ----------
    print("\nevaluate sequence...")
    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=10,
                              verbose=2)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # logging and detail outputs -----
    # make log directory
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)
    log_dir = os.path.join(sub_prj, "outputs", "logs")
    os.makedirs(log_dir, exist_ok=True)
    model_log_dir = os.path.join(sub_prj, "outputs", "models")
    os.makedirs(model_log_dir, exist_ok=True)

    now = datetime.datetime.now()
    child_log_dir = os.path.join(log_dir, "{0:%Y%m%d}".format(now))
    os.makedirs(child_log_dir, exist_ok=True)
    child_model_log_dir = os.path.join(model_log_dir, "{0:%Y%m%d}".format(now))
    os.makedirs(child_model_log_dir, exist_ok=True)

    # save model & weights
    model_file = os.path.join(child_model_log_dir, "model.h5")
    model.save(model_file)
    print("\nexport model in ", child_model_log_dir)

    # predict -> confusion matrix ----------
    print("\npredict sequence...")
    pred = model.predict(test_data, batch_size=BATCH_SIZE, verbose=2)

    label_name_list = []
    for i in range(len(test_label)):
        if test_label[i][0] == 1:
            label_name_list.append('cat')
        elif test_label[i][1] == 1:
            label_name_list.append('dog')

    # print("result: ", pred)
    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    confuse = df_pred[df_pred['collect'] == False].index.tolist()
    collect = df_pred[df_pred['collect'] == True].index.tolist()
    print(df_pred)
    print("\nwrongly recognized indices are ", confuse)
    print("  wrongly recognized amount is ", len(confuse))
    print("\ncorrectly recognized indices are ", collect)
    print("  correctly recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    # save history
    save_dict = {}
    save_dict['last_loss'] = losses[len(losses) - 1]
    save_dict['last_acc'] = accs[len(accs) - 1]
    save_dict['last_val_loss'] = val_losses[len(val_losses) - 1]
    save_dict['last_val_acc'] = val_accs[len(val_accs) - 1]
    save_dict['n_confuse'] = len(confuse)
    save_dict['eval_loss'] = eval_res[0]
    save_dict['eval_acc'] = eval_res[1]
    save_dict['elapsed_time'] = elapsed_time
    print("save result dict:", save_dict)

    df_result = pd.DataFrame(save_dict.values(), index=save_dict.keys())
    csv_file = os.path.join(child_log_dir, "result.csv")
    df_result.to_csv(csv_file)
    print("\nexport result in ", csv_file)

    # confusion matrix -----
    #           Predict
    #             0  |  1
    #         --+----+-----
    #         0 | TN | FP
    #   label --+----+-----
    #         1 | FN | TP
    print("\nconfusion matrix")
    idx_label = np.argmax(test_label, axis=-1)  # one_hot => index
    idx_pred = np.argmax(pred, axis=-1)  # per-class probability => class with the highest value
    cm = confusion_matrix(idx_label, idx_pred)

    # Calculate Precision and Recall
    tn, fp, fn, tp = cm.ravel()
    print("  | T  | F ")
    print("--+----+---")
    print("N | {} | {}".format(tn, fn))
    print("--+----+---")
    print("P | {} | {}".format(tp, fp))

    # precision:
    #   sklearn.metrics also provides precision_score().
    #   Its inputs are (idx_label, idx_pred).
    #   "Of the samples predicted positive, the fraction that are truly positive."
    #   Also called PPV (positive predictive value).
    precision = tp / (tp + fp)
    print("Precision of the model is {}".format(precision))

    # recall:
    #   sklearn.metrics also provides recall_score().
    #   Its inputs are (idx_label, idx_pred).
    #   "Of the samples that are truly positive, the fraction predicted correctly."
    #   Also called sensitivity, hit rate, or TPR (true positive rate).
    #   Very roughly, it is like an accuracy computed over class 1 only
    #   (how the negative side is classified does not matter here);
    #   conversely, TN / (TN + FP) gives the corresponding rate for class 0.
    recall = tp / (tp + fn)
    print("Recall of the model is {}".format(recall))
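
# Cross-check sketch: the hand-computed precision/recall above should agree
# with sklearn.metrics.precision_score / recall_score on the same index
# arrays. idx_label / idx_pred below are hypothetical stand-ins.
import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score

idx_label = np.array([0, 0, 1, 1, 1])
idx_pred = np.array([0, 1, 1, 1, 0])

tn, fp, fn, tp = confusion_matrix(idx_label, idx_pred).ravel()
assert np.isclose(precision_score(idx_label, idx_pred), tp / (tp + fp))
assert np.isclose(recall_score(idx_label, idx_pred), tp / (tp + fn))
print(precision_score(idx_label, idx_pred), recall_score(idx_label, idx_pred))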