def evaluate_one_fold(directory, dataset, w_path, mode): """ evaluate the result of one fold, with segmentation metrics. directory: str, model directory path like 'regression/Adam/vgg_p4_size150' dataset: str, ips_ or melanoma_ 1 to 5 w_path: str, path to dataset directory, like 'weights/ips' output: tuple of float, segmentation scores (jaccard, dice, tpr, tnr, acc) """ dname = dataset[-1] path = os.path.join(w_path, directory, dname) ld = os.listdir(path) if "label" in ld: # 単一resolutionの場合はそのままlabelディレクトリ読み込み pred_path = tl.getFilelist(os.path.join(path, "label"), ".png") else: # multi resolutionの場合は、最も解像度が高いディレクトリを読み込み tmp = 0 for d in ld: if "label" in d and int(d[-1]) > tmp: tmp = int(d[-1]) di = "label" + str(tmp) pred_path = tl.getFilelist(os.path.join(path, di), ".png") pred_path.sort() jaccard = [] dice = [] tpr = [] tnr = [] acc = [] class_j = [] # インスタンス化するために適当なパスを読み込み if "ips" in path: img_list, true_path = tl.load_datapath(dataset, mode=mode) labels = [1, 2, 3] else: img_list, true_path = tl.load_datapath(dataset, mode=mode) labels = [1, 2] DL = Patch_DataLoader(img_list, true_path) for pred, true in zip(pred_path, true_path): pred_name, _ = os.path.splitext(pred.split("/")[-1]) true_name, _ = os.path.splitext(true.split("/")[-1]) assert pred_name == true_name y_pred = np.array(Image.open(pred), int) y_true = np.array(Image.open(true), int) y_true = DL.image2label(y_true, evaluate=True) # out of region of evaluation oor = ~(y_true == 0) * 1 y_pred = y_pred * oor j, d, tp, tn, a, c_j = evaluate_one_image(y_true, y_pred, labels) class_j.append(c_j) jaccard.append(j) dice.append(d) tpr.append(tp) tnr.append(tn) acc.append(a) jaccard = sum(jaccard) / len(jaccard) dice = sum(dice) / len(dice) tpr = sum(tpr) / len(tpr) tnr = sum(tnr) / len(tnr) acc = sum(acc) / len(acc) class_j = np.asarray(class_j) return jaccard, dice, tpr, tnr, acc, class_j
def patch_generator(in_size, size, step, dataset, batch_size, mode, resolution, method, subsets=3): """ in_size: int, size: int, step: int, dataset: str, "ips" or "melanoma", "_1" to "_5" """ img_list, mask_list = tl.load_datapath(dataset, mode=mode) nb_samples = len(img_list) while 1: # 全サンプルに対して番号をつけ、シャッフルして番号順に読み込むことで # 逐次読み込みを実現する # nb_samplesがsubsetsで割り切れなくてもおけ index = list(range(int(subsets))) * \ np.ceil(nb_samples / subsets).astype(int) # 割り切れない場合に、余分なindexを除去する for i in range(len(index) - nb_samples): index.pop() np.random.shuffle(index) for i in range(int(subsets)): # subset loop # indexの番号順にサンプルを読み込む # bool_maskでフラグが立った画像のみ取り出せる bool_mask = (np.array(index) == i) img_subset = np.array(img_list)[bool_mask] mask_subset = np.array(mask_list)[bool_mask] img_subset = img_subset.tolist() mask_subset = mask_subset.tolist() DataLoader = Patch_DataLoader(img_subset, mask_subset, in_size, size, step, method, resolution) X_train, y_train = DataLoader.load_data() X_train, y_train = shuffle_samples(X_train, y_train) X_train = X_train.reshape(X_train.shape[0], in_size, in_size, 3) X_train /= 255. if method == "fcn": y_train = y_train.reshape(y_train.shape[0], in_size, in_size, 1) y_train = y_train.astype(np.int32) elif method == "classification": if "melanoma" in dataset: num_classes = 2 else: num_classes = 3 y_train = np_utils.to_categorical(y_train, num_classes=num_classes) batch_loop = X_train.shape[0] // batch_size for j in range(batch_loop): # batch loop x = X_train[j * batch_size:(j + 1) * batch_size, ...] y = y_train[j * batch_size:(j + 1) * batch_size, ...] yield x, y # del x, y gc.collect()
def test_fcn_model(dataset, img_size, resize_input=False, model_path="valid"): """ """ in_h, in_w = img_size if 'ips' in dataset: num_classes = 3 else: num_classes = 2 if model_path != "valid": model_path = os.path.join("weights", model_path, "dataset_" + dataset[-1]) else: model_path = "weights/valid_all/dataset_" + dataset[-1] try: model = model_from_json( open(os.path.join(model_path, "train_arch.json")).read()) except FileNotFoundError: model = models.FCN_8s(num_classes, (in_h, in_w, 3), 0, test=True) model.load_weights(os.path.join(model_path, "train_weights.h5")) # データ読み込み img_list, mask_list = tl.load_datapath(dataset, mode="test") print("visualize the result of " + dataset) # 可視化画像を保存するためのディレクトリ作成 make_vis_dirs(model_path) elapsed_time = 0. for im in img_list: # 可視化画像の名前を取得 file_name = im.split("/")[-1] file_name, ext = os.path.splitext(file_name) file_name = file_name + ".png" # データ読み込み in_img = np.zeros((1, in_h, in_w, 3)).astype(np.float32) im = Image.open(im) if resize_input: im = im.resize((in_w, in_h)) img = np.array(im, dtype=np.float32) / 255. if in_h > img.shape[0]: offset = (in_h - img.shape[0]) // 2 in_img[0, offset:offset + img.shape[0], :, :] = img[...] elif in_w > img.shape[1]: offset = (in_w - img.shape[1]) // 2 in_img[0, :, offset:offset + img.shape[1], :] = img[...] else: in_img[0, ...] = img[...] offset = 0 # 推定 start_time = timeit.default_timer() pred = model.predict(in_img) elapsed_time += timeit.default_timer() - start_time pred = normalize_infmap(pred) if resize_input: pred = resample_infmap(pred) if in_h > img.shape[0]: result = pred[0, offset:offset + img.shape[0], :, :] elif in_w > img.shape[1]: result = pred[0, :, offset:offset + img.shape[1], :] else: result = pred[0, ...] PMC = ProbMapConstructer(result, data=dataset[:-2]) PMC.save_InfMap(model_path, file_name) test_time = elapsed_time / len(img_list) print("test on %s takes %.7f m" % (dataset, test_time))
def train_model(method, resolution, dataset, in_size, size, step, arch, opt, lr, epochs, batch_size, l2_reg, decay, border_weight, binary): """ train models, and save weights and loss graph method: str: 'classification', 'regression' or 'fcn' resolution: list of int or None, resolution of patch if method is not 'resolution', this must be None dataset: 'ips' or 'melanoma' + '_1' to '_5' in_size: int, input size of network size: int, cropped patch size step: int, patch sampling step arch: str, network architecture if method is 'fcn', arch must be 'vgg_p5' opt: str, optimizer 'SGD' or 'Adam' lr: float, learning rate epochs: int, number of epochs to train batch_size: int, batch size l2_reg: float, l2 regularization value decay: float, learning rate decay, see keras.io border_weight: float or None if you want to set weight to patches whitch contain more than two classes, set this value from as float binary: bool in the case of `ce_dist`, if binary is True, then the target histograms are converted to one-hot vectors i.e. when you want to train with `majority`, set method to `ce_dist` and binary to True. output: None """ m_list = [ 'regression', 'classification', 'fcn', "fcn_pre", 'fcn_norm', 'ce_dist', 'hamming', 'sigmoid' ] if method not in m_list: raise ValueError() # データセットによるクラス数指定 if 'ips' in dataset: num_classes = 3 elif 'melanoma' in dataset: num_classes = 2 else: raise ValueError("dataset must be ips or melanoma") # ネットワークの出力ユニット数指定 if method not in ["regression", "ce_dist", "hamming", "sigmoid"]: if method == "classification": metrics = "accuracy" loss_f = "categorical_crossentropy" else: metrics = sparse_accuracy loss_f = softmax_sparse_crossentropy resolution = None out_num = num_classes else: out_num = 0 for i in resolution: out_num += i**2 * num_classes if method == 'regression' or method == 'sigmoid': metrics = "mse" loss_f = "mean_squared_error" elif method == "ce_dist": metrics = distribution_cross_entropy loss_f = distribution_cross_entropy elif method == "hamming": metrics = None loss_f = hamming_distance # weights ディレクトリ作成 try: n = dataset[-1] os.makedirs("weights/valid_all/dataset_" + str(n)) except FileExistsError: pass dir_path = os.path.join("weights/valid_all/dataset_" + str(n)) # モデル読み込み if method == "fcn": arch = "FCN_8s" print("arch : ", arch) in_shape = (in_size, in_size, 3) model = models.FCN_8s(num_classes, in_shape, l2_reg, nopad=True) elif method == "fcn_pre": arch = "FCN_8s_pretrained" method = "fcn" print("arch : ", arch) in_shape = (in_size, in_size, 3) model = models.FCN_8s_pretrained(num_classes, in_shape, l2_reg, nopad=True) elif method == "fcn_norm": arch = "FCN_8s_norm" print("arch : ", arch) in_shape = (in_size, in_size, 3) model = models.FCN_8s_norm(num_classes, in_shape, l2_reg, nopad=True) method = "fcn" else: print("arch :", arch) if arch == "vgg_p5": model = models.myVGG_p5(in_size, l2_reg, method, out_num) elif arch == "vgg_p4": model = models.myVGG_p4(in_size, l2_reg, method, out_num) else: raise ValueError("unknown arch") # データのパス読み込み img_list, mask_list = tl.load_datapath(dataset, mode="train") test_img_list, test_mask_list = tl.load_datapath(dataset, mode="test") # インスタンス化はするが読み込みはあとで行う。 DataLoader = Patch_DataLoader(img_list, mask_list, in_size, size, step, method, resolution, border_weight=border_weight) if "melanoma" in dataset: test_DL = Patch_DataLoader(test_img_list, test_mask_list, in_size, size, TEST_STEP, method, resolution) else: test_DL = Patch_DataLoader(test_img_list, test_mask_list, in_size, size, step, method, resolution) # optimizer指定、モデルコンパイル # loss関数が引数をとる場合と場合分け if method not in ["ce_dist", "hamming"]: if opt == "SGD": model.compile(loss=loss_f, optimizer=SGD(lr=lr, momentum=0.9, decay=decay), metrics=[]) elif opt == "Adadelta": lr = 1.0 decay = 0 model.compile(loss=loss_f, optimizer=Adadelta(), metrics=[metrics]) elif opt == "Adam": model.compile(loss=loss_f, optimizer=Adam(lr=lr, decay=decay), metrics=[]) else: raise ValueError("argument 'opt' is wrong.") else: if binary: print("\n method -> ce_dist, binary=True \n") if opt == "SGD": model.compile(loss=loss_f(resolution), optimizer=SGD(lr=lr, momentum=0.9, decay=decay), metrics=[]) elif opt == "Adadelta": lr = 1.0 decay = 0 model.compile(loss=loss_f(resolution), optimizer=Adadelta(), metrics=[]) elif opt == "Adam": model.compile(loss=loss_f(resolution, binary), optimizer=Adam(lr=lr, decay=decay), metrics=[]) else: raise ValueError("argument 'opt' is wrong.") print("train on " + dataset) start_time = timeit.default_timer() if method != "fcn": # fcn以外は.fit()で学習 if "ips" in dataset: if border_weight is not None: X_train, y_train, s_weight = DataLoader.load_data() else: X_train, y_train = DataLoader.load_data() s_weight = None print("data loaded.") X_train = X_train.reshape(X_train.shape[0], in_size, in_size, 3) X_train /= 255. if method == "classification": y_train = np_utils.to_categorical(y_train, num_classes=num_classes) X_test, y_test = test_DL.load_data() X_test = X_test.reshape(X_test.shape[0], in_size, in_size, 3) X_test /= 255. if method == "classification": y_test = np_utils.to_categorical(y_test, num_classes=num_classes) hist = model.fit( X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test), sample_weight=s_weight, verbose=1, ) else: steps_per_epoch = DataLoader.num_samples // batch_size val_step = test_DL.num_samples // batch_size hist = model.fit_generator( generator=patch_generator(in_size, size, step, dataset, batch_size, "train", resolution, method, SUBSETS), steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=patch_generator(in_size, size, TEST_STEP, dataset, batch_size, "test", resolution, method, SUBSETS), validation_steps=val_step, verbose=1, ) else: # fcnはgeneratorで学習 steps_per_epoch = DataLoader.num_samples // batch_size val_step = test_DL.num_samples // batch_size if "ips" in dataset: hist = model.fit_generator( generator=patch_generator(in_size, size, step, dataset, batch_size, "train", resolution, method), steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=patch_generator(in_size, size, step, dataset, batch_size, "test", resolution, method), validation_steps=val_step, verbose=1) else: hist = model.fit_generator( generator=patch_generator(in_size, size, step, dataset, batch_size, "train", resolution, method, SUBSETS), steps_per_epoch=steps_per_epoch, validation_data=patch_generator(in_size, size, step, dataset, batch_size, "test", resolution, method, SUBSETS), validation_steps=val_step, epochs=epochs, ) elapsed_time = (timeit.default_timer() - start_time) / 60. print("train on %s takes %.2f m" % (dataset, elapsed_time)) # モデル保存 # fcnはなぜかjsonが作れないため例外処理する try: json_string = model.to_json() with open(os.path.join(dir_path, "train_arch.json"), "w") as file: file.write(json_string) except ValueError: print("couldnt save json_file, skipped") finally: model.save_weights(os.path.join(dir_path, "train_weights.h5"), overwrite=True) # パラメータなどをresult.txtに保存 with open(os.path.join(dir_path, "result.txt"), "w") as file: title = ["<<", method, arch, ">>"] title = " ".join(title) file.write(title + "\n") file.write("in_size, size, step:" + str((in_size, size, step)) + "\n") file.write("resolution:" + str(resolution) + "\n") file.write("lr:" + str(lr) + "\n") file.write("epochs:" + str(epochs) + "\n") file.write("batch_size:" + str(batch_size) + "\n") file.write("l2_reg:" + str(l2_reg) + "\n") file.write("decay:" + str(decay) + "\n") file.write("TrainingTime:%.2f m\n" % elapsed_time) # train loss だけプロットして保存 loss = hist.history["loss"] val_loss = hist.history["val_loss"] nb_epoch = len(loss) plt.figure() plt.plot(range(nb_epoch), loss, label="loss") plt.plot(range(nb_epoch), val_loss, label="val_loss") plt.legend(loc='best', fontsize=10) plt.grid() plt.xlabel("epoch") plt.ylabel("loss") plt.savefig(os.path.join(dir_path, "loss.png")) plt.close()
def test_model(method, resolution, dataset, in_size, size, step, label_map=False, model_path="valid"): """ inference method: str, dataset: str, 'ips' or 'melanoma' + '_1' to '_5' in_size: int, size: int, step: int, model_path: str, path to model path you want to test """ structured = ['regression', 'sigmoid', 'ce_dist', 'hamming'] if method not in [ 'regression', 'classification', 'fcn', 'fcn_norm', 'ce_dist', 'hamming', 'fcn_pre', 'sigmoid' ]: raise ValueError() if method not in structured: resolution = None if 'ips' in dataset: num_classes = 3 else: num_classes = 2 if model_path != "valid": model_path = os.path.join("weights", model_path, "dataset_" + dataset[-1]) else: model_path = "weights/valid_all/dataset_" + dataset[-1] try: if method == "ce_dist": out_num = 0 for i in resolution: out_num += i**2 * num_classes model = models.myVGG_p4(in_size, 0, method, out_num, num_classes, test=True) else: model = model_from_json( open(os.path.join(model_path, "train_arch.json")).read()) except FileNotFoundError: in_shape = (in_size, in_size, 3) if method == "fcn" or method == "fcn_pre": model = models.FCN_8s(num_classes, in_shape, 0, nopad=True, test=True) elif method == "fcn_norm": model = models.FCN_8s_norm(num_classes, in_shape, 0, nopad=True) else: out_num = 0 for i in resolution: out_num += i**2 * num_classes model = models.FCN_8s_dist(num_classes, in_shape, 0, out_num, nopad=True) model.load_weights(os.path.join(model_path, "train_weights.h5")) # データ読み込み img_list, mask_list = tl.load_datapath(dataset, mode="test") DataLoader = Patch_DataLoader(img_list, mask_list, in_size, size, step, method, resolution, mode="test") print("visualize the result of " + dataset) # 可視化画像を保存するためのディレクトリ作成 # テスト時間計測のために,multi resolutionの場合は最も高い解像度のみを使用 if method in structured and len(resolution) > 1: resolution = [resolution[-1]] make_vis_dirs(model_path, resolution) elapsed_time = 0. elapsed_map_time = 0. p_count = 0 for img_path, mask_path in zip(img_list, mask_list): # 可視化画像の名前を取得 file_name = img_path.split("/")[-1] file_name, ext = os.path.splitext(file_name) file_name = file_name + ".png" # データ読み込み patches, _ = DataLoader.crop_img(img_path, mask_path, to_array=True) height = DataLoader.height width = DataLoader.width patches = patches.reshape(patches.shape[0], in_size, in_size, 3) p_count += patches.shape[0] patches /= 255. # 推定 start_time = timeit.default_timer() prob = model.predict(patches, batch_size=16) elapsed_time += timeit.default_timer() - start_time if method in structured and len(resolution) > 1: prob = prob[:, -resolution[-1]**2 * 3:] PMC = ProbMapConstructer(model_out=prob, size=size, step=step, origin_h=height, origin_w=width, label_map=label_map, data=dataset[:-2], resolution=resolution) elapsed_map_time += timeit.default_timer() - start_time PMC.save_InfMap(model_path, file_name) test_time = elapsed_time / len(img_list) test_time_p = elapsed_time / p_count time_array = np.array([test_time, test_time_p]) print("test on %s takes %.7f s" % (dataset, test_time)) print("test on %s takes %.7f s" % (dataset, test_time_p)) np.savetxt(os.path.join(model_path, "test_time.txt"), time_array)