Example #1
def evaluate_one_fold(directory, dataset, w_path, mode):
    """
    Evaluate the result of one fold with segmentation metrics.
    directory: str, model directory path like 'regression/Adam/vgg_p4_size150'
    dataset: str, 'ips' or 'melanoma' + '_1' to '_5'
    w_path: str, path to the dataset directory, like 'weights/ips'
    mode: str, data split passed to tl.load_datapath, e.g. 'train' or 'test'

    output: (jaccard, dice, tpr, tnr, acc, class_j)
            the first five are floats; class_j is an ndarray of
            per-class Jaccard scores, one row per image
    """
    dname = dataset[-1]
    path = os.path.join(w_path, directory, dname)
    ld = os.listdir(path)
    if "label" in ld:
        # single resolution: read the 'label' directory directly
        pred_path = tl.getFilelist(os.path.join(path, "label"), ".png")
    else:
        # multi resolution: read the directory with the highest resolution
        tmp = 0
        for d in ld:
            if "label" in d and int(d[-1]) > tmp:
                tmp = int(d[-1])
        di = "label" + str(tmp)
        pred_path = tl.getFilelist(os.path.join(path, di), ".png")
    pred_path.sort()

    jaccard = []
    dice = []
    tpr = []
    tnr = []
    acc = []
    class_j = []

    # load data paths (needed to instantiate the loader) and set class labels
    img_list, true_path = tl.load_datapath(dataset, mode=mode)
    if "ips" in path:
        labels = [1, 2, 3]
    else:
        labels = [1, 2]
    DL = Patch_DataLoader(img_list, true_path)
    for pred, true in zip(pred_path, true_path):
        pred_name, _ = os.path.splitext(pred.split("/")[-1])
        true_name, _ = os.path.splitext(true.split("/")[-1])
        assert pred_name == true_name

        y_pred = np.array(Image.open(pred), int)
        y_true = np.array(Image.open(true), int)
        y_true = DL.image2label(y_true, evaluate=True)

        # mask out pixels outside the evaluation region (label 0)
        oor = ~(y_true == 0) * 1
        y_pred = y_pred * oor
        j, d, tp, tn, a, c_j = evaluate_one_image(y_true, y_pred, labels)
        class_j.append(c_j)
        jaccard.append(j)
        dice.append(d)
        tpr.append(tp)
        tnr.append(tn)
        acc.append(a)
    jaccard = sum(jaccard) / len(jaccard)
    dice = sum(dice) / len(dice)
    tpr = sum(tpr) / len(tpr)
    tnr = sum(tnr) / len(tnr)
    acc = sum(acc) / len(acc)
    class_j = np.asarray(class_j)
    return jaccard, dice, tpr, tnr, acc, class_j
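
A minimal usage sketch for this function; the model directory name, the fold loop, and the averaging below are illustrative assumptions, not part of the original code:

# average the Jaccard index over the five folds (values illustrative)
fold_scores = []
for n in range(1, 6):
    j, d, tp, tn, a, c_j = evaluate_one_fold(
        "regression/Adam/vgg_p4_size150",   # hypothetical model directory
        "ips_%d" % n, "weights/ips", mode="test")
    fold_scores.append(j)
print("mean Jaccard over 5 folds: %.4f" % (sum(fold_scores) / len(fold_scores)))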
Example #2
def patch_generator(in_size,
                    size,
                    step,
                    dataset,
                    batch_size,
                    mode,
                    resolution,
                    method,
                    subsets=3):
    """
    Generate (X, y) batches of patches, loading one subset of images at a time.
    in_size: int, input size of the network
    size: int, cropped patch size
    step: int, patch sampling step
    dataset: str, 'ips' or 'melanoma' + '_1' to '_5'
    batch_size: int, batch size
    mode: str, 'train' or 'test'
    resolution: list of int or None, patch resolutions
    method: str, 'classification', 'regression' or 'fcn'
    subsets: int, number of subsets the image list is split into
    """
    img_list, mask_list = tl.load_datapath(dataset, mode=mode)

    nb_samples = len(img_list)
    while 1:
        # give every sample a subset index, shuffle the indices, and load the
        # samples subset by subset so only part of the data is in memory at once
        # works even when nb_samples is not divisible by subsets
        index = list(range(int(subsets))) * \
            np.ceil(nb_samples / subsets).astype(int)
        # drop the surplus indices when nb_samples is not divisible
        for i in range(len(index) - nb_samples):
            index.pop()

        np.random.shuffle(index)
        for i in range(int(subsets)):  # subset loop
            # pick out only the images assigned to subset i via a boolean mask
            bool_mask = (np.array(index) == i)
            img_subset = np.array(img_list)[bool_mask]
            mask_subset = np.array(mask_list)[bool_mask]
            img_subset = img_subset.tolist()
            mask_subset = mask_subset.tolist()
            DataLoader = Patch_DataLoader(img_subset, mask_subset, in_size,
                                          size, step, method, resolution)
            X_train, y_train = DataLoader.load_data()
            X_train, y_train = shuffle_samples(X_train, y_train)
            X_train = X_train.reshape(X_train.shape[0], in_size, in_size, 3)
            X_train /= 255.
            if method == "fcn":
                y_train = y_train.reshape(y_train.shape[0], in_size, in_size,
                                          1)
                y_train = y_train.astype(np.int32)
            elif method == "classification":
                if "melanoma" in dataset:
                    num_classes = 2
                else:
                    num_classes = 3
                y_train = np_utils.to_categorical(y_train,
                                                  num_classes=num_classes)

            batch_loop = X_train.shape[0] // batch_size

            for j in range(batch_loop):  # batch loop
                x = X_train[j * batch_size:(j + 1) * batch_size, ...]
                y = y_train[j * batch_size:(j + 1) * batch_size, ...]
                yield x, y
                # del x, y
                gc.collect()
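
The subset bookkeeping at the top of the `while` loop is the subtle part; this standalone sketch (with made-up sizes) shows what it computes:

import numpy as np

nb_samples, subsets = 7, 3
# repeat 0..subsets-1 enough times to cover every sample (length 9 here)
index = list(range(subsets)) * int(np.ceil(nb_samples / subsets))
# trim the surplus so each sample gets exactly one subset id (length 7)
for _ in range(len(index) - nb_samples):
    index.pop()
np.random.shuffle(index)                 # e.g. [2, 0, 1, 0, 2, 1, 0]
for i in range(subsets):
    bool_mask = (np.array(index) == i)   # flags the samples of subset i
    print("subset %d gets %d samples" % (i, bool_mask.sum()))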
Example #3
def test_fcn_model(dataset, img_size, resize_input=False, model_path="valid"):
    """
    """
    in_h, in_w = img_size
    if 'ips' in dataset:
        num_classes = 3
    else:
        num_classes = 2
    if model_path != "valid":
        model_path = os.path.join("weights", model_path,
                                  "dataset_" + dataset[-1])
    else:
        model_path = "weights/valid_all/dataset_" + dataset[-1]
    try:
        model = model_from_json(
            open(os.path.join(model_path, "train_arch.json")).read())
    except FileNotFoundError:
        model = models.FCN_8s(num_classes, (in_h, in_w, 3), 0, test=True)
    model.load_weights(os.path.join(model_path, "train_weights.h5"))

    # load data paths
    img_list, mask_list = tl.load_datapath(dataset, mode="test")

    print("visualize the result of " + dataset)
    # create directories to save the visualization images
    make_vis_dirs(model_path)

    elapsed_time = 0.
    for im in img_list:
        # build the output file name
        file_name = im.split("/")[-1]
        file_name, ext = os.path.splitext(file_name)
        file_name = file_name + ".png"
        # load the input image
        in_img = np.zeros((1, in_h, in_w, 3)).astype(np.float32)
        im = Image.open(im)
        if resize_input:
            im = im.resize((in_w, in_h))
        img = np.array(im, dtype=np.float32) / 255.
        if in_h > img.shape[0]:
            offset = (in_h - img.shape[0]) // 2
            in_img[0, offset:offset + img.shape[0], :, :] = img[...]
        elif in_w > img.shape[1]:
            offset = (in_w - img.shape[1]) // 2
            in_img[0, :, offset:offset + img.shape[1], :] = img[...]
        else:
            in_img[0, ...] = img[...]
            offset = 0
        # inference
        start_time = timeit.default_timer()
        pred = model.predict(in_img)
        elapsed_time += timeit.default_timer() - start_time
        pred = normalize_infmap(pred)

        if resize_input:
            pred = resample_infmap(pred)

        if in_h > img.shape[0]:
            result = pred[0, offset:offset + img.shape[0], :, :]
        elif in_w > img.shape[1]:
            result = pred[0, :, offset:offset + img.shape[1], :]
        else:
            result = pred[0, ...]
        PMC = ProbMapConstructer(result, data=dataset[:-2])
        PMC.save_InfMap(model_path, file_name)
    test_time = elapsed_time / len(img_list)
    print("test on %s takes %.7f m" % (dataset, test_time))
Example #4
def train_model(method, resolution, dataset, in_size, size, step, arch, opt,
                lr, epochs, batch_size, l2_reg, decay, border_weight, binary):
    """
    Train a model, then save the weights and the loss graph.
    method: str, one of 'classification', 'regression', 'fcn', 'fcn_pre',
            'fcn_norm', 'ce_dist', 'hamming' or 'sigmoid'
    resolution: list of int or None, patch resolutions
                required for 'regression', 'sigmoid', 'ce_dist' and 'hamming';
                for other methods it is ignored and reset to None
    dataset: 'ips' or 'melanoma' + '_1' to '_5'
    in_size: int, input size of network
    size: int, cropped patch size
    step: int, patch sampling step
    arch: str, network architecture, 'vgg_p5' or 'vgg_p4'
          ignored for the fcn methods, which use the FCN_8s variants
    opt: str, optimizer 'SGD' or 'Adam'
    lr: float, learning rate
    epochs: int, number of epochs to train
    batch_size: int, batch size
    l2_reg: float, l2 regularization value
    decay: float, learning rate decay, see keras.io
    border_weight: float or None
                   if you want to up-weight patches which contain more than
                   one class, set this value as a float
    binary: bool
            in the case of `ce_dist`, if binary is True, then the target
            histograms are converted to one-hot vectors
            i.e. when you want to train with `majority`, set method to
            `ce_dist` and binary to True.

    output: None
    """
    m_list = [
        'regression', 'classification', 'fcn', "fcn_pre", 'fcn_norm',
        'ce_dist', 'hamming', 'sigmoid'
    ]
    if method not in m_list:
        raise ValueError("unknown method: %s" % method)

    # the number of classes depends on the dataset
    if 'ips' in dataset:
        num_classes = 3
    elif 'melanoma' in dataset:
        num_classes = 2
    else:
        raise ValueError("dataset must be ips or melanoma")

    # set the number of output units of the network
    if method not in ["regression", "ce_dist", "hamming", "sigmoid"]:
        if method == "classification":
            metrics = "accuracy"
            loss_f = "categorical_crossentropy"
        else:
            metrics = sparse_accuracy
            loss_f = softmax_sparse_crossentropy
        resolution = None
        out_num = num_classes
    else:
        out_num = 0
        for i in resolution:
            out_num += i**2 * num_classes
        if method == 'regression' or method == 'sigmoid':
            metrics = "mse"
            loss_f = "mean_squared_error"
        elif method == "ce_dist":
            metrics = distribution_cross_entropy
            loss_f = distribution_cross_entropy
        elif method == "hamming":
            metrics = None
            loss_f = hamming_distance

    # create the weights directory
    try:
        n = dataset[-1]
        os.makedirs("weights/valid_all/dataset_" + str(n))
    except FileExistsError:
        pass
    dir_path = "weights/valid_all/dataset_" + str(n)

    # build the model
    if method == "fcn":
        arch = "FCN_8s"
        print("arch : ", arch)
        in_shape = (in_size, in_size, 3)
        model = models.FCN_8s(num_classes, in_shape, l2_reg, nopad=True)
    elif method == "fcn_pre":
        arch = "FCN_8s_pretrained"
        method = "fcn"
        print("arch : ", arch)
        in_shape = (in_size, in_size, 3)
        model = models.FCN_8s_pretrained(num_classes,
                                         in_shape,
                                         l2_reg,
                                         nopad=True)
    elif method == "fcn_norm":
        arch = "FCN_8s_norm"
        print("arch : ", arch)
        in_shape = (in_size, in_size, 3)
        model = models.FCN_8s_norm(num_classes, in_shape, l2_reg, nopad=True)
        method = "fcn"
    else:
        print("arch :", arch)
        if arch == "vgg_p5":
            model = models.myVGG_p5(in_size, l2_reg, method, out_num)
        elif arch == "vgg_p4":
            model = models.myVGG_p4(in_size, l2_reg, method, out_num)
        else:
            raise ValueError("unknown arch")

    # load data paths
    img_list, mask_list = tl.load_datapath(dataset, mode="train")
    test_img_list, test_mask_list = tl.load_datapath(dataset, mode="test")

    # instantiate the loaders here; the actual loading happens later
    DataLoader = Patch_DataLoader(img_list,
                                  mask_list,
                                  in_size,
                                  size,
                                  step,
                                  method,
                                  resolution,
                                  border_weight=border_weight)
    if "melanoma" in dataset:
        test_DL = Patch_DataLoader(test_img_list, test_mask_list, in_size,
                                   size, TEST_STEP, method, resolution)
    else:
        test_DL = Patch_DataLoader(test_img_list, test_mask_list, in_size,
                                   size, step, method, resolution)

    # choose the optimizer and compile the model
    # branch on whether the loss function takes arguments
    if method not in ["ce_dist", "hamming"]:
        if opt == "SGD":
            model.compile(loss=loss_f,
                          optimizer=SGD(lr=lr, momentum=0.9, decay=decay),
                          metrics=[])
        elif opt == "Adadelta":
            lr = 1.0
            decay = 0
            model.compile(loss=loss_f, optimizer=Adadelta(), metrics=[metrics])
        elif opt == "Adam":
            model.compile(loss=loss_f,
                          optimizer=Adam(lr=lr, decay=decay),
                          metrics=[])
        else:
            raise ValueError("argument 'opt' is wrong.")
    else:
        if binary:
            print("\n method -> ce_dist, binary=True \n")

        if opt == "SGD":
            model.compile(loss=loss_f(resolution),
                          optimizer=SGD(lr=lr, momentum=0.9, decay=decay),
                          metrics=[])
        elif opt == "Adadelta":
            lr = 1.0
            decay = 0
            model.compile(loss=loss_f(resolution),
                          optimizer=Adadelta(),
                          metrics=[])
        elif opt == "Adam":
            model.compile(loss=loss_f(resolution, binary),
                          optimizer=Adam(lr=lr, decay=decay),
                          metrics=[])
        else:
            raise ValueError("argument 'opt' is wrong.")

    print("train on " + dataset)
    start_time = timeit.default_timer()
    if method != "fcn":
        # non-fcn methods: ips fits in memory (.fit()), melanoma uses a generator
        if "ips" in dataset:
            if border_weight is not None:
                X_train, y_train, s_weight = DataLoader.load_data()
            else:
                X_train, y_train = DataLoader.load_data()
                s_weight = None
            print("data loaded.")
            X_train = X_train.reshape(X_train.shape[0], in_size, in_size, 3)
            X_train /= 255.
            if method == "classification":
                y_train = np_utils.to_categorical(y_train,
                                                  num_classes=num_classes)
            X_test, y_test = test_DL.load_data()
            X_test = X_test.reshape(X_test.shape[0], in_size, in_size, 3)
            X_test /= 255.
            if method == "classification":
                y_test = np_utils.to_categorical(y_test,
                                                 num_classes=num_classes)
            hist = model.fit(
                X_train,
                y_train,
                batch_size=batch_size,
                epochs=epochs,
                validation_data=(X_test, y_test),
                sample_weight=s_weight,
                verbose=1,
            )
        else:
            steps_per_epoch = DataLoader.num_samples // batch_size
            val_step = test_DL.num_samples // batch_size
            hist = model.fit_generator(
                generator=patch_generator(in_size, size, step, dataset,
                                          batch_size, "train", resolution,
                                          method, SUBSETS),
                steps_per_epoch=steps_per_epoch,
                epochs=epochs,
                validation_data=patch_generator(in_size, size, TEST_STEP,
                                                dataset, batch_size, "test",
                                                resolution, method, SUBSETS),
                validation_steps=val_step,
                verbose=1,
            )
    else:
        # fcn trains with a generator
        steps_per_epoch = DataLoader.num_samples // batch_size
        val_step = test_DL.num_samples // batch_size
        if "ips" in dataset:
            hist = model.fit_generator(
                generator=patch_generator(in_size, size, step, dataset,
                                          batch_size, "train", resolution,
                                          method),
                steps_per_epoch=steps_per_epoch,
                epochs=epochs,
                validation_data=patch_generator(in_size, size, step, dataset,
                                                batch_size, "test", resolution,
                                                method),
                validation_steps=val_step,
                verbose=1)
        else:
            hist = model.fit_generator(
                generator=patch_generator(in_size, size, step, dataset,
                                          batch_size, "train", resolution,
                                          method, SUBSETS),
                steps_per_epoch=steps_per_epoch,
                validation_data=patch_generator(in_size, size, step, dataset,
                                                batch_size, "test", resolution,
                                                method, SUBSETS),
                validation_steps=val_step,
                epochs=epochs,
            )

    elapsed_time = (timeit.default_timer() - start_time) / 60.
    print("train on %s takes %.2f m" % (dataset, elapsed_time))

    # save the model
    # fcn cannot be serialized to json for some reason, so catch the exception
    try:
        json_string = model.to_json()
        with open(os.path.join(dir_path, "train_arch.json"), "w") as file:
            file.write(json_string)
    except ValueError:
        print("couldnt save json_file, skipped")
    finally:
        model.save_weights(os.path.join(dir_path, "train_weights.h5"),
                           overwrite=True)

    # save the parameters etc. to result.txt
    with open(os.path.join(dir_path, "result.txt"), "w") as file:
        title = ["<<", method, arch, ">>"]
        title = " ".join(title)
        file.write(title + "\n")
        file.write("in_size, size, step:" + str((in_size, size, step)) + "\n")
        file.write("resolution:" + str(resolution) + "\n")
        file.write("lr:" + str(lr) + "\n")
        file.write("epochs:" + str(epochs) + "\n")
        file.write("batch_size:" + str(batch_size) + "\n")
        file.write("l2_reg:" + str(l2_reg) + "\n")
        file.write("decay:" + str(decay) + "\n")
        file.write("TrainingTime:%.2f m\n" % elapsed_time)

    # plot the training and validation loss and save the figure
    loss = hist.history["loss"]
    val_loss = hist.history["val_loss"]
    nb_epoch = len(loss)
    plt.figure()
    plt.plot(range(nb_epoch), loss, label="loss")
    plt.plot(range(nb_epoch), val_loss, label="val_loss")
    plt.legend(loc='best', fontsize=10)
    plt.grid()
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.savefig(os.path.join(dir_path, "loss.png"))
    plt.close()
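
A hypothetical invocation, with argument values chosen only to illustrate how the pieces fit together (e.g. `resolution=[1, 3, 5]` on the ips data gives `out_num = (1 + 9 + 25) * 3 = 105` output units):

train_model(method="regression",
            resolution=[1, 3, 5],     # hypothetical multi-resolution setting
            dataset="ips_1",
            in_size=150, size=150, step=75,
            arch="vgg_p4",
            opt="Adam", lr=1e-4,
            epochs=50, batch_size=32,
            l2_reg=1e-4, decay=0.0,
            border_weight=None, binary=False)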
Example #5
def test_model(method,
               resolution,
               dataset,
               in_size,
               size,
               step,
               label_map=False,
               model_path="valid"):
    """
    Inference: run a trained model on the test images and save the result maps.
    method: str, same options as in train_model
    resolution: list of int or None, patch resolutions (structured methods only)
    dataset: str, 'ips' or 'melanoma' + '_1' to '_5'
    in_size: int, input size of the network
    size: int, cropped patch size
    step: int, patch sampling step
    label_map: bool, forwarded to ProbMapConstructer
    model_path: str, path to the model directory you want to test
    """
    structured = ['regression', 'sigmoid', 'ce_dist', 'hamming']
    if method not in [
            'regression', 'classification', 'fcn', 'fcn_norm', 'ce_dist',
            'hamming', 'fcn_pre', 'sigmoid'
    ]:
        raise ValueError("unknown method: %s" % method)

    if method not in structured:
        resolution = None

    if 'ips' in dataset:
        num_classes = 3
    else:
        num_classes = 2
    if model_path != "valid":
        model_path = os.path.join("weights", model_path,
                                  "dataset_" + dataset[-1])
    else:
        model_path = "weights/valid_all/dataset_" + dataset[-1]
    try:
        if method == "ce_dist":
            out_num = 0
            for i in resolution:
                out_num += i**2 * num_classes
            model = models.myVGG_p4(in_size,
                                    0,
                                    method,
                                    out_num,
                                    num_classes,
                                    test=True)
        else:
            model = model_from_json(
                open(os.path.join(model_path, "train_arch.json")).read())
    except FileNotFoundError:
        in_shape = (in_size, in_size, 3)
        if method == "fcn" or method == "fcn_pre":
            model = models.FCN_8s(num_classes,
                                  in_shape,
                                  0,
                                  nopad=True,
                                  test=True)
        elif method == "fcn_norm":
            model = models.FCN_8s_norm(num_classes, in_shape, 0, nopad=True)
        else:
            out_num = 0
            for i in resolution:
                out_num += i**2 * num_classes
            model = models.FCN_8s_dist(num_classes,
                                       in_shape,
                                       0,
                                       out_num,
                                       nopad=True)
    model.load_weights(os.path.join(model_path, "train_weights.h5"))

    # load data paths
    img_list, mask_list = tl.load_datapath(dataset, mode="test")
    DataLoader = Patch_DataLoader(img_list,
                                  mask_list,
                                  in_size,
                                  size,
                                  step,
                                  method,
                                  resolution,
                                  mode="test")

    print("visualize the result of " + dataset)
    # create directories to save the visualization images
    # for timing, only the highest resolution is used in the multi-resolution case
    if method in structured and len(resolution) > 1:
        multi_res = True
        resolution = [resolution[-1]]
    else:
        multi_res = False
    make_vis_dirs(model_path, resolution)

    elapsed_time = 0.
    elapsed_map_time = 0.
    p_count = 0
    for img_path, mask_path in zip(img_list, mask_list):
        # build the output file name
        file_name = img_path.split("/")[-1]
        file_name, ext = os.path.splitext(file_name)
        file_name = file_name + ".png"
        # crop patches from the image
        patches, _ = DataLoader.crop_img(img_path, mask_path, to_array=True)
        height = DataLoader.height
        width = DataLoader.width
        patches = patches.reshape(patches.shape[0], in_size, in_size, 3)
        p_count += patches.shape[0]
        patches /= 255.
        # inference
        start_time = timeit.default_timer()
        prob = model.predict(patches, batch_size=16)
        elapsed_time += timeit.default_timer() - start_time
        if method in structured and multi_res:
            # slice out the output chunk of the highest resolution
            prob = prob[:, -resolution[-1]**2 * num_classes:]
        PMC = ProbMapConstructer(model_out=prob,
                                 size=size,
                                 step=step,
                                 origin_h=height,
                                 origin_w=width,
                                 label_map=label_map,
                                 data=dataset[:-2],
                                 resolution=resolution)
        elapsed_map_time += timeit.default_timer() - start_time
        PMC.save_InfMap(model_path, file_name)
    test_time = elapsed_time / len(img_list)
    test_time_p = elapsed_time / p_count
    time_array = np.array([test_time, test_time_p])
    print("test on %s takes %.7f s" % (dataset, test_time))
    print("test on %s takes %.7f s" % (dataset, test_time_p))
    np.savetxt(os.path.join(model_path, "test_time.txt"), time_array)
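
And a matching hypothetical test call, mirroring the training sketch above (all values illustrative):

test_model(method="regression",
           resolution=[1, 3, 5],
           dataset="ips_1",
           in_size=150, size=150, step=75,
           label_map=False,
           model_path="valid")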