Example #1
def generate_blob_npz(mhd_file):
    global images_path
    patient_id = os.path.basename(mhd_file)[:-4]
      
    if not os.path.exists(BLOB_IMG + patient_id +'.npz'):
        lung_images = helpers.load_patient_images(patient_id, images_path, "*_i.png")  # z, y, x
        lung_masks = helpers.load_patient_images(patient_id, images_path, "*_m.png")  # z, y, x
        print(patient_id, "shape", lung_images.shape, lung_masks.shape)
        # this API saves the predicted probability cubes for the patient as an .npz file
        unet_predict_api.get_coordzyx_candidate(model, patient_id, lung_images, lung_masks, False)
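A minimal driver sketch for the function above. The directory argument and wiring are assumptions; generate_blob_npz itself (and the module-level model, images_path and BLOB_IMG it relies on) come from the snippet.

import glob
import os

def generate_blob_npz_for_all(raw_mhd_dir):
    # Hypothetical driver: run candidate generation for every .mhd scan in a directory.
    for mhd_file in glob.glob(os.path.join(raw_mhd_dir, "*.mhd")):
        generate_blob_npz(mhd_file)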
def make_pos_annotation_images():
    src_dir = settings.LUNA_16_TRAIN_DIR2D2 + "metadata/"
    dst_dir = settings.BASE_DIR_SSD + "luna16_train_cubes_pos/"
    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*_annos_pos.csv")):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos.csv", "")
        # print(patient_id)
        # if not "148229375703208214308676934766" in patient_id:
        #     continue
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(patient_id, settings.LUNA_16_TRAIN_DIR2D2, "*" + CUBE_IMGTYPE_SRC + ".png")

        for index, row in df_annos.iterrows():
            coord_x = int(row["coord_x"] * images.shape[2])
            coord_y = int(row["coord_y"] * images.shape[1])
            coord_z = int(row["coord_z"] * images.shape[0])
            diam_mm = int(row["diameter"] * images.shape[2])
            anno_index = int(row["anno_index"])
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            save_cube_img(dst_dir + patient_id + "_" + str(anno_index) + "_" + str(diam_mm) + "_1_" + "pos.png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
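get_cube_from_img is used throughout these examples but not shown. A minimal sketch of what such a helper could look like, assuming the crop window is simply clamped to the volume borders; the real helper may behave differently near edges, which is why several callers also guard against non-(64, 64, 64) shapes.

def get_cube_from_img_sketch(img3d, center_x, center_y, center_z, block_size):
    # Clamp the start indices so the block stays inside the (z, y, x) volume.
    start_z = max(0, min(center_z - block_size // 2, img3d.shape[0] - block_size))
    start_y = max(0, min(center_y - block_size // 2, img3d.shape[1] - block_size))
    start_x = max(0, min(center_x - block_size // 2, img3d.shape[2] - block_size))
    return img3d[start_z:start_z + block_size,
                 start_y:start_y + block_size,
                 start_x:start_x + block_size]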
Example #4
def make_pos_annotation_images_manual():

    src_dir = "resources/luna16_manual_labels/"

    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_manual/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for file_path in glob.glob(dst_dir + "*_manual.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4" not in patient_id:
            continue

        print(patient_id)
        # if not "172845185165807139298420209778" in patient_id:
        #     continue
        df_annos = pandas.read_csv(csv_file)
        #        if len(df_annos) == 0:
        #            continue

        try:
            images = helpers.load_patient_images(
                patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
                "*" + CUBE_IMGTYPE_SRC + ".png")
        except:
            continue
        for index, row in df_annos.iterrows():
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            diameter = int(row["d"] * images.shape[2])
            node_type = int(row["id"])
            malscore = int(diameter)
            malscore = min(25, malscore)
            malscore = max(16, malscore)
            anno_index = index
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            save_cube_img(
                dst_dir + patient_id + "_" + str(anno_index) + "_" +
                str(malscore) + "_1_" + ("pos" if node_type == 0 else "neg") +
                ".png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id,
                              len(df_annos)], [5, 64, 8])
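The pseudo malignancy score in the function above is simply the pixel diameter clamped to the range [16, 25]; an equivalent one-liner, shown only for clarity:

malscore = max(16, min(25, int(diameter)))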
def make_candidate_auto_images(candidate_types=[]):
    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_auto/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for candidate_type in candidate_types:
        for file_path in glob.glob(dst_dir + "*_" + candidate_type + ".png"):
            os.remove(file_path)

    for candidate_type in candidate_types:
        if candidate_type == "falsepos":
            src_dir = "resources/luna16_falsepos_labels/"
        else:
            src_dir = settings.TRAIN_EXTRACTED_IMAGE_DIR + "_labels/"

        for index, csv_file in enumerate(
                glob.glob(src_dir + "*_candidates_" + candidate_type +
                          ".csv")):
            patient_id = ntpath.basename(csv_file).replace(
                "_candidates_" + candidate_type + ".csv", "")
            print(index, ",patient: ", patient_id, " type:", candidate_type)
            # if not "148229375703208214308676934766" in patient_id:
            #     continue
            df_annos = pandas.read_csv(csv_file)
            if len(df_annos) == 0:
                continue
            images = helpers.load_patient_images(
                patient_id,
                settings.TRAIN_EXTRACTED_IMAGE_DIR,
                "*" + CUBE_IMGTYPE_SRC + ".png",
                exclude_wildcards=[])

            row_no = 0
            for index, row in df_annos.iterrows():
                coord_x = int(row["coord_x"] * images.shape[2])
                coord_y = int(row["coord_y"] * images.shape[1])
                coord_z = int(row["coord_z"] * images.shape[0])
                anno_index = int(row["anno_index"])
                cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z,
                                             48)
                if cube_img.sum() < 10:
                    print("Skipping ", coord_x, coord_y, coord_z)
                    continue
                # print(cube_img.sum())
                try:
                    save_cube_img(
                        dst_dir + patient_id + "_" + str(anno_index) + "_0_" +
                        candidate_type + ".png", cube_img, 6, 8)
                except Exception as ex:
                    print(ex)

                row_no += 1
                max_item = 240 if candidate_type == "white" else 200
                if candidate_type == "luna":
                    max_item = 500
                if row_no > max_item:
                    break
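A usage sketch for the function above; only candidate type names that appear in its body ("falsepos", "luna", "white") are used here, and any other types would follow the same pattern.

make_candidate_auto_images(candidate_types=["falsepos", "luna", "white"])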
def filter_patient_nodules_predictions(df_nodule_predictions: pandas.DataFrame,
                                       patient_id,
                                       view_size,
                                       luna16=False):
    src_dir = settings.LUNA_16_TRAIN_DIR2D2 if luna16 else settings.NDSB3_EXTRACTED_IMAGE_DIR
    patient_mask = helpers.load_patient_images(patient_id, src_dir, "*_m.png")
    delete_indices = []
    for index, row in df_nodule_predictions.iterrows():
        z_perc = row["coord_z"]
        y_perc = row["coord_y"]
        center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * patient_mask.shape[0]))

        mal_score = row["diameter_mm"]
        start_y = center_y - view_size / 2
        start_x = center_x - view_size / 2
        nodule_in_mask = False
        for z_index in [-1, 0, 1]:
            img = patient_mask[z_index + center_z]
            start_x = int(start_x)
            start_y = int(start_y)
            view_size = int(view_size)
            img_roi = img[start_y:start_y + view_size,
                          start_x:start_x + view_size]
            if img_roi.sum() > 255:  # more than 1 pixel of mask.
                logger.info(
                    "More than 1 pixel of mask. nodule_in_mask is true")
                nodule_in_mask = True

        if not nodule_in_mask:
            logger.info("Nodule not in mask: {0} {1} {2}".format(
                center_x, center_y, center_z))
            if mal_score > 0:
                mal_score *= -1
            df_nodule_predictions.loc[index, "diameter_mm"] = mal_score
        else:
            if center_z < 30:
                logger.info("Z < 30: {0} center z: {1}  y_perc: {2} ".format(
                    patient_id, center_z, y_perc))
                if mal_score > 0:
                    mal_score *= -1
                df_nodule_predictions.loc[index, "diameter_mm"] = mal_score

            if (z_perc > 0.75 or z_perc < 0.25) and y_perc > 0.85:
                logger.info(
                    "SUSPICIOUS FALSEPOSITIVE: {0}  center z: {1}  y_perc: {2}"
                    .format(patient_id, center_z, y_perc))

            if center_z < 50 and y_perc < 0.30:
                logger.info(
                    "SUSPICIOUS FALSEPOSITIVE OUT OF RANGE: {0} center z: {1} y_perc: {2}"
                    .format(patient_id, center_z, y_perc))

    df_nodule_predictions.drop(df_nodule_predictions.index[delete_indices],
                               inplace=True)
    return df_nodule_predictions
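The filter above flags rejected candidates by flipping diameter_mm negative instead of dropping the rows, so a caller can split kept and rejected predictions afterwards. A sketch mirroring the pattern used later on this page:

df = filter_patient_nodules_predictions(df_nodule_predictions, patient_id, CUBE_SIZE, luna16=True)
kept_df = df[df["diameter_mm"] >= 0]
rejected_df = df[df["diameter_mm"] < 0]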
def make_negative_train_data_based_on_predicted_luna_nodules():
    src_dir = settings.LUNA_NODULE_DETECTION_DIR
    pos_labels_dir = settings.LUNA_NODULE_LABELS_DIR
    keep_dist = CUBE_SIZE + CUBE_SIZE / 2
    total_false_pos = 0
    for csv_index, csv_path in enumerate(glob.glob(src_dir + "*.csv")):
        file_name = ntpath.basename(csv_path)
        patient_id = file_name.replace(".csv", "")
        # if not "273525289046256012743471155680" in patient_id:
        #     continue
        df_nodule_predictions = pandas.read_csv(csv_path)
        pos_annos_manual = None
        manual_path = settings.MANUAL_ANNOTATIONS_LABELS_DIR + patient_id + ".csv"
        if os.path.exists(manual_path):
            pos_annos_manual = pandas.read_csv(manual_path)

        filter_patient_nodules_predictions(df_nodule_predictions, patient_id, CUBE_SIZE, luna16=True)
        pos_labels = pandas.read_csv(pos_labels_dir + patient_id + "_annos_pos_lidc.csv")
        logger.info("csv_index {0} : patient_id {1} , pos {2}".format(csv_index, patient_id, len(pos_labels)))
        patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA_16_TRAIN_DIR2D2, "*_m.png")
        for nod_pred_index, nod_pred_row in df_nodule_predictions.iterrows():
            if nod_pred_row["diameter_mm"] < 0:
                continue
            nx, ny, nz = helpers.percentage_to_pixels(nod_pred_row["coord_x"], nod_pred_row["coord_y"],
                                                      nod_pred_row["coord_z"], patient_imgs)
            diam_mm = nod_pred_row["diameter_mm"]
            for label_index, label_row in pos_labels.iterrows():
                px, py, pz = helpers.percentage_to_pixels(label_row["coord_x"], label_row["coord_y"],
                                                          label_row["coord_z"], patient_imgs)
                dist = math.sqrt(math.pow(nx - px, 2) + math.pow(ny - py, 2) + math.pow(nz - pz, 2))
                if dist < keep_dist:
                    if diam_mm >= 0:
                        diam_mm *= -1
                    df_nodule_predictions.loc[nod_pred_index, "diameter_mm"] = diam_mm
                    break

            if pos_annos_manual is not None:
                for index, label_row in pos_annos_manual.iterrows():
                    px, py, pz = helpers.percentage_to_pixels(label_row["x"], label_row["y"], label_row["z"],
                                                              patient_imgs)
                    diameter = label_row["d"] * patient_imgs[0].shape[1]
                    # print((pos_coord_x, pos_coord_y, pos_coord_z))
                    # print(center_float_rescaled)
                    dist = math.sqrt(math.pow(px - nx, 2) + math.pow(py - ny, 2) + math.pow(pz - nz, 2))
                    if dist < (diameter + 72):  # make sure we have a big margin
                        if diam_mm >= 0:
                            diam_mm *= -1
                        df_nodule_predictions.loc[nod_pred_index, "diameter_mm"] = diam_mm
                        logger.info("#Too close: {0} {1} {2}".format(nx, ny, nz))
                        break

        df_nodule_predictions.to_csv(csv_path, index=False)
        df_nodule_predictions = df_nodule_predictions[df_nodule_predictions["diameter_mm"] >= 0]
        df_nodule_predictions.to_csv(pos_labels_dir + patient_id + "_candidates_falsepos.csv", index=False)
        total_false_pos += len(df_nodule_predictions)
    logger.info("Total false pos: {0}".format(total_false_pos))
Example #8
def make_pos_annotation_images_manual_ndsb3():
    src_dir = "resources/ndsb3_manual_labels/"
    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/ndsb3_train_cubes_manual/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    train_label_df = pandas.read_csv("resources/stage1_labels.csv")
    train_label_df.set_index(["id"], inplace=True)
    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4.1" in patient_id:
            continue

        cancer_label = train_label_df.loc[patient_id]["cancer"]
        df_annos = pandas.read_csv(csv_file)
        #        if len(df_annos) == 0:
        #            continue
        try:
            images = helpers.load_patient_images(
                patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR,
                "*" + CUBE_IMGTYPE_SRC + ".png")
        except:
            continue
        anno_index = 0
        for index, row in df_annos.iterrows():
            pos_neg = "pos" if row["id"] == 0 else "neg"
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            malscore = int(round(row["dmm"]))
            anno_index += 1
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue
            print(patient_id)
            assert malscore > 0 or pos_neg == "neg"
            save_cube_img(
                dst_dir + "ndsb3manual_" + patient_id + "_" + str(anno_index) +
                "_" + pos_neg + "_" + str(cancer_label) + "_" + str(malscore) +
                "_1_pn.png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id,
                              len(df_annos)], [5, 64, 8])
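The cube filenames written above pack several labels into one name. A hypothetical parser for reading them back; the field order is taken from the save_cube_img call and nothing beyond that is implied.

import ntpath

def parse_ndsb3_manual_cube_name(file_path):
    # ndsb3manual_<patient_id>_<anno_index>_<pos|neg>_<cancer_label>_<malscore>_1_pn.png
    parts = ntpath.basename(file_path).replace(".png", "").split("_")
    return {
        "patient_id": parts[1],
        "anno_index": int(parts[2]),
        "pos_neg": parts[3],
        "cancer_label": int(parts[4]),
        "malscore": int(parts[5]),
    }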
Example #9
def make_annotation_images_lidc():
    src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"

    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    # Read the CSVs listing nodule coordinates and related annotations
    for patient_index, csv_file in enumerate(
            glob.glob(src_dir + "*_annos_pos_lidc.csv")):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos_lidc.csv",
                                                       "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        # Load the patient's CT image stack
        images = helpers.load_patient_images(
            patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        for index, row in df_annos.iterrows():
            # Coordinates
            coord_x = int(row["coord_x"] * images.shape[2])
            coord_y = int(row["coord_y"] * images.shape[1])
            coord_z = int(row["coord_z"] * images.shape[0])
            # Malignancy score
            malscore = int(row["malscore"])
            anno_index = row["anno_index"]
            anno_index = str(anno_index).replace(" ", "xspacex").replace(
                ".", "xpointx").replace("_", "xunderscorex")
            # Build a 64x64x64 cube from the image stack
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            # Filename: patientID_annoIndex_malscore^2_1_pos.png
            save_cube_img(
                dst_dir + patient_id + "_" + str(anno_index) + "_" +
                str(malscore * malscore) + "_1_pos.png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id,
                              len(df_annos)], [5, 64, 8])
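The anno_index values above are escaped to keep them filename-safe; a tiny inverse for recovering the original index from a generated filename (illustration only):

def unescape_anno_index(token):
    # Reverse the replacements applied to anno_index above.
    return token.replace("xspacex", " ").replace("xpointx", ".").replace("xunderscorex", "_")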
def make_negative_train_data_based_on_predicted_luna_nodules():
    src_dir = settings.LUNA_NODULE_DETECTION_DIR
    pos_labels_dir = settings.LUNA_NODULE_LABELS_DIR
    keep_dist = CUBE_SIZE + CUBE_SIZE / 2
    total_false_pos = 0
    for csv_index, csv_path in enumerate(glob.glob(src_dir + "*.csv")):
        file_name = ntpath.basename(csv_path)
        patient_id = file_name.replace(".csv", "")
        # if not "273525289046256012743471155680" in patient_id:
        #     continue
        df_nodule_predictions = pandas.read_csv(csv_path)
        pos_annos_manual = None
        manual_path = settings.MANUAL_ANNOTATIONS_LABELS_DIR + patient_id + ".csv"
        if os.path.exists(manual_path):
            pos_annos_manual = pandas.read_csv(manual_path)

        filter_patient_nodules_predictions(df_nodule_predictions, patient_id, CUBE_SIZE, luna16=True)
        pos_labels = pandas.read_csv(pos_labels_dir + patient_id + "_annos_pos_lidc.csv")
        print(csv_index, ": ", patient_id, ", pos", len(pos_labels))
        patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA_16_TRAIN_DIR2D2, "*_m.png")
        for nod_pred_index, nod_pred_row in df_nodule_predictions.iterrows():
            if nod_pred_row["diameter_mm"] < 0:
                continue
            nx, ny, nz = helpers.percentage_to_pixels(nod_pred_row["coord_x"], nod_pred_row["coord_y"], nod_pred_row["coord_z"], patient_imgs)
            diam_mm = nod_pred_row["diameter_mm"]
            for label_index, label_row in pos_labels.iterrows():
                px, py, pz = helpers.percentage_to_pixels(label_row["coord_x"], label_row["coord_y"], label_row["coord_z"], patient_imgs)
                dist = math.sqrt(math.pow(nx - px, 2) + math.pow(ny - py, 2) + math.pow(nz - pz, 2))
                if dist < keep_dist:
                    if diam_mm >= 0:
                        diam_mm *= -1
                    df_nodule_predictions.loc[nod_pred_index, "diameter_mm"] = diam_mm
                    break

            if pos_annos_manual is not None:
                for index, label_row in pos_annos_manual.iterrows():
                    px, py, pz = helpers.percentage_to_pixels(label_row["x"], label_row["y"], label_row["z"], patient_imgs)
                    diameter = label_row["d"] * patient_imgs[0].shape[1]
                    # print((pos_coord_x, pos_coord_y, pos_coord_z))
                    # print(center_float_rescaled)
                    dist = math.sqrt(math.pow(px - nx, 2) + math.pow(py - ny, 2) + math.pow(pz - nz, 2))
                    if dist < (diameter + 72):  #  make sure we have a big margin
                        if diam_mm >= 0:
                            diam_mm *= -1
                        df_nodule_predictions.loc[nod_pred_index, "diameter_mm"] = diam_mm
                        print("#Too close",  (nx, ny, nz))
                        break

        df_nodule_predictions.to_csv(csv_path, index=False)
        df_nodule_predictions = df_nodule_predictions[df_nodule_predictions["diameter_mm"] >= 0]
        df_nodule_predictions.to_csv(pos_labels_dir + patient_id + "_candidates_falsepos.csv", index=False)
        total_false_pos += len(df_nodule_predictions)
    print("Total false pos:", total_false_pos)
Example #11
def make_annotation_images_lidc():
    #https://github.com/juliandewit/kaggle_ndsb2017/issues/2
    #src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"

    src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"

    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    #pdb.set_trace()

    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(
            glob.glob(src_dir + "*_annos_pos_lidc.csv")):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos_lidc.csv",
                                                       "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(
            patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        for index, row in df_annos.iterrows():
            coord_x = int(row["coord_x"] * images.shape[2])
            coord_y = int(row["coord_y"] * images.shape[1])
            coord_z = int(row["coord_z"] * images.shape[0])
            malscore = int(row["malscore"])
            anno_index = row["anno_index"]
            anno_index = str(anno_index).replace(" ", "xspacex").replace(
                ".", "xpointx").replace("_", "xunderscorex")
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      (coord_x, coord_y, coord_z))
                continue

            save_cube_img(
                dst_dir + patient_id + "_" + str(anno_index) + "_" +
                str(malscore * malscore) + "_1_pos.png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id,
                              len(df_annos)], [5, 64, 8])
def filter_patient_nodules_predictions(df_nodule_predictions: pandas.DataFrame,
                                       patient_id, view_size):
    src_dir = ''
    patient_mask = helpers.load_patient_images(patient_id + '_Preprocessed',
                                               src_dir, "*_m.png")
    delete_indices = []
    for index, row in df_nodule_predictions.iterrows():
        z_perc = row["coord_z"]
        y_perc = row["coord_y"]
        center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * patient_mask.shape[0]))

        mal_score = row["diameter_mm"]
        start_y = center_y - view_size / 2
        start_x = center_x - view_size / 2
        nodule_in_mask = False
        for z_index in [-1, 0, 1]:
            img = patient_mask[z_index + center_z]
            start_x = int(start_x)
            start_y = int(start_y)
            view_size = int(view_size)
            img_roi = img[start_y:start_y + view_size,
                          start_x:start_x + view_size]
            if img_roi.sum() > 255:  # more than 1 pixel of mask.
                nodule_in_mask = True

        if not nodule_in_mask:
            print("Nodule not in mask: ", (center_x, center_y, center_z))
            if mal_score > 0:
                mal_score *= -1
            df_nodule_predictions.loc[index, "diameter_mm"] = mal_score
        else:
            if center_z < 30:
                print("Z < 30: ", patient_id, " center z:", center_z,
                      " y_perc: ", y_perc)
                if mal_score > 0:
                    mal_score *= -1
                df_nodule_predictions.loc[index, "diameter_mm"] = mal_score

            if (z_perc > 0.75 or z_perc < 0.25) and y_perc > 0.85:
                print("SUSPICIOUS FALSEPOSITIVE: ", patient_id, " center z:",
                      center_z, " y_perc: ", y_perc)

            if center_z < 50 and y_perc < 0.30:
                print("SUSPICIOUS FALSEPOSITIVE OUT OF RANGE: ", patient_id,
                      " center z:", center_z, " y_perc: ", y_perc)

    df_nodule_predictions.drop(df_nodule_predictions.index[delete_indices],
                               inplace=True)
    return df_nodule_predictions
def make_candidate_auto_images(candidate_types=[]):
    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_auto/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for candidate_type in candidate_types:
        for file_path in glob.glob(dst_dir + "*_" + candidate_type + ".png"):
            os.remove(file_path)

    for candidate_type in candidate_types:
        if candidate_type == "falsepos":
            src_dir = "resources/luna16_falsepos_labels/"
        else:
            src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"

        for index, csv_file in enumerate(glob.glob(src_dir + "*_candidates_" + candidate_type + ".csv")):
            patient_id = ntpath.basename(csv_file).replace("_candidates_" + candidate_type + ".csv", "")
            print(index, ",patient: ", patient_id, " type:", candidate_type)
            # if not "148229375703208214308676934766" in patient_id:
            #     continue
            df_annos = pandas.read_csv(csv_file)
            if len(df_annos) == 0:
                continue
            images = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*" + CUBE_IMGTYPE_SRC + ".png", exclude_wildcards=[])

            row_no = 0
            for index, row in df_annos.iterrows():
                coord_x = int(row["coord_x"] * images.shape[2])
                coord_y = int(row["coord_y"] * images.shape[1])
                coord_z = int(row["coord_z"] * images.shape[0])
                anno_index = int(row["anno_index"])
                cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 48)
                if cube_img.sum() < 10:
                    print("Skipping ", coord_x, coord_y, coord_z)
                    continue
                # print(cube_img.sum())
                try:
                    save_cube_img(dst_dir + patient_id + "_" + str(anno_index) + "_0_" + candidate_type + ".png", cube_img, 6, 8)
                except Exception as ex:
                    print(ex)

                row_no += 1
                max_item = 240 if candidate_type == "white" else 200
                if candidate_type == "luna":
                    max_item = 500
                if row_no > max_item:
                    break
def make_pos_annotation_images_manual():
    src_dir = "resources/luna16_manual_labels/"

    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_manual/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for file_path in glob.glob(dst_dir + "*_manual.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4" not in patient_id:
            continue

        print(patient_id)
        # if not "172845185165807139298420209778" in patient_id:
        #     continue
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*" + CUBE_IMGTYPE_SRC + ".png")

        for index, row in df_annos.iterrows():
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            diameter = int(row["d"] * images.shape[2])
            node_type = int(row["id"])
            malscore = int(diameter)
            malscore = min(25, malscore)
            malscore = max(16, malscore)
            anno_index = index
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",(coord_x, coord_y, coord_z))
                continue

            save_cube_img(dst_dir + patient_id + "_" + str(anno_index) + "_" + str(malscore) + "_1_" + ("pos" if node_type == 0 else "neg") + ".png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
def make_pos_annotation_images_manual_ndsb3():
    src_dir = "resources/ndsb3_manual_labels/"
    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/ndsb3_train_cubes_manual/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)


    train_label_df = pandas.read_csv("resources/stage1_labels.csv")
    train_label_df.set_index(["id"], inplace=True)
    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4.1" in patient_id:
            continue

        cancer_label = train_label_df.loc[patient_id]["cancer"]
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*" + CUBE_IMGTYPE_SRC + ".png")

        anno_index = 0
        for index, row in df_annos.iterrows():
            pos_neg = "pos" if row["id"] == 0 else "neg"
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            malscore = int(round(row["dmm"]))
            anno_index += 1
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",(coord_x, coord_y, coord_z))
                continue
            print(patient_id)
            assert malscore > 0 or pos_neg == "neg"
            save_cube_img(dst_dir + "ndsb3manual_" + patient_id + "_" + str(anno_index) + "_" + pos_neg + "_" + str(cancer_label) + "_" + str(malscore) + "_1_pn.png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
def filter_patient_nodules_predictions(df_nodule_predictions: pandas.DataFrame, patient_id, view_size, luna16=False):
    src_dir = settings.LUNA_16_TRAIN_DIR2D2 if luna16 else settings.NDSB3_EXTRACTED_IMAGE_DIR
    patient_mask = helpers.load_patient_images(patient_id, src_dir, "*_m.png")
    delete_indices = []
    for index, row in df_nodule_predictions.iterrows():
        z_perc = row["coord_z"]
        y_perc = row["coord_y"]
        center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * patient_mask.shape[0]))

        mal_score = row["diameter_mm"]
        start_y = center_y - view_size / 2
        start_x = center_x - view_size / 2
        nodule_in_mask = False
        for z_index in [-1, 0, 1]:
            img = patient_mask[z_index + center_z]
            start_x = int(start_x)
            start_y = int(start_y)
            view_size = int(view_size)
            img_roi = img[start_y:start_y+view_size, start_x:start_x + view_size]
            if img_roi.sum() > 255:  # more than 1 pixel of mask.
                nodule_in_mask = True

        if not nodule_in_mask:
            print("Nodule not in mask: ", (center_x, center_y, center_z))
            if mal_score > 0:
                mal_score *= -1
            df_nodule_predictions.loc[index, "diameter_mm"] = mal_score
        else:
            if center_z < 30:
                print("Z < 30: ", patient_id, " center z:", center_z, " y_perc: ",  y_perc)
                if mal_score > 0:
                    mal_score *= -1
                df_nodule_predictions.loc[index, "diameter_mm"] = mal_score


            if (z_perc > 0.75 or z_perc < 0.25) and y_perc > 0.85:
                print("SUSPICIOUS FALSEPOSITIVE: ", patient_id, " center z:", center_z, " y_perc: ",  y_perc)

            if center_z < 50 and y_perc < 0.30:
                print("SUSPICIOUS FALSEPOSITIVE OUT OF RANGE: ", patient_id, " center z:", center_z, " y_perc: ",  y_perc)

    df_nodule_predictions.drop(df_nodule_predictions.index[delete_indices], inplace=True)
    return df_nodule_predictions
Example #17
def filter_patient_nodules_predictions(df_nodule_predictions, patient_id,
                                       view_size):
    src_dir = LUNA16_EXTRACTED_IMAGE_DIR
    patient_mask = helpers.load_patient_images(patient_id, src_dir, "*_m.png")
    delete_indices = []
    for index, row in df_nodule_predictions.iterrows():
        z_perc = row["coord_z"]
        y_perc = row["coord_y"]
        center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * patient_mask.shape[0]))

        mal_score = row["diameter_mm"]
        start_y = center_y - view_size / 2
        start_x = center_x - view_size / 2
        nodule_in_mask = False
        for z_index in [-1, 0, 1]:
            img = patient_mask[z_index + center_z]
            start_x = int(start_x)
            start_y = int(start_y)
            view_size = int(view_size)
            img_roi = img[start_y:start_y + view_size,
                          start_x:start_x + view_size]
            if img_roi.sum() > 255:  # more than 1 pixel of mask.
                nodule_in_mask = True

        if not nodule_in_mask:
            if mal_score > 0:
                mal_score *= -1
            df_nodule_predictions.loc[index, "diameter_mm"] = mal_score
        else:
            if center_z < 30:
                if mal_score > 0:
                    mal_score *= -1
                df_nodule_predictions.loc[index, "diameter_mm"] = mal_score

    df_nodule_predictions.drop(df_nodule_predictions.index[delete_indices],
                               inplace=True)
    return df_nodule_predictions
def make_annotation_images_lidc():
    src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"

    dst_dir = settings.BASE_DIR_SSD + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*_annos_pos_lidc.csv")):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos_lidc.csv", "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*" + CUBE_IMGTYPE_SRC + ".png")

        for index, row in df_annos.iterrows():
            coord_x = int(row["coord_x"] * images.shape[2])
            coord_y = int(row["coord_y"] * images.shape[1])
            coord_z = int(row["coord_z"] * images.shape[0])
            malscore = int(row["malscore"])
            anno_index = row["anno_index"]
            anno_index = str(anno_index).replace(" ", "xspacex").replace(".", "xpointx").replace("_", "xunderscorex")
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",(coord_x, coord_y, coord_z))
                continue

            save_cube_img(dst_dir + patient_id + "_" + str(anno_index) + "_" + str(malscore * malscore) + "_1_pos.png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
def get_48X48_cube(patient_name, nodule_info, origin, plot=False):
    images = helpers.load_patient_images(patient_name, traindata_path,
                                         "*_i.png")  #z,y,x
    mask_images = np.zeros(images.shape)
    lung_cube_py = []
    nodule_cube_mask_py = []
    negative_cube_py = []
    #step1:create image mask
    for index, row in nodule_info.iterrows():
        node_x = abs(int(round(row["coordX"] - origin[0])))  #need abs
        node_y = abs(int(round(row["coordY"] - origin[1])))
        node_z = abs(int(round(row["coordZ"] - origin[2])))
        image_coord = np.array([node_z, node_y, node_x])

        radius = int(round(row["diameter_mm"] / 2 + 1))
        for z in np.arange(-radius, radius + 1):
            for y in np.arange(-radius, radius + 1):
                for x in np.arange(-radius, radius + 1):
                    coord = np.array([z + node_z, y + node_y, x + node_x])
                    if (np.linalg.norm(coord - image_coord)) < radius:
                        mask_images[z + node_z, y + node_y,
                                    x + node_x] = int(1)

    #os.mkdir(LUNG_48X48_IMAGE_PATH + patient_name+ '/')
    #os.mkdir(NODULE_48X48_MASK_PATH + patient_name+ '/')
    #os.mkdir(LUNG_48X48_IMAGE_NEG_PATH + patient_name+ '/')

    #step2: get pos sample nodule
    for index, row in nodule_info.iterrows():
        node_x = abs(int(round(row["coordX"] - origin[0])))
        node_y = abs(int(round(row["coordY"] - origin[1])))
        node_z = abs(int(round(row["coordZ"] - origin[2])))
        radius = int(round(row["diameter_mm"] / 2 + 1))

        num_per_nodule = int(
            3 *
            math.sqrt(row["diameter_mm"] + 100))  # this param can be tuning
        print(patient_name, "num_per_nodule:", num_per_nodule)

        for j in range(num_per_nodule):
            # note: generate U-Net positive samples
            lung_cube, lung_cube_mask = get_lung_cube(mask_images, images,
                                                      node_x, node_y, node_z,
                                                      radius)

            if plot:
                for i in range(lung_cube.shape[0]):
                    # note: rjust(4, '0') matters, otherwise the file ordering is wrong
                    cv2.imwrite(
                        LUNG_48X48_IMAGE_PATH + patient_name + "/" + "img_" +
                        str(index * 3 + j) + "_" + str(i).rjust(4, '0') +
                        "_i.png", lung_cube[i])
                    cv2.imwrite(
                        NODULE_48X48_MASK_PATH + patient_name + "/" + "img_" +
                        str(index * 3 + j) + "_" + str(i).rjust(4, '0') +
                        "_i.png", lung_cube_mask[i] * 255)

            if lung_cube.sum() > 2000:  #lung_cube pixel value: 0~255
                lung_cube_py.append(lung_cube)
                nodule_cube_mask_py.append(lung_cube_mask)

    #step3: get negative sample nodule
    print(patient_name, "pos+ nodule num:", len(lung_cube_py))
    lung_mask = helpers.load_patient_images(patient_name, traindata_path,
                                            "*_m.png")  #z,y,x
    lung_mask_shape = lung_mask.shape  #z,y,x
    for i in range(len(lung_cube_py)):
        ok = False
        while (ok == False):
            # sample a point (x, y, z) on the lung mask edge
            coord_z = int(
                np.random.normal(lung_mask_shape[0] / 2,
                                 lung_mask_shape[0] / 6))
            coord_z = max(coord_z, 0)
            coord_z = min(coord_z, lung_mask_shape[0] - 1)
            candidate_map = lung_mask[coord_z]
            candidate_map = cv2.Canny(candidate_map.copy(), 100, 200)
            non_zero_indices = np.nonzero(candidate_map)
            if len(non_zero_indices[0]) == 0:
                continue
            nonzero_index = np.random.randint(0, len(non_zero_indices[0]) - 1)
            coord_y = non_zero_indices[0][nonzero_index]
            if coord_y > lung_mask_shape[1] * 0.85:
                continue
            coord_x = non_zero_indices[1][nonzero_index]
            real_candidate = True
            # make sure (x, y, z) is far enough from every nodule
            for index, row in nodule_info.iterrows():
                node_x = abs(int(round(row["coordX"] - origin[0])))
                node_y = abs(int(round(row["coordY"] - origin[1])))
                node_z = abs(int(round(row["coordZ"] - origin[2])))
                image_coord = np.array([node_x, node_y, node_z])
                radius = int(round(row["diameter_mm"] / 2 + 1))

                if coord_x != node_x:
                    coord_x = np.random.randint(min(coord_x, node_x),
                                                max(coord_x, node_x))
                if coord_y != node_y:
                    coord_y = np.random.randint(min(node_y, coord_y),
                                                max(node_y, coord_y))
                if coord_z != node_z:
                    coord_z = np.random.randint(min(node_z, coord_z),
                                                max(node_z, coord_z))
                coord = np.array([coord_x, coord_y, coord_z])
                # The randomly sampled negative candidate must have its center more than
                # radius + 24 away from the positive center (radius = positive nodule radius,
                # 24 = half the cube edge), which guarantees the negative cube does not
                # overlap the positive sample.
                if (np.linalg.norm(coord - image_coord) < radius + 24):
                    real_candidate = False
                    break
                else:
                    real_candidate = True

            if real_candidate:
                start_x = max(coord_x - 24, 0)  # (coord_x, coord_y, coord_z) is where we look for a negative sample
                start_y = max(coord_y - 24, 0)
                start_z = max(coord_z - 24, 0)

                if (coord_x + 24 > lung_mask.shape[2] - 1):
                    start_x = lung_mask.shape[2] - 48
                if (coord_y + 24 > lung_mask.shape[1] - 1):
                    start_y = lung_mask.shape[1] - 48
                if (coord_z + 24 > lung_mask.shape[0] - 1):
                    start_z = lung_mask.shape[0] - 48
                if (lung_mask[start_z:start_z + 48, start_y:start_y + 48,
                              start_x:start_x + 48].sum() > 2000):
                    # make sure the negative 48x48x48 cube lies inside the lung mask
                    neg_candidate = images[start_z:start_z + 48,
                                           start_y:start_y + 48,
                                           start_x:start_x + 48]
                    assert (neg_candidate.shape == (48, 48, 48))
                    if plot:
                        for j in range(len(neg_candidate)):
                            cv2.imwrite(
                                LUNG_48X48_IMAGE_NEG_PATH + patient_name +
                                '/' + "img_" + str(i).rjust(4, '0') + '_' +
                                str(j) + ".png", neg_candidate[j])
                    negative_cube_py.append(neg_candidate)
                    ok = True
    assert (len(lung_cube_py) == len(negative_cube_py))
    return lung_cube_py, nodule_cube_mask_py, negative_cube_py
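A usage sketch for the function above, stacking the returned lists into arrays for U-Net training; the dtype, the assumed (N, 48, 48, 48) shapes and the variable names are illustrative only.

import numpy as np

lung_cubes, nodule_masks, negative_cubes = get_48X48_cube(patient_name, nodule_info, origin)
x_pos = np.array(lung_cubes, dtype=np.float32)      # positive image cubes
y_pos = np.array(nodule_masks, dtype=np.float32)    # matching binary masks
x_neg = np.array(negative_cubes, dtype=np.float32)  # negative image cubes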
Example #20
def view_image(patient):
    p = helpers.load_patient_images(patient_ids[patient], src_dir, "*_i.png")
    # plt.imshow(p[0], cmap=plt.cm.gray_r, interpolation='nearest')
    # plt.show()
    slice_images(patient, p)
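slice_images is not shown in this example; a hypothetical stand-in that simply displays every tenth axial slice with matplotlib could look like this:

import matplotlib.pyplot as plt

def slice_images_sketch(patient, p, step=10):
    # Hypothetical: show every `step`-th axial slice of the (z, y, x) volume.
    for i in range(0, p.shape[0], step):
        plt.imshow(p[i], cmap=plt.cm.gray)
        plt.title("patient {0}, slice {1}".format(patient, i))
        plt.show()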
Example #21
def predict_cubes(model_path,
                  continue_job,
                  only_patient_id=None,
                  luna16=False,
                  magnification=1,
                  flip=False,
                  train_data=True,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    # if holdout_no is not None:
    #     holdout_ext = "_h" + str(holdout_no) if holdout_no >= 0 else ""
    flip_ext = ""
    if flip:
        flip_ext = "_flip"

    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE,
                                                             CUBE_SIZE,
                                                             CUBE_SIZE, 1),
                                                load_weight_path=model_path)
    if not luna16:
        if train_data:
            labels_df = pandas.read_csv("resources/stage1_labels.csv")
            labels_df.set_index(["id"], inplace=True)
        else:
            #labels_df = pandas.read_csv("resources/stage2_sample_submission.csv")
            labels_df = pandas.read_csv("resources/tc_sample_submission.csv")
            labels_df.set_index(["id"], inplace=True)

    patient_ids = []
    for file_name in os.listdir(settings.NDSB3_EXTRACTED_IMAGE_DIR):
        if not os.path.isdir(settings.NDSB3_EXTRACTED_IMAGE_DIR + file_name):
            continue
        patient_ids.append(file_name)

    all_predictions_csv = []
    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16:
            if patient_id not in labels_df.index:
                continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(
                patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask,
                                                          (1, 1, 1),
                                                          magnification,
                                                          is_mask_image=True)

            # patient_img = patient_img[:, ::-1, :]
            # patient_mask = patient_mask[:, ::-1, :]

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE
        # CROP_SIZE = 48

        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = (predict_volume_shape_list[0],
                                predict_volume_shape_list[1],
                                predict_volume_shape_list[2])
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)
        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []
        cube_img = None
        annotation_index = 0

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    #if cube_img is None:
                    cube_img = patient_img[z * step:z * step + CROP_SIZE,
                                           y * step:y * step + CROP_SIZE,
                                           x * step:x * step + CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step + CROP_SIZE,
                                             y * step:y * step + CROP_SIZE,
                                             x * step:x * step + CROP_SIZE]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(
                                cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                            # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                            # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        img_prep = prepare_image_for_net3D(cube_img)
                        batch_list.append(img_prep)
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) % batch_size == 0:
                            batch_data = numpy.vstack(batch_list)
                            p = model.predict(batch_data,
                                              batch_size=batch_size)
                            for i in range(len(p[0])):
                                p_z = batch_list_coords[i][0]
                                p_y = batch_list_coords[i][1]
                                p_x = batch_list_coords[i][2]
                                nodule_chance = p[0][i][0]
                                predict_volume[p_z, p_y, p_x] = nodule_chance
                                if nodule_chance > P_TH:
                                    p_z = p_z * step + CROP_SIZE / 2
                                    p_y = p_y * step + CROP_SIZE / 2
                                    p_x = p_x * step + CROP_SIZE / 2

                                    p_z_perc = round(
                                        p_z / patient_img.shape[0], 4)
                                    p_y_perc = round(
                                        p_y / patient_img.shape[1], 4)
                                    p_x_perc = round(
                                        p_x / patient_img.shape[2], 4)
                                    diameter_mm = round(p[1][i][0], 4)
                                    # diameter_perc = round(2 * step / patient_img.shape[2], 4)
                                    diameter_perc = round(
                                        2 * step / patient_img.shape[2], 4)
                                    diameter_perc = round(
                                        diameter_mm / patient_img.shape[2], 4)
                                    nodule_chance = round(nodule_chance, 4)
                                    #patient_predictions_csv_line = [annotation_index, p_x_perc, p_y_perc, p_z_perc, diameter_perc, nodule_chance, diameter_mm]
                                    patient_predictions_csv_line = [
                                        annotation_index, p_x_perc, p_y_perc,
                                        p_z_perc, diameter_perc, nodule_chance,
                                        diameter_mm, p_x, p_y, p_z
                                    ]
                                    #patient_predictions_csv_line = [annotation_index, p_x, p_y, p_z, diameter_perc, nodule_chance, diameter_mm]
                                    patient_predictions_csv.append(
                                        patient_predictions_csv_line)
                                    all_predictions_csv.append(
                                        [patient_id] +
                                        patient_predictions_csv_line)
                                    annotation_index += 1

                            batch_list = []
                            batch_list_coords = []
                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:", skipped_count)

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=[
                                  "anno_index", "coord_x", "coord_y",
                                  "coord_z", "diameter", "nodule_chance",
                                  "diameter_mm", "abs_x", "abs_y", "abs_z"
                              ])
        filter_patient_nodules_predictions(df, patient_id,
                                           CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)

        # cols = ["anno_index", "nodule_chance", "diamete_mm"] + ["f" + str(i) for i in range(64)]
        # df_features = pandas.DataFrame(patient_features_csv, columns=cols)
        # for index, row in df.iterrows():
        #     if row["diameter_mm"] < 0:
        #         print("Dropping")
        #         anno_index = row["anno_index"]
        #         df_features.drop(df_features[df_features["anno_index"] == anno_index].index, inplace=True)
        #
        # df_features.to_csv(csv_target_path_features, index=False)

        # df = pandas.DataFrame(all_predictions_csv, columns=["patient_id", "anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        # df.to_csv("c:/tmp/tmp2.csv", index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
Exemple #22
0
def get_48X48_cube(patient_name, cand_df, origin_df, plot=False):
    # A patient's CT scan is a 3D volume: the lung is cut into many cross-sections, each slice being one image.
    # Read the CT scan images from the folder named after the patient ID; there are many slices per patient.
    # The images get reshaped here; the exact resulting shape still needs to be verified.
    images = helpers.load_patient_images(patient_name, traindata_path,
                                         "*_i.png")  #z,y,x
    # mask volume with the same shape as the original CT scan images
    mask_images = np.zeros(images.shape)
    lung_cube_py = []
    nodule_cube_mask_py = []
    negative_cube_py = []
    #step1: create the image mask (fill a sphere of 1s around each annotated nodule)
    for index, row in origin_df.iterrows():
        node_x = int(row["coordX"])  #need abs
        node_y = int(row["coordY"])
        node_z = int(row["coordZ"])
        image_coord = np.array([node_z, node_y, node_x])  # center point of this nodule

        radius = int(round(row["diameter_mm"] / 2 + 1))  # radius of this nodule (plus a 1-voxel margin)
        # fill every voxel inside the sphere centered at image_coord with radius `radius` with 1 to build the mask
        for z in np.arange(-radius, radius + 1):
            for y in np.arange(-radius, radius + 1):
                for x in np.arange(-radius, radius + 1):
                    coord = np.array([z + node_z, y + node_y, x + node_x])
                    if (np.linalg.norm(coord - image_coord)) < radius:
                        mask_images[z + node_z, y + node_y,
                                    x + node_x] = int(1)
    #if not os.path.exists(LUNG_48X48_IMAGE_PATH + patient_name+ '/'):
    #os.mkdir(LUNG_48X48_IMAGE_PATH + patient_name+ '/')
    #if not os.path.exists(NODULE_48X48_MASK_PATH + patient_name+ '/'):
    #os.mkdir(NODULE_48X48_MASK_PATH + patient_name+ '/')
    #if not os.path.exists(LUNG_48X48_IMAGE_NEG_PATH + patient_name+ '/'):
    #os.mkdir(LUNG_48X48_IMAGE_NEG_PATH + patient_name+ '/')

    #step2: get pos/neg sample nodule
    for index, row in origin_df.iterrows():
        node_x = int(row["coordX"])  #need abs
        node_y = int(row["coordY"])
        node_z = int(row["coordZ"])
        radius = row["diameter_mm"] / 2

        # set how many positive samples to generate for each nodule
        num_per_nodule = int(
            TIMER *
            math.sqrt(row["diameter_mm"] + 100))  # this param can be tuning
        print(patient_name, "num_per_nodule:", num_per_nodule)

        for j in range(num_per_nodule):
            # note: extract U-Net positive samples
            # generate positive samples for the nodule: the cube image plus its corresponding mask
            # a few points to note:
            # 1. generating positives is itself a form of data augmentation: the sphere center is jittered within a ratio-controlled range, and the cube about to be cut is checked so it does not cross the image borders
            # 2. the generated nodule image and mask are both cubes, which is where the sqrt(3) factor comes from(?)
            # 3. a final augmentation step applies horizontal/vertical flips, rotations, etc.
            lung_cube, lung_cube_mask = helpers.get_lung_cube(
                mask_images, images, node_x, node_y, node_z, radius,
                radius / np.sqrt(3))

            if plot:
                for i in range(lung_cube.shape[0]):
                    # note: rjust(4, '0') is important, otherwise the file ordering is wrong
                    cv2.imwrite(
                        LUNG_48X48_IMAGE_PATH + patient_name + "/" + "img_" +
                        str(index * 3 + j) + "_" + str(i).rjust(4, '0') +
                        "_i.png", lung_cube[i])
                    cv2.imwrite(
                        NODULE_48X48_MASK_PATH + patient_name + "/" + "img_" +
                        str(index * 3 + j) + "_" + str(i).rjust(4, '0') +
                        "_i.png", lung_cube_mask[i] * 255)

            # this step presumably ensures the nodule in the positive sample is not too small?
            # i.e. skip cubes whose nodule content is too small?
            if lung_cube.sum() > 2000:  #lung_cube pixel value: 0~255
                lung_cube_py.append(lung_cube)
                nodule_cube_mask_py.append(lung_cube_mask)

        #for j in range(num_per_nodule // 2):
        ##noting:unet neg sample get
        #lung_cube, lung_cube_mask = helpers.get_lung_cube(mask_images, images, node_x,node_y,node_z,radius, 0, radius/np.sqrt(3)+1, radius + 3,type= 1)

        #if lung_cube.sum() > 2000:#lung_cube pixel value: 0~255
        #negative_cube_py.append(lung_cube)
        #if plot:
        #for i in range(len(lung_cube)):
        #cv2.imwrite(LUNG_48X48_IMAGE_NEG_PATH + patient_name+ '/' + "imgpos-_" + str(index*3+j) + "_"+ str(i).rjust(4, '0') +".png", lung_cube[i])
        #cv2.imwrite(LUNG_48X48_IMAGE_NEG_PATH + patient_name+ '/' + "imgpos-_" + str(index*3+j) + "_"+ str(i).rjust(4, '0') +"_m.png", lung_cube_mask[i]*255)

    #step3: get negative sample nodule
    # negative samples are regions that contain no nodule
    print(patient_name, "pos+ nodule num:", len(lung_cube_py))
    lung_mask = helpers.load_patient_images(patient_name, traindata_path,
                                            "*_m.png")  #z,y,x
    lung_mask_shape = lung_mask.shape  #z,y,x
    for index, row in cand_df.iterrows():
        # center of this candidate negative sample
        cand_orgin = np.array([row['coordx'], row['coordy'], row['coordz']])
        x = int(round(cand_orgin[0]))
        y = int(round(cand_orgin[1]))
        z = int(round(cand_orgin[2]))

        # if the candidate center falls inside the sphere of any positive nodule,
        # the candidate is invalid and is simply skipped
        cand_flag = 1
        for orgin_index, origin_row in origin_df.iterrows():
            radius = origin_row['diameter_mm'] / 2
            origin = np.array([
                origin_row['coordX'], origin_row['coordY'],
                origin_row['coordZ']
            ])
            if np.linalg.norm(origin - cand_orgin) < radius:
                cand_flag = 0
                break
        # if cand_flag is 1, the candidate center lies outside every nodule,
        # so negatives can be generated; generation and augmentation mirror the positive case
        # num_per_nodule // 4 negatives are generated per candidate, all with a fixed radius of 5
        if cand_flag:
            for j in range(num_per_nodule // 4):
                lung_cube, _ = helpers.get_lung_cube(mask_images, images, x, y,
                                                     z, 5, 5 / np.sqrt(3))
                negative_cube_py.append(lung_cube)
                if plot:
                    for z in range(len(lung_cube)):
                        cv2.imwrite(
                            LUNG_48X48_IMAGE_NEG_PATH + patient_name + '/' +
                            "img_" + str(index).rjust(4, '0') + '_' + str(j) +
                            ".png", lung_cube[z])

    print('lung_cube_py', len(lung_cube_py), 'negative_cube_py',
          len(negative_cube_py))
    return lung_cube_py, nodule_cube_mask_py, negative_cube_py
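The step-1 loop above builds the nodule mask by brute force: it scans the bounding cube of each annotation and sets every voxel within the radius to 1. A minimal standalone sketch of that idea, with a dummy volume and a made-up center and radius (not the real loaders or annotations):

import numpy as np

def fill_sphere_mask(mask, center_zyx, radius):
    # Set every voxel within `radius` of center_zyx to 1 by scanning the
    # bounding cube, same as the step-1 loop above. Like the original, it
    # assumes the sphere does not touch the volume border.
    cz, cy, cx = center_zyx
    r = int(round(radius))
    for z in range(-r, r + 1):
        for y in range(-r, r + 1):
            for x in range(-r, r + 1):
                if np.linalg.norm([z, y, x]) < radius:
                    mask[cz + z, cy + y, cx + x] = 1
    return mask

mask = np.zeros((64, 64, 64), dtype=np.uint8)          # dummy volume
mask = fill_sphere_mask(mask, (32, 30, 28), radius=5)  # made-up nodule
print(mask.sum())  # number of voxels inside the sphere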
def predict(csv_file_path):
    patient_id = os.path.basename(csv_file_path).replace("_candidate.csv", "")
    print(patient_id + ' start predict ...')

    # read the scan from the corresponding .mhd file
    itk_img = sitk.ReadImage(images_raw_mhd + patient_id + '.mhd')
    #img_array = sitk.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering)
    #num_z, height, width = img_array.shape        #heightXwidth constitute the transverse plane
    origin = np.array(
        itk_img.GetOrigin())  # x,y,z  Origin in world coordinates (mm)
    spacing = np.array(itk_img.GetSpacing())  # x,y,z spacing
    direction = np.array(itk_img.GetDirection())

    # get the image with shape (z, y, x); note this is the real (rescaled) shape
    patient_img = helpers.load_patient_images(patient_id, images_path,
                                              "*_i.png", [])
    patient_img_mask = helpers.load_patient_images(patient_id, images_path,
                                                   "*_m.png", [])

    patient_unet_csv = pd.read_csv(csv_file_path)  #coordz,coordy,coordx
    predict_csv = []
    sub_csv = []
    if len(patient_unet_csv) == 0:
        print(patient_id + " has no candidates")
    for candidate_idx, candidate_zyx in patient_unet_csv.iterrows():
        coord_z = candidate_zyx["coordz"]
        coord_y = candidate_zyx["coordy"]
        coord_x = candidate_zyx["coordx"]
        if coord_y >= patient_img.shape[0] * 0.85:
            continue

        coord_z = round(coord_z, 4)  #real z
        coord_y = round(coord_y, 4)  #real y
        coord_x = round(coord_x, 4)  #real x
        coord_z_debug = coord_z / spacing[2]
        coord_y_debug = coord_y / spacing[1]
        coord_x_debug = coord_x / spacing[0]
        submission_x = (direction[0] * coord_x + origin[0]) / direction[0]
        submission_y = (direction[4] * coord_y + origin[1]) / direction[4]
        submission_z = coord_z + origin[2]
        # clamp x, y, z so the cube stays inside the image
        start_z, start_y, start_x = modify_yxz(coord_z, coord_y, coord_x,
                                               patient_img.shape, cube_size)
        #get patient cube img
        cube_img = patient_img[start_z:start_z + cube_size,
                               start_y:start_y + cube_size,
                               start_x:start_x + cube_size]
        cube_img_mask = patient_img_mask[start_z:start_z + cube_size,
                                         start_y:start_y + cube_size,
                                         start_x:start_x + cube_size]
        if cube_img_mask.sum() < 2000:
            continue

        img_prep = prepare_image_for_net3D(cube_img)
        p = model.predict(img_prep)
        nodule_chance = p[0][0]
        predict_csv.append(
            [patient_id, coord_x, coord_y, coord_z, nodule_chance])
        sub_csv.append([
            patient_id, submission_x, submission_y, submission_z, nodule_chance
        ])
    print(patient_id + ' predict over...')
    return predict_csv, sub_csv
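modify_yxz is called above but not shown in this snippet. The name and signature are taken from the call; the body below is only a guess at the clamping it has to do so that a cube_size^3 crop stays inside the (z, y, x) volume:

def modify_yxz(coord_z, coord_y, coord_x, img_shape, cube_size):
    # Hypothetical sketch: turn a cube center into start indices whose
    # cube_size window fits entirely inside img_shape (z, y, x).
    def clamp(center, dim):
        start = int(round(center)) - cube_size // 2
        return max(0, min(start, dim - cube_size))
    return (clamp(coord_z, img_shape[0]),
            clamp(coord_y, img_shape[1]),
            clamp(coord_x, img_shape[2]))

print(modify_yxz(10.0, 200.0, 315.0, (280, 320, 320), 32))  # -> (0, 184, 288)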
Exemple #24
0
def make_predicted_luna_nodules():
    src_dir = settings.TEST_NODULE_DETECTION_DIR + 'predictions10_luna16_fs/'
    pos_labels_dir = settings.TEST_NODULE_DETECTION_DIR
    keep_dist = CUBE_SIZE + CUBE_SIZE / 2
    total_false_pos = 0
    for csv_index, csv_path in enumerate(glob.glob(src_dir + "*.csv")):
        file_name = ntpath.basename(csv_path)
        patient_id = file_name.replace(".csv", "")
        # if not "273525289046256012743471155680" in patient_id:
        #     continue
        df_nodule_predictions = pandas.read_csv(csv_path)

        df_nodule_predictions = filter_patient_nodules_predictions(
            df_nodule_predictions, patient_id, CUBE_SIZE, luna16=False)

        patient_imgs = helpers.load_patient_images(
            patient_id, settings.TEST_EXTRACTED_IMAGE_DIR, "*_m.png")
        # patient_space=pandas.read_csv('../DSB2017/data/ndsb3_extracted_images/patient_spacing.csv')
        # p_pace = patient_space.loc[patient_space.patient_id==patient_id]
        df_nodule_predictions.sort_values(by='nodule_chance',
                                          ascending=False,
                                          inplace=True)
        df_nodule_predictions_copy = df_nodule_predictions.copy()
        for nod_pred_index, nod_pred_row in df_nodule_predictions_copy.iterrows():
            if nod_pred_row["diameter_mm"] < 0:
                continue
            nx, ny, nz = helpers.percentage_to_pixels(nod_pred_row["coord_x"],
                                                      nod_pred_row["coord_y"],
                                                      nod_pred_row["coord_z"],
                                                      patient_imgs)
            candidate_diameter = 6
            for index, row in df_nodule_predictions.iterrows():
                x, y, z = helpers.percentage_to_pixels(row["coord_x"],
                                                       row["coord_y"],
                                                       row["coord_z"],
                                                       patient_imgs)
                dist = math.sqrt(
                    math.pow(nx - x, 2) + math.pow(ny - y, 2) +
                    math.pow(nz - z, 2))
                if dist < (candidate_diameter +
                           48) and dist > 1:  #  make sure we have a big margin
                    ok = False
                    print("# Too close")
                    mal_score = row["diameter_mm"]
                    if nod_pred_row['nodule_chance'] > row['nodule_chance']:
                        if mal_score > 0:
                            mal_score *= -1
                        df_nodule_predictions.loc[index,
                                                  "diameter_mm"] = mal_score
                    continue

        # for nod_pred_index, nod_pred_row in df_nodule_predictions.iterrows():
        #     if nod_pred_row["diameter_mm"] < 0:
        #         continue
        #     nx, ny, nz = helpers.percentage_to_orig(nod_pred_row["coord_x"],p_pace['spacing_x'], nod_pred_row["coord_y"],p_pace['spacing_y'], nod_pred_row["coord_z"],p_pace['spacing_z'], patient_imgs)
        #     df_nodule_predictions.loc[nod_pred_index, "coord_x"] = nx
        #     df_nodule_predictions.loc[nod_pred_index, "coord_y"] = ny
        #     df_nodule_predictions.loc[nod_pred_index, "coord_z"] = nz
        #     # diam_mm = nod_pred_row["diameter_mm"]

        # df_nodule_predictions.to_csv(csv_path, index=False)
        df_nodule_predictions.to_csv(pos_labels_dir + patient_id +
                                     "_candidates_1.csv",
                                     index=False)
        df_nodule_predictions = df_nodule_predictions[
            df_nodule_predictions["diameter_mm"] >= 0]
        df_nodule_predictions = df_nodule_predictions[
            df_nodule_predictions["nodule_chance"] >= 0.9]
        # df_nodule_predictions = df_nodule_predictions[df_nodule_predictions["nodule_chance"] >= 0.9]
        del df_nodule_predictions['diameter']
        del df_nodule_predictions['diameter_mm']
        del df_nodule_predictions['anno_index']
        df_nodule_predictions['seriesuid'] = patient_id
        df_nodule_predictions.to_csv(pos_labels_dir + patient_id +
                                     "_candidates.csv",
                                     index=False)
        total_false_pos += len(df_nodule_predictions)
    print("Total false pos:", total_false_pos)
Exemple #25
0
def get_patient_xyz_do(src_path, patient_id, f_path):
    df_node = pandas.read_csv(f_path)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)
    df_patient = df_node[df_node["seriesuid"] == patient_id]
    print("Annos: ", len(df_patient))

    num_z, height, width = img_array.shape  #heightXwidth constitute the transverse plane
    origin = numpy.array(
        itk_img.GetOrigin())  # x,y,z  Origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(
        itk_img.GetSpacing())  # spacing of voxels in world coor. (mm)
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    print("Rescale: ", rescale)

    direction = numpy.array(
        itk_img.GetDirection())  # x,y,z  Origin in world coordinates (mm)
    print("Direction: ", direction)
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        flip_direction_x = True
        print("Swappint x origin")
    if round(direction[4]) == -1:
        flip_direction_y = True
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    patient_imgs = helpers.load_patient_images(
        patient_id, settings.TEST_EXTRACTED_IMAGE_DIR, "*_i.png")

    pos_annos = []
    df_patient = df_node[df_node["seriesuid"] == patient_id]
    anno_index = 0
    for index, annotation in df_patient.iterrows():
        node_percent = numpy.array(
            [annotation["coordX"], annotation["coordY"], annotation["coordZ"]])
        node_scaled = node_percent * (patient_imgs.swapaxes(0, 2).shape)
        node_float = (node_scaled * settings.TARGET_VOXEL_MM) + origin
        node_x = node_float[0]
        if flip_direction_x:
            node_x *= -1
        node_y = node_float[1]
        if flip_direction_y:
            node_y *= -1
        node_z = node_float[2]
        print("Node org (x,y,z,diam): ",
              (round(node_x, 2), round(node_y, 2), round(node_z, 2)))

        pos_annos.append([
            patient_id,
            round(node_x, 4),
            round(node_y, 4),
            round(node_z, 4), annotation['probability']
        ])
        anno_index += 1

    # df_annos = pandas.DataFrame(pos_annos, columns=["patient_id", "coord_x", "coord_y", "coord_z","probability"])
    # df_annos.to_csv(settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/" + patient_id + "_annos_pos.csv", index=False)
    return pos_annos
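The conversion in get_patient_xyz_do goes from a fractional coordinate to world millimetres: scale by the rescaled image shape (swapped to x, y, z), multiply by TARGET_VOXEL_MM, then add the scan origin. A small worked example with made-up numbers:

import numpy as np

TARGET_VOXEL_MM = 1.0                             # assumed settings.TARGET_VOXEL_MM
origin = np.array([-180.0, -200.0, -300.0])       # made-up scan origin (x, y, z) in mm
img_shape_xyz = np.array([320, 320, 280])         # rescaled image shape swapped to (x, y, z)

node_percent = np.array([0.25, 0.50, 0.75])       # fractional (x, y, z) coordinate
node_scaled = node_percent * img_shape_xyz        # voxel index at TARGET_VOXEL_MM spacing
node_mm = node_scaled * TARGET_VOXEL_MM + origin  # world coordinate before any direction flip
print(node_mm)                                    # [-100.  -40.  -90.]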
def predict_cubes(path,
                  model_path,
                  magnification=1,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):

    dst_dir = settings.LUNA_NODULE_DETECTION_DIR

    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""

    dst_dir += "predictions" + str(int(
        magnification * 10)) + holdout_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE,
                                                             CUBE_SIZE,
                                                             CUBE_SIZE, 1),
                                                load_weight_path=model_path)

    patient_id = path

    all_predictions_csv = []

    if holdout_no is not None:
        patient_fold = helpers.get_patient_fold(patient_id)
        patient_fold %= fold_count

    print(": ", patient_id)
    csv_target_path = dst_dir + patient_id + ".csv"
    print(patient_id)

    try:
        patient_img = helpers.load_patient_images(
            patient_id + '_Preprocessed', '', "*_i.png", [])
    except Exception:
        print('Please re-process the DICOM file again')
        return

    if magnification != 1:
        patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1),
                                                     magnification)

    patient_mask = helpers.load_patient_images(patient_id + '_Preprocessed',
                                               '', "*_m.png", [])
    if magnification != 1:
        patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1),
                                                      magnification,
                                                      is_mask_image=True)

        # patient_img = patient_img[:, ::-1, :]
        # patient_mask = patient_mask[:, ::-1, :]

    step = PREDICT_STEP
    CROP_SIZE = CUBE_SIZE
    # CROP_SIZE = 48

    predict_volume_shape_list = [0, 0, 0]
    for dim in range(3):
        dim_indent = 0
        while dim_indent + CROP_SIZE < patient_img.shape[dim]:
            predict_volume_shape_list[dim] += 1
            dim_indent += step

    predict_volume_shape = (predict_volume_shape_list[0],
                            predict_volume_shape_list[1],
                            predict_volume_shape_list[2])
    predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
    print("Predict volume shape: ", predict_volume.shape)
    done_count = 0
    skipped_count = 0
    batch_size = 128
    batch_list = []
    batch_list_coords = []
    patient_predictions_csv = []
    cube_img = None
    annotation_index = 0

    for z in range(0, predict_volume_shape[0]):
        for y in range(0, predict_volume_shape[1]):
            for x in range(0, predict_volume_shape[2]):
                #if cube_img is None:
                cube_img = patient_img[z * step:z * step + CROP_SIZE,
                                       y * step:y * step + CROP_SIZE,
                                       x * step:x * step + CROP_SIZE]
                cube_mask = patient_mask[z * step:z * step + CROP_SIZE,
                                         y * step:y * step + CROP_SIZE,
                                         x * step:x * step + CROP_SIZE]

                if cube_mask.sum() < 2000:
                    skipped_count += 1
                else:
                    if CROP_SIZE != CUBE_SIZE:
                        cube_img = helpers.rescale_patient_images2(
                            cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                        # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                        # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                    img_prep = prepare_image_for_net3D(cube_img)
                    batch_list.append(img_prep)
                    batch_list_coords.append((z, y, x))
                    if len(batch_list) % batch_size == 0:
                        batch_data = numpy.vstack(batch_list)

                        p = model.predict(batch_data, batch_size=batch_size)
                        for i in range(len(p[0])):
                            p_z = batch_list_coords[i][0]
                            p_y = batch_list_coords[i][1]
                            p_x = batch_list_coords[i][2]
                            nodule_chance = p[0][i][0]
                            predict_volume[p_z, p_y, p_x] = nodule_chance
                            if nodule_chance > P_TH:
                                p_z = p_z * step + CROP_SIZE / 2
                                p_y = p_y * step + CROP_SIZE / 2
                                p_x = p_x * step + CROP_SIZE / 2

                                p_z_perc = round(p_z / patient_img.shape[0], 4)
                                p_y_perc = round(p_y / patient_img.shape[1], 4)
                                p_x_perc = round(p_x / patient_img.shape[2], 4)
                                diameter_mm = round(p[1][i][0], 4)
                                # diameter_perc = round(2 * step / patient_img.shape[2], 4)
                                diameter_perc = round(
                                    2 * step / patient_img.shape[2], 4)
                                diameter_perc = round(
                                    diameter_mm / patient_img.shape[2], 4)
                                nodule_chance = round(nodule_chance, 4)
                                patient_predictions_csv_line = [
                                    annotation_index, p_x_perc, p_y_perc,
                                    p_z_perc, diameter_perc, nodule_chance,
                                    diameter_mm
                                ]
                                patient_predictions_csv.append(
                                    patient_predictions_csv_line)
                                all_predictions_csv.append(
                                    [patient_id] +
                                    patient_predictions_csv_line)
                                annotation_index += 1

                        batch_list = []
                        batch_list_coords = []
                done_count += 1
                if done_count % 10000 == 0:
                    print("Done: ", done_count, " skipped:", skipped_count)

    df = pandas.DataFrame(patient_predictions_csv,
                          columns=[
                              "anno_index", "coord_x", "coord_y", "coord_z",
                              "diameter", "nodule_chance", "diameter_mm"
                          ])
    print("Started Filtering")
    print(all_predictions_csv)
    #print(batch_data)
    filter_patient_nodules_predictions(df, patient_id,
                                       CROP_SIZE * magnification)
    df.to_csv(csv_target_path, index=False)

    # cols = ["anno_index", "nodule_chance", "diamete_mm"] + ["f" + str(i) for i in range(64)]
    # df_features = pandas.DataFrame(patient_features_csv, columns=cols)
    # for index, row in df.iterrows():
    #     if row["diameter_mm"] < 0:
    #         print("Dropping")
    #         anno_index = row["anno_index"]
    #         df_features.drop(df_features[df_features["anno_index"] == anno_index].index, inplace=True)
    #
    # df_features.to_csv(csv_target_path_features, index=False)

    # df = pandas.DataFrame(all_predictions_csv, columns=["patient_id", "anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
    # df.to_csv("c:/tmp/tmp2.csv", index=False)

    print(predict_volume.mean())
    print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def process_luna_candidates_patient(src_path, patient_id):
    dst_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    img_dir = dst_dir + patient_id + "/"
    df_pos_annos = pandas.read_csv("../../data/csv/train/annotations.csv")
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    #print("Img array: ", img_array.shape)
    #print("Pos annos: ", len(df_pos_annos))
    num_z, height, width = img_array.shape  #heightXwidth constitute the transverse plane
    origin = numpy.array(
        itk_img.GetOrigin())  # x,y,z  Origin in world coordinates (mm)
    #print("Origin (x,y,z): ", origin)
    spacing = numpy.array(
        itk_img.GetSpacing())  # spacing of voxels in world coor. (mm)
    #print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    #print("Rescale: ", rescale)
    direction = numpy.array(
        itk_img.GetDirection())  # x,y,z  Origin in world coordinates (mm)
    #print("Direction: ", direction)
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
        #print("Swappint x origin")
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
        #print("Swappint y origin")
    #print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    src_df = pandas.read_csv("../../data/csv/train/annotations.csv")
    src_df = src_df[src_df["seriesuid"] == patient_id]
    #src_df = src_df[src_df["class"] == 0]
    patient_imgs = helpers.load_patient_images(
        patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")
    candidate_list = []

    for df_index, candiate_row in src_df.iterrows():
        node_x = candiate_row["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = candiate_row["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = candiate_row["coordZ"]
        candidate_diameter = candiate_row["diameter_mm"]
        # print("Node org (x,y,z,diam): ", (round(node_x, 2), round(node_y, 2), round(node_z, 2), round(candidate_diameter, 2)))
        center_float = numpy.array([node_x, node_y, node_z])
        center_int = numpy.rint((center_float - origin) / spacing)
        # center_int = numpy.rint((center_float - origin) )
        # print("Node tra (x,y,z,diam): ", (center_int[0], center_int[1], center_int[2]))
        # center_int_rescaled = numpy.rint(((center_float-origin) / spacing) * rescale)
        center_float_rescaled = (center_float -
                                 origin) / settings.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / patient_imgs.swapaxes(
            0, 2).shape
        # center_int = numpy.rint((center_float - origin) )
        # print("Node sca (x,y,z,diam): ", (center_float_rescaled[0], center_float_rescaled[1], center_float_rescaled[2]))
        coord_x = center_float_rescaled[0]
        coord_y = center_float_rescaled[1]
        coord_z = center_float_rescaled[2]
        candidate_list.append([
            len(candidate_list),
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(candidate_diameter / patient_imgs.shape[0], 4), 0
        ])

    df_candidates = pandas.DataFrame(candidate_list,
                                     columns=[
                                         "anno_index", "coord_x", "coord_y",
                                         "coord_z", "diameter", "malscore"
                                     ])
    df_candidates.to_csv(dst_dir + patient_id + "_candidates_luna.csv",
                         index=False)
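Stripped of the dataframe handling, the coordinate math in process_luna_candidates_patient is three steps: world mm to a voxel index at the native spacing, world mm to a voxel index at the isotropic TARGET_VOXEL_MM spacing, and finally a fraction of the (x, y, z) image shape. A compact sketch with made-up values:

import numpy as np

TARGET_VOXEL_MM = 1.0                            # assumed settings.TARGET_VOXEL_MM
origin = np.array([-170.0, -190.0, -280.0])      # scan origin (x, y, z) in mm, made up
spacing = np.array([0.7, 0.7, 1.25])             # native voxel spacing (x, y, z) in mm, made up
shape_xyz = np.array([350, 350, 300])            # rescaled image shape swapped to (x, y, z)

center_world = np.array([-60.0, 10.0, -120.0])               # annotation in world mm
center_voxel = np.rint((center_world - origin) / spacing)    # index in the raw scan
center_rescaled = (center_world - origin) / TARGET_VOXEL_MM  # index after isotropic rescale
center_percent = center_rescaled / shape_xyz                 # fraction stored in the CSVs
print(center_voxel, center_rescaled, center_percent)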
def generate_fp_npy(fp_df_all, annotation):
    negative_cube_py = []
    uids = sorted(fp_df_all['seriesuid'].unique())
    print('uid num:', len(uids))
    for id in tqdm(uids):
        print(id)
        images = helpers.load_patient_images(id, traindata_path,
                                             "*_i.png")  #z,y,x
        fp_df = fp_df_all[fp_df_all['seriesuid'] == id]
        #origin_df = annotation[annotation['seriesuid'] == id]

        for index, row in fp_df.iterrows():  # false positive
            if row['class'] == 0:
                for _ in range(num_per_nodule):
                    x = int(row['coordX'])
                    y = int(row['coordY'])
                    z = int(row['coordZ'])
                    lung_cube = helpers.get_lung_cube(None, images, x, y, z,
                                                      fp_radius,
                                                      fp_radius / np.sqrt(3))
                    negative_cube_py.append(lung_cube)
            #fp_orgin = np.array([row['coordX'], row['coordY'], row['coordZ']])
            #x = int(round(fp_orgin[0]))
            #y = int(round(fp_orgin[1]))
            #z = int(round(fp_orgin[2]))
            #fp_flag = 1
            #for orgin_index, origin_row in origin_df.iterrows():#really positive
            #radius = origin_row['diameter_mm'] / 2
            #origin = np.array([origin_row['coordX'], origin_row['coordY'], origin_row['coordZ']])
            #if np.linalg.norm(origin - fp_orgin) < radius:
            #print(fp_orgin)
            #fp_flag = 0
            #break
            #if fp_flag:
            #for _ in range(num_per_nodule):
            #lung_cube = helpers.get_lung_cube(None, images, x,y,z,fp_radius, fp_radius/np.sqrt(3))
            #negative_cube_py.append(lung_cube)

    negative_cube_py = np.array(negative_cube_py, dtype=np.uint8)  #neg sample
    neg_shape = negative_cube_py.shape
    negative_cube_py = np.expand_dims(negative_cube_py, axis=-1)
    print(negative_cube_py.shape)
    #shuffle
    rand_ii = np.random.choice(range(neg_shape[0]),
                               size=neg_shape[0],
                               replace=False)
    negative_cube_py = negative_cube_py[rand_ii]

    # split the negatives into a number of smaller sample files
    if traindata_path == setting.LUNA_IMG:
        average_neg_index = np.array(np.linspace(0, neg_shape[0], 31), dtype=int)  # 30000+ / 30
        print(average_neg_index)

        for i in trange(30):
            start_neg = average_neg_index[i]
            end_neg = average_neg_index[i + 1]
            np.savez_compressed(false_positive_path + "trainImages_neg600_" +
                                str(i).rjust(4, '0') + ".npz",
                                arr_0=negative_cube_py[start_neg:end_neg])
    else:
        np.savez_compressed(false_positive_path + "trainImages_neg_val.npz",
                            arr_0=negative_cube_py)
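The linspace bookkeeping at the end of generate_fp_npy only splits the shuffled negatives into roughly equal .npz files. The same idea in isolation, with a small placeholder array instead of the real negatives:

import numpy as np

data = np.zeros((300, 48, 48, 48, 1), dtype=np.uint8)  # placeholder for negative_cube_py
n_chunks = 30
bounds = np.linspace(0, data.shape[0], n_chunks + 1).astype(int)

for i in range(n_chunks):
    chunk = data[bounds[i]:bounds[i + 1]]               # ~data.shape[0] / n_chunks samples each
    np.savez_compressed("trainImages_neg600_" + str(i).rjust(4, "0") + ".npz", arr_0=chunk)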
def process_luna_candidates_patient(src_path, patient_id):
    dst_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    img_dir = dst_dir + patient_id + "/"
    df_pos_annos = pandas.read_csv("../../data/csv/train/annotations.csv")
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    #print("Img array: ", img_array.shape)
    #print("Pos annos: ", len(df_pos_annos))
    num_z, height, width = img_array.shape        #heightXwidth constitute the transverse plane
    origin = numpy.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
    #print("Origin (x,y,z): ", origin)
    spacing = numpy.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
    #print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    #print("Rescale: ", rescale)
    direction = numpy.array(itk_img.GetDirection())      # x,y,z  Origin in world coordinates (mm)
    #print("Direction: ", direction)
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
        #print("Swappint x origin")
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
        #print("Swappint y origin")
    #print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    src_df = pandas.read_csv("../../data/csv/train/annotations.csv")
    src_df = src_df[src_df["seriesuid"] == patient_id]
    #src_df = src_df[src_df["class"] == 0]
    patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")
    candidate_list = []

    for df_index, candiate_row in src_df.iterrows():
        node_x = candiate_row["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = candiate_row["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = candiate_row["coordZ"]
        candidate_diameter = candiate_row["diameter_mm"]
        # print("Node org (x,y,z,diam): ", (round(node_x, 2), round(node_y, 2), round(node_z, 2), round(candidate_diameter, 2)))
        center_float = numpy.array([node_x, node_y, node_z])
        center_int = numpy.rint((center_float-origin) / spacing)
        # center_int = numpy.rint((center_float - origin) )
        # print("Node tra (x,y,z,diam): ", (center_int[0], center_int[1], center_int[2]))
        # center_int_rescaled = numpy.rint(((center_float-origin) / spacing) * rescale)
        center_float_rescaled = (center_float - origin) / settings.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / patient_imgs.swapaxes(0, 2).shape
        # center_int = numpy.rint((center_float - origin) )
        # print("Node sca (x,y,z,diam): ", (center_float_rescaled[0], center_float_rescaled[1], center_float_rescaled[2]))
        coord_x = center_float_rescaled[0]
        coord_y = center_float_rescaled[1]
        coord_z = center_float_rescaled[2]
        candidate_list.append([len(candidate_list), round(center_float_percent[0], 4), round(center_float_percent[1], 4), round(center_float_percent[2], 4), round(candidate_diameter / patient_imgs.shape[0], 4), 0])

    df_candidates = pandas.DataFrame(candidate_list, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_candidates.to_csv(dst_dir + patient_id + "_candidates_luna.csv", index=False)
def predict_cubes(model_path, continue_job, only_patient_id=None, luna16=False, magnification=1, flip=False, train_data=True, holdout_no=-1, ext_name="", fold_count=2):
    if luna16:
        dst_dir = settings.LUNA_NODULE_DETECTION_DIR
    else:
        dst_dir = settings.NDSB3_NODULE_DETECTION_DIR
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    holdout_ext = ""
    # if holdout_no is not None:
    #     holdout_ext = "_h" + str(holdout_no) if holdout_no >= 0 else ""
    flip_ext = ""
    if flip:
        flip_ext = "_flip"

    dst_dir += "predictions" + str(int(magnification * 10)) + holdout_ext + flip_ext + "_" + ext_name + "/"
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    sw = helpers.Stopwatch.start_new()
    model = step2_train_nodule_detector.get_net(input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1), load_weight_path=model_path)
    if not luna16:
        if train_data:
            labels_df = pandas.read_csv("resources/stage1_labels.csv")
            labels_df.set_index(["id"], inplace=True)
        else:
            labels_df = pandas.read_csv("resources/stage2_sample_submission.csv")
            labels_df.set_index(["id"], inplace=True)

    patient_ids = []
    for file_name in os.listdir(settings.NDSB3_EXTRACTED_IMAGE_DIR):
        if not os.path.isdir(settings.NDSB3_EXTRACTED_IMAGE_DIR + file_name):
            continue
        patient_ids.append(file_name)

    all_predictions_csv = []
    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16:
            if patient_id not in labels_df.index:
                continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if holdout_no is not None and train_data:
            patient_fold = helpers.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != holdout_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)

            # patient_img = patient_img[:, ::-1, :]
            # patient_mask = patient_mask[:, ::-1, :]

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE
        # CROP_SIZE = 48

        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = (predict_volume_shape_list[0], predict_volume_shape_list[1], predict_volume_shape_list[2])
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)
        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []
        cube_img = None
        annotation_index = 0

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    #if cube_img is None:
                    cube_img = patient_img[z * step:z * step+CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step+CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step+CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step+CROP_SIZE]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                            # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                            # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        img_prep = prepare_image_for_net3D(cube_img)
                        batch_list.append(img_prep)
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) % batch_size == 0:
                            batch_data = numpy.vstack(batch_list)
                            p = model.predict(batch_data, batch_size=batch_size)
                            for i in range(len(p[0])):
                                p_z = batch_list_coords[i][0]
                                p_y = batch_list_coords[i][1]
                                p_x = batch_list_coords[i][2]
                                nodule_chance = p[0][i][0]
                                predict_volume[p_z, p_y, p_x] = nodule_chance
                                if nodule_chance > P_TH:
                                    p_z = p_z * step + CROP_SIZE / 2
                                    p_y = p_y * step + CROP_SIZE / 2
                                    p_x = p_x * step + CROP_SIZE / 2

                                    p_z_perc = round(p_z / patient_img.shape[0], 4)
                                    p_y_perc = round(p_y / patient_img.shape[1], 4)
                                    p_x_perc = round(p_x / patient_img.shape[2], 4)
                                    diameter_mm = round(p[1][i][0], 4)
                                    # diameter_perc = round(2 * step / patient_img.shape[2], 4)
                                    diameter_perc = round(2 * step / patient_img.shape[2], 4)
                                    diameter_perc = round(diameter_mm / patient_img.shape[2], 4)
                                    nodule_chance = round(nodule_chance, 4)
                                    patient_predictions_csv_line = [annotation_index, p_x_perc, p_y_perc, p_z_perc, diameter_perc, nodule_chance, diameter_mm]
                                    patient_predictions_csv.append(patient_predictions_csv_line)
                                    all_predictions_csv.append([patient_id] + patient_predictions_csv_line)
                                    annotation_index += 1

                            batch_list = []
                            batch_list_coords = []
                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:", skipped_count)

        df = pandas.DataFrame(patient_predictions_csv, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        filter_patient_nodules_predictions(df, patient_id, CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)

        # cols = ["anno_index", "nodule_chance", "diamete_mm"] + ["f" + str(i) for i in range(64)]
        # df_features = pandas.DataFrame(patient_features_csv, columns=cols)
        # for index, row in df.iterrows():
        #     if row["diameter_mm"] < 0:
        #         print("Dropping")
        #         anno_index = row["anno_index"]
        #         df_features.drop(df_features[df_features["anno_index"] == anno_index].index, inplace=True)
        #
        # df_features.to_csv(csv_target_path_features, index=False)

        # df = pandas.DataFrame(all_predictions_csv, columns=["patient_id", "anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
        # df.to_csv("c:/tmp/tmp2.csv", index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
def process_luna_candidates_patient(src_path, patient_id):
    dst_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "/_labels/"
    img_dir = dst_dir + patient_id + "/"
    df_pos_annos = pandas.read_csv(dst_dir + patient_id + "_annos_pos_lidc.csv")
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    pos_annos_manual = None
    manual_path = settings.EXTRA_DATA_DIR + "luna16_manual_labels/" + patient_id + ".csv"
    if os.path.exists(manual_path):
        pos_annos_manual = pandas.read_csv(manual_path)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)
    print("Pos annos: ", len(df_pos_annos))

    num_z, height, width = img_array.shape        #heightXwidth constitute the transverse plane
    origin = numpy.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    print("Rescale: ", rescale)

    direction = numpy.array(itk_img.GetDirection())      # x,y,z  Origin in world coordinates (mm)
    print("Direction: ", direction)
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
        print("Swappint x origin")
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    src_df = pandas.read_csv("resources/luna16_annotations/" + "candidates_V2.csv")
    src_df = src_df[src_df["seriesuid"] == patient_id]
    src_df = src_df[src_df["class"] == 0]
    patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")
    candidate_list = []

    for df_index, candiate_row in src_df.iterrows():
        node_x = candiate_row["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = candiate_row["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = candiate_row["coordZ"]
        candidate_diameter = 6
        # print("Node org (x,y,z,diam): ", (round(node_x, 2), round(node_y, 2), round(node_z, 2), round(candidate_diameter, 2)))
        center_float = numpy.array([node_x, node_y, node_z])
        center_int = numpy.rint((center_float-origin) / spacing)
        # center_int = numpy.rint((center_float - origin) )
        # print("Node tra (x,y,z,diam): ", (center_int[0], center_int[1], center_int[2]))
        # center_int_rescaled = numpy.rint(((center_float-origin) / spacing) * rescale)
        center_float_rescaled = (center_float - origin) / settings.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / patient_imgs.swapaxes(0, 2).shape
        # center_int = numpy.rint((center_float - origin) )
        # print("Node sca (x,y,z,diam): ", (center_float_rescaled[0], center_float_rescaled[1], center_float_rescaled[2]))
        coord_x = center_float_rescaled[0]
        coord_y = center_float_rescaled[1]
        coord_z = center_float_rescaled[2]

        ok = True

        for index, row in df_pos_annos.iterrows():
            pos_coord_x = row["coord_x"] * patient_imgs.shape[2]
            pos_coord_y = row["coord_y"] * patient_imgs.shape[1]
            pos_coord_z = row["coord_z"] * patient_imgs.shape[0]
            diameter = row["diameter"] * patient_imgs.shape[2]
            dist = math.sqrt(math.pow(pos_coord_x - coord_x, 2) + math.pow(pos_coord_y - coord_y, 2) + math.pow(pos_coord_z - coord_z, 2))
            if dist < (diameter + 64):  #  make sure we have a big margin
                ok = False
                print("################### Too close", (coord_x, coord_y, coord_z))
                break

        if pos_annos_manual is not None and ok:
            for index, row in pos_annos_manual.iterrows():
                pos_coord_x = row["x"] * patient_imgs.shape[2]
                pos_coord_y = row["y"] * patient_imgs.shape[1]
                pos_coord_z = row["z"] * patient_imgs.shape[0]
                diameter = row["d"] * patient_imgs.shape[2]
                print((pos_coord_x, pos_coord_y, pos_coord_z))
                print(center_float_rescaled)
                dist = math.sqrt(math.pow(pos_coord_x - center_float_rescaled[0], 2) + math.pow(pos_coord_y - center_float_rescaled[1], 2) + math.pow(pos_coord_z - center_float_rescaled[2], 2))
                if dist < (diameter + 72):  #  make sure we have a big margin
                    ok = False
                    print("################### Too close", center_float_rescaled)
                    break

        if not ok:
            continue

        candidate_list.append([len(candidate_list), round(center_float_percent[0], 4), round(center_float_percent[1], 4), round(center_float_percent[2], 4), round(candidate_diameter / patient_imgs.shape[0], 4), 0])

    df_candidates = pandas.DataFrame(candidate_list, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_candidates.to_csv(dst_dir + patient_id + "_candidates_luna.csv", index=False)
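The candidate-versus-annotation rejection above is a plain Euclidean distance test with a generous margin. Without the dataframe plumbing it reduces to the following (the 64-pixel margin mirrors the check above):

import math

def too_close(cand_xyz, pos_xyz, pos_diameter, margin=64):
    # True when a candidate center lies within pos_diameter + margin
    # (in rescaled pixels) of a known positive nodule.
    dist = math.sqrt(sum((c - p) ** 2 for c, p in zip(cand_xyz, pos_xyz)))
    return dist < (pos_diameter + margin)

print(too_close((100, 120, 80), (130, 110, 85), pos_diameter=12))  # True: dist ~32 < 76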
Exemple #32
0
def predict_cubes(patient_ids,
                  z0,
                  model_path,
                  magnification=1,
                  flip=False,
                  holdout_no=-1,
                  ext_name="",
                  fold_count=2):
    sw = helpers.Stopwatch.start_new()
    all_predictions_csv = []
    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if "metadata" in patient_id:
            continue
        if "labels" in patient_id:
            continue
        patient_img = helpers.load_patient_images(patient_id,
                                                  LUNA16_EXTRACTED_IMAGE_DIR,
                                                  "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(
                patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(patient_id,
                                                   LUNA16_EXTRACTED_IMAGE_DIR,
                                                   "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask,
                                                          (1, 1, 1),
                                                          magnification,
                                                          is_mask_image=True)

            # patient_img = patient_img[:, ::-1, :]
            # patient_mask = patient_mask[:, ::-1, :]

        step = PREDICT_STEP
        CROP_SIZE = CUBE_SIZE
        # CROP_SIZE = 48

        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = (predict_volume_shape_list[0],
                                predict_volume_shape_list[1],
                                predict_volume_shape_list[2])
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []
        annotation_index = 0
        if z0 < 0:
            z0 = 0
            z1 = predict_volume_shape[0]
        else:
            z1 = z0 + 1
        for z in range(z0, z1):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    #if cube_img is None:
                    cube_img = patient_img[z * step:z * step + CROP_SIZE,
                                           y * step:y * step + CROP_SIZE,
                                           x * step:x * step + CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step + CROP_SIZE,
                                             y * step:y * step + CROP_SIZE,
                                             x * step:x * step + CROP_SIZE]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(
                                cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
                            # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                            # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        img_prep = prepare_image_for_net3D(cube_img)
                        batch_list.append(img_prep)
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) % batch_size == 0:
                            batch_data = numpy.vstack(batch_list)
                            p = model.predict(batch_data,
                                              batch_size=batch_size)
                            for i in range(len(p[0])):
                                p_z = batch_list_coords[i][0]
                                p_y = batch_list_coords[i][1]
                                p_x = batch_list_coords[i][2]
                                nodule_chance = p[0][i][0]
                                predict_volume[p_z, p_y, p_x] = nodule_chance
                                if nodule_chance > P_TH:
                                    p_z = p_z * step + CROP_SIZE / 2
                                    p_y = p_y * step + CROP_SIZE / 2
                                    p_x = p_x * step + CROP_SIZE / 2
                                    p_z_perc = round(
                                        float(p_z) / patient_img.shape[0], 4)
                                    p_y_perc = round(
                                        float(p_y) / patient_img.shape[1], 4)
                                    p_x_perc = round(
                                        float(p_x) / patient_img.shape[2], 4)
                                    diameter_mm = round(p[1][i][0], 4)
                                    # diameter_perc = round(2 * step / patient_img.shape[2], 4)
                                    diameter_perc = round(
                                        2 * step / patient_img.shape[2], 4)
                                    diameter_perc = round(
                                        diameter_mm / patient_img.shape[2], 4)
                                    nodule_chance = round(nodule_chance, 4)
                                    patient_predictions_csv_line = [
                                        annotation_index, p_x, p_y, p_z,
                                        p_x_perc, p_y_perc, p_z_perc,
                                        diameter_perc, nodule_chance,
                                        diameter_mm
                                    ]
                                    patient_predictions_csv.append(
                                        patient_predictions_csv_line)
                                    all_predictions_csv.append(
                                        [patient_id] +
                                        patient_predictions_csv_line)
                                    annotation_index += 1

                            batch_list = []
                            batch_list_coords = []
                    done_count += 1

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=[
                                  "anno_index", "ax", "ay", "az", "coord_x",
                                  "coord_y", "coord_z", "diameter",
                                  "nodule_chance", "diameter_mm"
                              ])
        filter_patient_nodules_predictions(df, patient_id,
                                           CROP_SIZE * magnification)
        return df
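This last predict_cubes variant uses a module-level model that is never created inside the function, so it is presumably loaded elsewhere from model_path the same way the other variants do. A minimal sketch of that setup, reusing the get_net call shown earlier (the weights path is hypothetical, and CUBE_SIZE is whatever the surrounding module defines):

import step2_train_nodule_detector

CUBE_SIZE = 32                              # illustrative; use the module's real constant
model_path = "path/to/nodule_detector.hd5"  # hypothetical weights file

# predict_cubes above expects `model` to exist at module level
model = step2_train_nodule_detector.get_net(
    input_shape=(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, 1),
    load_weight_path=model_path)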
def process_excluded_annotations_patient(src_path, patient_id):
    df_node = pandas.read_csv(
        "resources/luna16_annotations/annotations_excluded.csv")
    dst_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    dst_dir = dst_dir + patient_id + "/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    # pos_annos_df = pandas.read_csv(TRAIN_DIR + "metadata/" + patient_id + "_annos_pos_lidc.csv")
    pos_annos_df = pandas.read_csv(settings.LUNA16_EXTRACTED_IMAGE_DIR +
                                   "_labels/" + patient_id + "_annos_pos.csv")
    pos_annos_manual = None
    manual_path = settings.EXTRA_DATA_DIR + "luna16_manual_labels/" + patient_id + ".csv"
    if os.path.exists(manual_path):
        pos_annos_manual = pandas.read_csv(manual_path)
        dmm = pos_annos_manual["dmm"]  # check

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)
    df_patient = df_node[df_node["seriesuid"] == patient_id]
    print("Annos: ", len(df_patient))

    num_z, height, width = img_array.shape  #heightXwidth constitute the transverse plane
    origin = numpy.array(
        itk_img.GetOrigin())  # x,y,z  Origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(
        itk_img.GetSpacing())  # spacing of voxels in world coor. (mm)
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    print("Rescale: ", rescale)

    direction = numpy.array(
        itk_img.GetDirection())  # x,y,z  Origin in world coordinates (mm)
    print("Direction: ", direction)
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
        print("Swappint x origin")
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    patient_imgs = helpers.load_patient_images(
        patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

    neg_annos = []
    df_patient = df_node[df_node["seriesuid"] == patient_id]
    anno_index = 0
    for index, annotation in df_patient.iterrows():
        node_x = annotation["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = annotation["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = annotation["coordZ"]
        center_float = numpy.array([node_x, node_y, node_z])
        center_int = numpy.rint((center_float - origin) / spacing)
        center_float_rescaled = (center_float -
                                 origin) / settings.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / patient_imgs.swapaxes(
            0, 2).shape
        # center_int = numpy.rint((center_float - origin) )
        # print("Node sca (x,y,z,diam): ", (center_float_rescaled[0], center_float_rescaled[1], center_float_rescaled[2]))
        diameter_pixels = 6 / settings.TARGET_VOXEL_MM
        diameter_percent = diameter_pixels / float(patient_imgs.shape[1])

        ok = True

        for index, row in pos_annos_df.iterrows():
            pos_coord_x = row["coord_x"] * patient_imgs.shape[2]
            pos_coord_y = row["coord_y"] * patient_imgs.shape[1]
            pos_coord_z = row["coord_z"] * patient_imgs.shape[0]
            diameter = row["diameter"] * patient_imgs.shape[2]
            print((pos_coord_x, pos_coord_y, pos_coord_z))
            print(center_float_rescaled)
            dist = math.sqrt(
                math.pow(pos_coord_x - center_float_rescaled[0], 2) +
                math.pow(pos_coord_y - center_float_rescaled[1], 2) +
                math.pow(pos_coord_z - center_float_rescaled[2], 2))
            if dist < (diameter + 64):  #  make sure we have a big margin
                ok = False
                print("################### Too close", center_float_rescaled)
                break

        if pos_annos_manual is not None and ok:
            for index, row in pos_annos_manual.iterrows():
                pos_coord_x = row["x"] * patient_imgs.shape[2]
                pos_coord_y = row["y"] * patient_imgs.shape[1]
                pos_coord_z = row["z"] * patient_imgs.shape[0]
                diameter = row["d"] * patient_imgs.shape[2]
                print((pos_coord_x, pos_coord_y, pos_coord_z))
                print(center_float_rescaled)
                dist = math.sqrt(
                    math.pow(pos_coord_x - center_float_rescaled[0], 2) +
                    math.pow(pos_coord_y - center_float_rescaled[1], 2) +
                    math.pow(pos_coord_z - center_float_rescaled[2], 2))
                if dist < (diameter + 72):  #  make sure we have a big margin
                    ok = False
                    print("################### Too close",
                          center_float_rescaled)
                    break

        if not ok:
            continue

        neg_annos.append([
            anno_index,
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(diameter_percent, 4), 1
        ])
        anno_index += 1

    df_annos = pandas.DataFrame(neg_annos,
                                columns=[
                                    "anno_index", "coord_x", "coord_y",
                                    "coord_z", "diameter", "malscore"
                                ])
    df_annos.to_csv(settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/" +
                    patient_id + "_annos_excluded.csv",
                    index=False)
    return [patient_id, spacing[0], spacing[1], spacing[2]]
def process_pos_annotations_patient(src_path, patient_id):
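    """Convert the LUNA16 annotations for one patient into a per-patient
    <patient_id>_annos_pos.csv, with coordinates and diameter expressed as
    fractions of the extracted image volume and a constant malscore of 1.
    """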
    df_node = pandas.read_csv("resources/luna16_annotations/annotations.csv")
    dst_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    dst_dir = dst_dir + patient_id + "/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)
    df_patient = df_node[df_node["seriesuid"] == patient_id]
    print("Annos: ", len(df_patient))

    num_z, height, width = img_array.shape  # height x width is the transverse plane
    origin = numpy.array(
        itk_img.GetOrigin())  # x,y,z  Origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(
        itk_img.GetSpacing())  # spacing of voxels in world coor. (mm)
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    print("Rescale: ", rescale)

    direction = numpy.array(
        itk_img.GetDirection())  # direction cosine matrix (flattened 3x3)
    print("Direction: ", direction)
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
        print("Swappint x origin")
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    patient_imgs = helpers.load_patient_images(
        patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

    pos_annos = []
    df_patient = df_node[df_node["seriesuid"] == patient_id]
    anno_index = 0
    for index, annotation in df_patient.iterrows():
        node_x = annotation["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = annotation["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = annotation["coordZ"]
        diam_mm = annotation["diameter_mm"]
        print("Node org (x,y,z,diam): ", (round(node_x, 2), round(
            node_y, 2), round(node_z, 2), round(diam_mm, 2)))
        center_float = numpy.array([node_x, node_y, node_z])
        center_int = numpy.rint((center_float - origin) / spacing)
        # center_int = numpy.rint((center_float - origin) )
        print("Node tra (x,y,z,diam): ",
              (center_int[0], center_int[1], center_int[2]))
        # center_int_rescaled = numpy.rint(((center_float-origin) / spacing) * rescale)
        center_float_rescaled = (center_float -
                                 origin) / settings.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / patient_imgs.swapaxes(
            0, 2).shape
        # center_int = numpy.rint((center_float - origin) )
        print("Node sca (x,y,z,diam): ",
              (center_float_rescaled[0], center_float_rescaled[1],
               center_float_rescaled[2]))
        diameter_pixels = diam_mm / settings.TARGET_VOXEL_MM
        diameter_percent = diameter_pixels / float(patient_imgs.shape[1])

        pos_annos.append([
            anno_index,
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(diameter_percent, 4), 1
        ])
        anno_index += 1

    df_annos = pandas.DataFrame(pos_annos,
                                columns=[
                                    "anno_index", "coord_x", "coord_y",
                                    "coord_z", "diameter", "malscore"
                                ])
    df_annos.to_csv(settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/" +
                    patient_id + "_annos_pos.csv",
                    index=False)
    return [patient_id, spacing[0], spacing[1], spacing[2]]
def process_luna_candidates_patient(src_path, patient_id):
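    """Write <patient_id>_candidates_luna.csv with the class-0 LUNA16
    candidates that are not too close to any known positive nodule, using
    fractional coordinates and a malscore of 0.
    """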
    dst_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "/_labels/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    img_dir = dst_dir + patient_id + "/"
    df_pos_annos = pandas.read_csv(dst_dir + patient_id +
                                   "_annos_pos_lidc.csv")

    pos_annos_manual = None
    manual_path = settings.EXTRA_DATA_DIR + "luna16_manual_labels/" + patient_id + ".csv"
    if os.path.exists(manual_path):
        pos_annos_manual = pandas.read_csv(manual_path)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)
    print("Pos annos: ", len(df_pos_annos))

    num_z, height, width = img_array.shape  # height x width is the transverse plane
    origin = numpy.array(
        itk_img.GetOrigin())  # x,y,z  Origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(
        itk_img.GetSpacing())  # spacing of voxels in world coor. (mm)
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    print("Rescale: ", rescale)

    direction = numpy.array(
        itk_img.GetDirection())  # direction cosine matrix (flattened 3x3)
    print("Direction: ", direction)
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
        print("Swappint x origin")
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    src_df = pandas.read_csv("resources/luna16_annotations/" +
                             "candidates_V2.csv")
    src_df = src_df[src_df["seriesuid"] == patient_id]
    src_df = src_df[src_df["class"] == 0]
    patient_imgs = helpers.load_patient_images(
        patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")
    candidate_list = []

    for df_index, candidate_row in src_df.iterrows():
        node_x = candidate_row["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = candidate_row["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = candidate_row["coordZ"]
        candidate_diameter = 6
        # print("Node org (x,y,z,diam): ", (round(node_x, 2), round(node_y, 2), round(node_z, 2), round(candidate_diameter, 2)))
        center_float = numpy.array([node_x, node_y, node_z])
        center_int = numpy.rint((center_float - origin) / spacing)
        # center_int = numpy.rint((center_float - origin) )
        # print("Node tra (x,y,z,diam): ", (center_int[0], center_int[1], center_int[2]))
        # center_int_rescaled = numpy.rint(((center_float-origin) / spacing) * rescale)
        center_float_rescaled = (center_float -
                                 origin) / settings.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / patient_imgs.swapaxes(
            0, 2).shape
        # center_int = numpy.rint((center_float - origin) )
        # print("Node sca (x,y,z,diam): ", (center_float_rescaled[0], center_float_rescaled[1], center_float_rescaled[2]))
        coord_x = center_float_rescaled[0]
        coord_y = center_float_rescaled[1]
        coord_z = center_float_rescaled[2]

        ok = True

        for index, row in df_pos_annos.iterrows():
            pos_coord_x = row["coord_x"] * patient_imgs.shape[2]
            pos_coord_y = row["coord_y"] * patient_imgs.shape[1]
            pos_coord_z = row["coord_z"] * patient_imgs.shape[0]
            diameter = row["diameter"] * patient_imgs.shape[2]
            dist = math.sqrt(
                math.pow(pos_coord_x - coord_x, 2) +
                math.pow(pos_coord_y - coord_y, 2) +
                math.pow(pos_coord_z - coord_z, 2))
            if dist < (diameter + 64):  #  make sure we have a big margin
                ok = False
                print("################### Too close",
                      (coord_x, coord_y, coord_z))
                break

        if pos_annos_manual is not None and ok:
            for index, row in pos_annos_manual.iterrows():
                pos_coord_x = row["x"] * patient_imgs.shape[2]
                pos_coord_y = row["y"] * patient_imgs.shape[1]
                pos_coord_z = row["z"] * patient_imgs.shape[0]
                diameter = row["d"] * patient_imgs.shape[2]
                print((pos_coord_x, pos_coord_y, pos_coord_z))
                print(center_float_rescaled)
                dist = math.sqrt(
                    math.pow(pos_coord_x - center_float_rescaled[0], 2) +
                    math.pow(pos_coord_y - center_float_rescaled[1], 2) +
                    math.pow(pos_coord_z - center_float_rescaled[2], 2))
                if dist < (diameter + 72):  #  make sure we have a big margin
                    ok = False
                    print("################### Too close",
                          center_float_rescaled)
                    break

        if not ok:
            continue

        candidate_list.append([
            len(candidate_list),
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(candidate_diameter / patient_imgs.shape[0], 4), 0
        ])

    df_candidates = pandas.DataFrame(candidate_list,
                                     columns=[
                                         "anno_index", "coord_x", "coord_y",
                                         "coord_z", "diameter", "malscore"
                                     ])
    df_candidates.to_csv(dst_dir + patient_id + "_candidates_luna.csv",
                         index=False)
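
# A minimal driver sketch (not part of the original code) showing how the
# per-patient functions above could be chained. The raw-data directory name
# settings.LUNA16_RAW_SRC_DIR and the plain single-process loop are
# assumptions; the original project may use a different setting and a
# multiprocessing pool instead.
def process_luna16_patients_sketch():
    import glob
    import os

    for src_path in glob.glob(settings.LUNA16_RAW_SRC_DIR + "*.mhd"):
        patient_id = os.path.basename(src_path)[:-4]  # strip the ".mhd" suffix
        # positives first: the excluded-annotation step reads the
        # *_annos_pos.csv written by this call
        process_pos_annotations_patient(src_path, patient_id)
        process_excluded_annotations_patient(src_path, patient_id)
        # needs a pre-existing <patient_id>_annos_pos_lidc.csv (produced by a
        # LIDC processing step that is not shown in this excerpt)
        process_luna_candidates_patient(src_path, patient_id)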