Beispiel #1
0
def make_pos_annotation_imgs():
    """Export one cube image per positive LUNA16 annotation.

    Every ``*_annos_pos.csv`` found in ``<LUNA_16_trn_DIR2D2>metadata/`` is
    read, the matching patient's ``*_i.png`` images are loaded, and a cube of
    size 64 is requested around each annotation row. Cubes are written to
    ``<BASE_DIR_SSD>luna16_trn_cubes_pos/`` as
    ``<patient>_<anno_index>_<diam>_1_pos.png``.

    The destination directory is created when missing and every file
    matching ``*.*`` inside it is deleted before new cubes are written.
    """
    src_dir = variables.LUNA_16_trn_DIR2D2 + "metadata/"
    dst_dir = variables.BASE_DIR_SSD + "luna16_trn_cubes_pos/"
    # The sibling generators create their output directory; do the same here
    # so a fresh checkout does not fail (or silently write nothing) on save.
    os.makedirs(dst_dir, exist_ok=True)
    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    csv_files = glob.glob(src_dir + "*_annos_pos.csv")
    for patient_index, csv_file in enumerate(csv_files):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos.csv", "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        imgs = operators.load_patient_imgs(patient_id,
                                           variables.LUNA_16_trn_DIR2D2,
                                           "*_i.png")

        for _, row in df_annos.iterrows():
            # The CSV values are scaled by the image shape (axis 2 for x,
            # axis 1 for y, axis 0 for z), so they are presumably fractions
            # of the volume extent -- TODO confirm against the CSV writer.
            coord_x = int(row["coord_x"] * imgs.shape[2])
            coord_y = int(row["coord_y"] * imgs.shape[1])
            coord_z = int(row["coord_z"] * imgs.shape[0])
            diam_mm = int(row["diameter"] * imgs.shape[2])
            anno_index = int(row["anno_index"])
            cube_img = get_cube_from_img(imgs, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                # (Almost) empty cube: nothing useful to save.
                print(" ***** Skipping ", coord_x, coord_y, coord_z)
                continue

            if cube_img.mean() < 10:
                # Very dark cube: still saved, only reported.
                print(" ***** Suspicious ", coord_x, coord_y, coord_z)

            save_cube_img(
                dst_dir + patient_id + "_" + str(anno_index) + "_" +
                str(diam_mm) + "_1_" + "pos.png", cube_img, 8, 8)
        operators.print_tabbed([patient_index, patient_id,
                                len(df_annos)], [5, 64, 8])
Beispiel #2
0
def make_candidate_auto_imgs(candidate_types=None):
    """Export cube images for automatically generated candidate annotations.

    Args:
        candidate_types: Iterable of candidate type names. ``None`` (the
            default) means no types, so nothing is done beyond creating the
            destination directory. For the type ``"falsepos"`` the CSVs are
            read from ``resources/luna16_falsepos_labels/``; for every other
            type from ``<LUNA16_EXTRACTED_IMAGE_DIR>_labels/``.

    For each type, previously generated ``*_<type>.png`` files are removed
    from ``<BASE_DIR_SSD>generated_trndata/luna16_trn_cubes_auto/`` and a
    cube of size 48 is saved for each row of each
    ``<patient>_candidates_<type>.csv``. Per patient, processing stops once
    the row counter exceeds the per-type limit (240 for ``"white"``, 500 for
    ``"luna"``, 200 otherwise).
    """
    # A list default would be shared between calls; normalise None instead.
    candidate_types = [] if candidate_types is None else list(candidate_types)

    dst_dir = variables.BASE_DIR_SSD + "generated_trndata/luna16_trn_cubes_auto/"
    # makedirs also creates the intermediate "generated_trndata/" directory.
    os.makedirs(dst_dir, exist_ok=True)

    for candidate_type in candidate_types:
        for file_path in glob.glob(dst_dir + "*_" + candidate_type + ".png"):
            os.remove(file_path)

    max_items_by_type = {"white": 240, "luna": 500}

    for candidate_type in candidate_types:
        if candidate_type == "falsepos":
            src_dir = "resources/luna16_falsepos_labels/"
        else:
            src_dir = variables.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"

        # The limit only depends on the type, so compute it once per type.
        max_item = max_items_by_type.get(candidate_type, 200)
        csv_suffix = "_candidates_" + candidate_type + ".csv"

        for index, csv_file in enumerate(glob.glob(src_dir + "*" + csv_suffix)):
            patient_id = ntpath.basename(csv_file).replace(csv_suffix, "")
            print(index, ",patient: ", patient_id, " type:", candidate_type)
            df_annos = pandas.read_csv(csv_file)
            if len(df_annos) == 0:
                continue
            imgs = operators.load_patient_imgs(
                patient_id,
                variables.LUNA16_EXTRACTED_IMAGE_DIR,
                "*_i.png",
                exclude_wildcards=[])

            row_no = 0
            for _, row in df_annos.iterrows():
                coord_x = int(row["coord_x"] * imgs.shape[2])
                coord_y = int(row["coord_y"] * imgs.shape[1])
                coord_z = int(row["coord_z"] * imgs.shape[0])
                anno_index = int(row["anno_index"])
                cube_img = get_cube_from_img(imgs, coord_x, coord_y, coord_z,
                                             48)
                if cube_img.sum() < 10:
                    print("Skipping ", coord_x, coord_y, coord_z)
                    continue
                try:
                    save_cube_img(
                        dst_dir + patient_id + "_" + str(anno_index) + "_0_" +
                        candidate_type + ".png", cube_img, 6, 8)
                except Exception as ex:
                    # Best effort: one failed save must not abort the export.
                    print(ex)

                # Counts every non-skipped row, including failed saves.
                row_no += 1
                if row_no > max_item:
                    break
Beispiel #3
0
def filter_patient_nods_predictions(df_nod_predictions: pandas.DataFrame,
                                    patient_id,
                                    view_size,
                                    luna16=True):
    """Mark predictions that lie outside the mask or in the lowest slices.

    A prediction is "marked" by making its ``diameter_mm`` value negative;
    no rows are removed. A prediction is marked when

    * none of the mask slices ``center_z - 1 .. center_z + 1`` has a pixel
      sum above 255 inside the ``view_size`` x ``view_size`` window centred
      on the prediction, or
    * it is inside the mask but ``center_z < 30``.

    Two further conditions only print a "SUSPICIOUS" message.

    Args:
        df_nod_predictions: Frame with ``coord_x``, ``coord_y``, ``coord_z``
            (multiplied by the mask shape below, so presumably fractions of
            the volume extent -- TODO confirm) and ``diameter_mm`` columns.
            Modified in place.
        patient_id: Patient whose ``*_m.png`` mask images are loaded.
        view_size: Edge length of the square window; truncated to ``int``.
        luna16: Load masks from the LUNA16 directory when true, otherwise
            from the NDSB3 directory.

    Returns:
        The same ``df_nod_predictions`` object.
    """
    src_dir = (variables.LUNA16_EXTRACTED_IMAGE_DIR
               if luna16 else variables.NDSB3_EXTRACTED_IMAGE_DIR)
    patient_mask = operators.load_patient_imgs(patient_id, src_dir, "*_m.png")
    # Truncate once up front so every row uses the same window size.
    view_size = int(view_size)
    slice_count = patient_mask.shape[0]

    for index, row in df_nod_predictions.iterrows():
        z_perc = row["coord_z"]
        y_perc = row["coord_y"]
        center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * slice_count))
        mal_score = row["diameter_mm"]

        start_y = int(center_y - view_size / 2)
        start_x = int(center_x - view_size / 2)
        # Clamp to the image: a negative bound would be interpreted by numpy
        # as counting from the far edge and select the wrong (usually empty)
        # region for predictions near the border.
        y_lo, y_hi = max(start_y, 0), max(start_y + view_size, 0)
        x_lo, x_hi = max(start_x, 0), max(start_x + view_size, 0)

        nod_in_mask = False
        for z_offset in (-1, 0, 1):
            z = center_z + z_offset
            # Skip neighbours outside the volume instead of wrapping to the
            # last slice (z == -1) or raising IndexError (z >= slice_count).
            if not 0 <= z < slice_count:
                continue
            if patient_mask[z][y_lo:y_hi, x_lo:x_hi].sum() > 255:
                nod_in_mask = True
                break

        if not nod_in_mask:
            print("nod not in mask: ", (center_x, center_y, center_z))
            if mal_score > 0:
                mal_score *= -1
            df_nod_predictions.loc[index, "diameter_mm"] = mal_score
        else:
            if center_z < 30:
                print("Z < 30: ", patient_id, " center z:", center_z,
                      " y_perc: ", y_perc)
                if mal_score > 0:
                    mal_score *= -1
                df_nod_predictions.loc[index, "diameter_mm"] = mal_score

            # The two checks below only report; they do not change the frame.
            if (z_perc > 0.75 or z_perc < 0.25) and y_perc > 0.85:
                print("SUSPICIOUS FALSEPOSITIVE: ", patient_id, " center z:",
                      center_z, " y_perc: ", y_perc)

            if center_z < 50 and y_perc < 0.30:
                print("SUSPICIOUS FALSEPOSITIVE OUT OF RANGE: ", patient_id,
                      " center z:", center_z, " y_perc: ", y_perc)

    return df_nod_predictions
Beispiel #4
0
def make_pos_annotation_imgs_manual_ndsb3():
    """Export cube images for the manually labelled NDSB3 annotations.

    Reads every CSV in ``resources/ndsb3_manual_labels/`` whose name does not
    contain ``1.3.6.1.4.1``, looks up the patient's ``cancer`` value in
    ``resources/stage1_labels.csv`` and saves one cube of size 64 per
    annotation row into
    ``<BASE_DIR_SSD>generated_trndata/ndsb3_trn_cubes_manual/``. All files
    matching ``*.*`` in that directory are deleted first.
    """
    labels_root = "resources/ndsb3_manual_labels/"
    out_root = variables.BASE_DIR_SSD + "generated_trndata/ndsb3_trn_cubes_manual/"
    if not os.path.exists(out_root):
        os.mkdir(out_root)

    stage1_labels = pandas.read_csv("resources/stage1_labels.csv")
    stage1_labels.set_index(["id"], inplace=True)
    for stale_file in glob.glob(out_root + "*.*"):
        os.remove(stale_file)

    for patient_index, csv_file in enumerate(glob.glob(labels_root + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4.1" in patient_id:
            # Such ids are handled by the LUNA16 counterpart of this function.
            continue

        cancer_label = stage1_labels.loc[patient_id]["cancer"]
        annos = pandas.read_csv(csv_file)
        if len(annos) == 0:
            continue
        volume = operators.load_patient_imgs(
            patient_id, variables.NDSB3_EXTRACTED_IMAGE_DIR, "*_i.png")

        # anno_index is a 1-based row counter that also advances for rows
        # that end up being skipped.
        for anno_index, (_, row) in enumerate(annos.iterrows(), start=1):
            pos_neg = "neg" if row["id"] != 0 else "pos"
            center = (
                int(row["x"] * volume.shape[2]),
                int(row["y"] * volume.shape[1]),
                int(row["z"] * volume.shape[0]),
            )
            malscore = int(round(row["dmm"]))
            cube_img = get_cube_from_img(volume, center[0], center[1],
                                         center[2], 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", *center)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", *center)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      center)
                continue

            print(patient_id)
            # A positive annotation must carry a positive score.
            assert malscore > 0 or pos_neg == "neg"
            file_name = "_".join([
                "ndsb3manual", patient_id, str(anno_index), pos_neg,
                str(cancer_label), str(malscore), "1", "pn.png"
            ])
            save_cube_img(out_root + file_name, cube_img, 8, 8)

        operators.print_tabbed([patient_index, patient_id,
                                len(annos)], [5, 64, 8])
Beispiel #5
0
def make_pos_annotation_imgs_manual():
    """Export cube images for the manually labelled LUNA16 annotations.

    Reads every CSV in ``resources/luna16_manual_labels/`` whose name contains
    ``1.3.6.1.4`` and saves one cube of size 64 per annotation row into
    ``<BASE_DIR_SSD>generated_trndata/luna16_trn_cubes_manual/`` as
    ``<patient>_<row index>_<score>_1_<pos|neg>.png``. The score is the
    scaled ``d`` column clamped to the range 16..25.
    """
    labels_root = "resources/luna16_manual_labels/"
    out_root = variables.BASE_DIR_SSD + "generated_trndata/luna16_trn_cubes_manual/"
    if not os.path.exists(out_root):
        os.mkdir(out_root)

    # NOTE(review): this pattern does not match the "_pos.png" / "_neg.png"
    # names written below, so it looks like old cubes are never cleaned up
    # here -- confirm whether that is intended.
    for stale_file in glob.glob(out_root + "*_manual.*"):
        os.remove(stale_file)

    for patient_index, csv_file in enumerate(glob.glob(labels_root + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if not "1.3.6.1.4" in patient_id:
            continue

        print(patient_id)
        annos = pandas.read_csv(csv_file)
        if len(annos) == 0:
            continue
        volume = operators.load_patient_imgs(
            patient_id, variables.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

        for anno_index, row in annos.iterrows():
            center = (
                int(row["x"] * volume.shape[2]),
                int(row["y"] * volume.shape[1]),
                int(row["z"] * volume.shape[0]),
            )
            diameter = int(row["d"] * volume.shape[2])
            node_type = int(row["id"])
            # Clamp the score into [16, 25].
            malscore = max(16, min(25, int(diameter)))
            cube_img = get_cube_from_img(volume, center[0], center[1],
                                         center[2], 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", *center)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", *center)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      center)
                continue

            label = "neg" if node_type != 0 else "pos"
            file_name = "_".join(
                [patient_id, str(anno_index), str(malscore), "1", label])
            save_cube_img(out_root + file_name + ".png", cube_img, 8, 8)

        operators.print_tabbed([patient_index, patient_id,
                                len(annos)], [5, 64, 8])
Beispiel #6
0
def make_annotation_imgs_lidc():
    """Export cube images for the LIDC-derived positive annotations.

    Reads every ``*_annos_pos_lidc.csv`` in
    ``<LUNA16_EXTRACTED_IMAGE_DIR>_labels/`` and saves one cube of size 64
    per annotation row into
    ``<BASE_DIR_SSD>generated_trndata/luna16_trn_cubes_lidc/`` as
    ``<patient>_<anno_index>_<malscore squared>_1_pos.png``. All files
    matching ``*.*`` in that directory are deleted first.
    """
    csv_suffix = "_annos_pos_lidc.csv"
    labels_root = variables.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    out_root = variables.BASE_DIR_SSD + "generated_trndata/luna16_trn_cubes_lidc/"
    if not os.path.exists(out_root):
        os.mkdir(out_root)

    for stale_file in glob.glob(out_root + "*.*"):
        os.remove(stale_file)

    # Characters that are replaced in the annotation index before it is
    # embedded in the file name, where "_" and "." act as separators.
    escapes = ((" ", "xspacex"), (".", "xpointx"), ("_", "xunderscorex"))

    csv_files = glob.glob(labels_root + "*" + csv_suffix)
    for patient_index, csv_file in enumerate(csv_files):
        patient_id = ntpath.basename(csv_file).replace(csv_suffix, "")
        annos = pandas.read_csv(csv_file)
        if len(annos) == 0:
            continue
        volume = operators.load_patient_imgs(
            patient_id, variables.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

        for _, row in annos.iterrows():
            center = (
                int(row["coord_x"] * volume.shape[2]),
                int(row["coord_y"] * volume.shape[1]),
                int(row["coord_z"] * volume.shape[0]),
            )
            malscore = int(row["malscore"])
            anno_index = str(row["anno_index"])
            for raw, escaped in escapes:
                anno_index = anno_index.replace(raw, escaped)

            cube_img = get_cube_from_img(volume, center[0], center[1],
                                         center[2], 64)
            if cube_img.sum() < 5:
                print(" ***** Skipping ", *center)
                continue

            if cube_img.mean() < 10:
                print(" ***** Suspicious ", *center)

            if cube_img.shape != (64, 64, 64):
                print(" ***** incorrect shape !!! ", str(anno_index), " - ",
                      center)
                continue

            file_name = "_".join(
                [patient_id, str(anno_index), str(malscore * malscore), "1",
                 "pos.png"])
            save_cube_img(out_root + file_name, cube_img, 8, 8)

        operators.print_tabbed([patient_index, patient_id,
                                len(annos)], [5, 64, 8])
Beispiel #7
0
def _record_batch_predictions(model, batch_list, batch_list_coords,
                              batch_size, predict_volume, patient_shape, step,
                              crop_size, patient_predictions_csv):
    """Predict one batch of cubes and append a row for every hit above P_TH.

    ``predict_volume`` receives the first model output for every cube of the
    batch; ``patient_predictions_csv`` grows by one row per cube whose first
    output exceeds ``P_TH``. Both are modified in place.
    """
    batch_data = numpy.vstack(batch_list)
    p = model.predict(batch_data, batch_size=batch_size)
    for i in range(len(p[0])):
        p_z, p_y, p_x = batch_list_coords[i]
        nod_chance = p[0][i][0]
        predict_volume[p_z, p_y, p_x] = nod_chance
        if nod_chance > P_TH:
            # Grid cell -> centre of the cube in image coordinates.
            p_z = p_z * step + crop_size / 2
            p_y = p_y * step + crop_size / 2
            p_x = p_x * step + crop_size / 2

            p_z_perc = round(p_z / patient_shape[0], 4)
            p_y_perc = round(p_y / patient_shape[1], 4)
            p_x_perc = round(p_x / patient_shape[2], 4)
            diameter_mm = round(p[1][i][0], 4)
            diameter_perc = round(diameter_mm / patient_shape[2], 4)
            nod_chance = round(nod_chance, 4)
            # The running annotation index equals the number of rows so far.
            patient_predictions_csv.append([
                len(patient_predictions_csv), p_x_perc, p_y_perc, p_z_perc,
                diameter_perc, nod_chance, diameter_mm
            ])


def predict_cubes(model_path,
                  continue_job,
                  only_patient_id=None,
                  luna16=True,
                  magnification=1,
                  flip=False,
                  trn_data=True,
                  valid_no=-1,
                  ext_name="",
                  fold_count=2):
    """Slide a cube over every patient volume and write predictions to CSV.

    For each patient directory found in ``LUNA16_EXTRACTED_IMAGE_DIR`` the
    image and mask volumes are loaded, cubes of edge ``SIZE`` are taken every
    ``PREDICT_STEP`` voxels, cubes whose mask sum is below 2000 are skipped,
    and the remaining cubes are fed to the network in batches of 32. Hits
    above ``P_TH`` are written to ``<dst_dir>/<patient_id>.csv`` after being
    passed through ``filter_patient_nods_predictions``.

    Args:
        model_path: Weight file handed to ``nodule_detector.get_net``.
        continue_job: When true (and ``only_patient_id`` is ``None``),
            patients whose CSV already exists are skipped.
        only_patient_id: If given, only this patient is processed.
        luna16: Selects the output directory; when false, patients missing
            from the label CSV are skipped.
        magnification: Rescale factor applied to image and mask when != 1;
            also part of the output directory name.
        flip: Reverse the last axis of every cube before prediction.
        trn_data: For non-LUNA16 runs selects ``stage1_labels.csv`` over
            ``stage2_sample_submission.csv``; also enables fold filtering.
        valid_no: Fold to process when ``trn_data`` is true; ``None``
            disables fold filtering. NOTE(review): the default ``-1`` can
            never equal ``patient_fold % fold_count`` for a positive integer
            ``fold_count``, so with the defaults every patient is skipped --
            callers presumably pass ``None`` or a fold number; confirm.
        ext_name: Suffix of the output directory name.
        fold_count: Number of folds used for the modulo above.

    NOTE(review): images and masks are always read from
    ``LUNA16_EXTRACTED_IMAGE_DIR`` and the filter step is called with its
    default ``luna16=True``, even when ``luna16`` is false here -- confirm.
    """
    if luna16:
        dst_dir = variables.LUNA_nod_DETECTION_DIR
    else:
        dst_dir = variables.NDSB3_nod_DETECTION_DIR + "2"
    os.makedirs(dst_dir, exist_ok=True)

    flip_ext = "_flip" if flip else ""
    dst_dir += ("predictions" + str(int(magnification * 10)) + flip_ext +
                "_" + ext_name + "/")
    os.makedirs(dst_dir, exist_ok=True)

    sw = operators.Stopwatch.start_new()
    model = nodule_detector.get_net(input_shape=(SIZE, SIZE, SIZE, 1),
                                    load_weight_path=model_path)

    # Labels are only consulted for non-LUNA16 runs (to restrict the patient
    # list), so they are only loaded in that case.
    labels_df = None
    if not luna16:
        labels_csv = ("resources/stage1_labels.csv"
                      if trn_data else "resources/stage2_sample_submission.csv")
        labels_df = pandas.read_csv(labels_csv)
        labels_df.set_index(["id"], inplace=True)

    patient_ids = [
        file_name
        for file_name in os.listdir(variables.LUNA16_EXTRACTED_IMAGE_DIR)
        if os.path.isdir(variables.LUNA16_EXTRACTED_IMAGE_DIR + file_name)
    ]

    step = PREDICT_STEP
    crop_size = SIZE
    batch_size = 32

    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        if not luna16 and patient_id not in labels_df.index:
            continue
        if "metadata" in patient_id:
            continue
        if only_patient_id is not None and only_patient_id != patient_id:
            continue

        if valid_no is not None and trn_data:
            patient_fold = operators.get_patient_fold(patient_id)
            patient_fold %= fold_count
            if patient_fold != valid_no:
                continue

        print(patient_index, ": ", patient_id)
        csv_target_path = dst_dir + patient_id + ".csv"
        if continue_job and only_patient_id is None:
            if os.path.exists(csv_target_path):
                continue

        patient_img = operators.load_patient_imgs(
            patient_id, variables.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png", [])
        if magnification != 1:
            patient_img = operators.rescale_patient_imgs(
                patient_img, (1, 1, 1), magnification)

        patient_mask = operators.load_patient_imgs(
            patient_id, variables.LUNA16_EXTRACTED_IMAGE_DIR, "*_m.png", [])
        if magnification != 1:
            patient_mask = operators.rescale_patient_imgs(patient_mask,
                                                          (1, 1, 1),
                                                          magnification,
                                                          is_mask_image=True)

        # Number of cube positions along each axis.
        grid_shape = []
        for dim in range(3):
            cells = 0
            dim_indent = 0
            while dim_indent + crop_size < patient_img.shape[dim]:
                cells += 1
                dim_indent += step
            grid_shape.append(cells)
        predict_volume_shape = tuple(grid_shape)

        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        print("Predict volume shape: ", predict_volume.shape)
        done_count = 0
        skipped_count = 0
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    window = (slice(z * step, z * step + crop_size),
                              slice(y * step, y * step + crop_size),
                              slice(x * step, x * step + crop_size))
                    cube_mask = patient_mask[window]

                    if cube_mask.sum() < 2000:
                        skipped_count += 1
                    else:
                        cube_img = patient_img[window]
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        batch_list.append(prepare_image_for_net3D(cube_img))
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) >= batch_size:
                            _record_batch_predictions(
                                model, batch_list, batch_list_coords,
                                batch_size, predict_volume, patient_img.shape,
                                step, crop_size, patient_predictions_csv)
                            batch_list = []
                            batch_list_coords = []
                    done_count += 1
                    if done_count % 10000 == 0:
                        print("Done: ", done_count, " skipped:", skipped_count)

        # Predict the cubes left over when the cube count is not a multiple
        # of the batch size; otherwise their predictions would be lost.
        if batch_list:
            _record_batch_predictions(model, batch_list, batch_list_coords,
                                      batch_size, predict_volume,
                                      patient_img.shape, step, crop_size,
                                      patient_predictions_csv)

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=[
                                  "anno_index", "coord_x", "coord_y",
                                  "coord_z", "diameter", "nod_chance",
                                  "diameter_mm"
                              ])
        print(df)
        if not df.empty:
            filter_patient_nods_predictions(df, patient_id,
                                            crop_size * magnification)
        df.to_csv(csv_target_path, index=False)

        print(predict_volume.mean())
        print("Done in : ", sw.get_elapsed_seconds(), " seconds")
Beispiel #8
0
def make_negative_trn_data_based_on_predicted_luna_nods():
    """Turn predicted nodules that match no known annotation into negatives.

    For every prediction CSV in ``LUNA_nod_DETECTION_DIR`` the predictions
    are first passed through ``filter_patient_nods_predictions``. Each
    prediction with a non-negative ``diameter_mm`` that lies close to a LIDC
    annotation or a manual annotation gets its ``diameter_mm`` negated. The
    updated frame is written back over the source CSV, and the rows that are
    still non-negative are written to
    ``<LUNA_nod_LABELS_DIR><patient>_candidates_falsepos.csv``.
    """

    def _distance(a, b):
        # Euclidean distance between two (x, y, z) triples.
        return math.sqrt(
            math.pow(a[0] - b[0], 2) + math.pow(a[1] - b[1], 2) +
            math.pow(a[2] - b[2], 2))

    predictions_dir = variables.LUNA_nod_DETECTION_DIR
    labels_dir = variables.LUNA_nod_LABELS_DIR
    keep_dist = SIZE + SIZE / 2
    total_false_pos = 0

    for csv_index, csv_path in enumerate(glob.glob(predictions_dir + "*.csv")):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        predictions = pandas.read_csv(csv_path)

        manual_path = (variables.MANUAL_ANNOTATIONS_LABELS_DIR + patient_id +
                       ".csv")
        manual_annos = (pandas.read_csv(manual_path)
                        if os.path.exists(manual_path) else None)

        filter_patient_nods_predictions(predictions,
                                        patient_id,
                                        SIZE,
                                        luna16=True)
        lidc_annos = pandas.read_csv(labels_dir + patient_id +
                                     "_annos_pos_lidc.csv")
        print(csv_index, ": ", patient_id, ", pos", len(lidc_annos))
        patient_imgs = operators.load_patient_imgs(
            patient_id, variables.LUNA_16_trn_DIR2D2, "*_m.png")

        for pred_index, pred_row in predictions.iterrows():
            if pred_row["diameter_mm"] < 0:
                # Already marked by an earlier step.
                continue
            nx, ny, nz = operators.percentage_to_pixels(
                pred_row["coord_x"], pred_row["coord_y"], pred_row["coord_z"],
                patient_imgs)
            diam_mm = pred_row["diameter_mm"]

            for _, lidc_row in lidc_annos.iterrows():
                px, py, pz = operators.percentage_to_pixels(
                    lidc_row["coord_x"], lidc_row["coord_y"],
                    lidc_row["coord_z"], patient_imgs)
                if _distance((nx, ny, nz), (px, py, pz)) < keep_dist:
                    if diam_mm >= 0:
                        diam_mm *= -1
                    predictions.loc[pred_index, "diameter_mm"] = diam_mm
                    break

            if manual_annos is None:
                continue

            for _, manual_row in manual_annos.iterrows():
                px, py, pz = operators.percentage_to_pixels(
                    manual_row["x"], manual_row["y"], manual_row["z"],
                    patient_imgs)
                diameter = manual_row["d"] * patient_imgs[0].shape[1]
                if _distance((px, py, pz), (nx, ny, nz)) < (diameter + 72):
                    if diam_mm >= 0:
                        diam_mm *= -1
                    predictions.loc[pred_index, "diameter_mm"] = diam_mm
                    print("#Too close", (nx, ny, nz))
                    break

        predictions.to_csv(csv_path, index=False)
        false_positives = predictions[predictions["diameter_mm"] >= 0]
        false_positives.to_csv(labels_dir + patient_id +
                               "_candidates_falsepos.csv",
                               index=False)
        total_false_pos += len(false_positives)

    print("Total false pos:", total_false_pos)
Beispiel #9
0
def proc_luna_candidates_patient(src_path, patient_id):
    """Write the LUNA16 class-0 candidates of one patient as a label CSV.

    Candidates for ``patient_id`` with ``class == 0`` are taken from
    ``candidates_V2.csv``, converted from the scan's world coordinates into
    fractions of the extracted image volume, and dropped when they lie too
    close to a LIDC annotation (``diameter + 64``) or to a manual annotation
    (``diameter + 72``). The survivors are written to
    ``<LUNA16_EXTRACTED_IMAGE_DIR>/_labels/<patient_id>_candidates_luna.csv``.

    Args:
        src_path: Path of the scan read with ``SimpleITK.ReadImage``; it
            supplies origin, spacing and direction.
        patient_id: Series uid used to select candidates and label files.
    """
    labels_dir = variables.LUNA16_EXTRACTED_IMAGE_DIR + "/_labels/"
    lidc_annos = pandas.read_csv(labels_dir + patient_id +
                                 "_annos_pos_lidc.csv")
    if not os.path.exists(labels_dir):
        os.mkdir(labels_dir)

    manual_path = (variables.EXTRA_DATA_DIR + "luna16_manual_labels/" +
                   patient_id + ".csv")
    manual_annos = (pandas.read_csv(manual_path)
                    if os.path.exists(manual_path) else None)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)
    print("Pos annos: ", len(lidc_annos))

    # Unpacking doubles as a check that the array is three-dimensional.
    _num_z, _height, _width = img_array.shape
    origin = numpy.array(itk_img.GetOrigin())
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(itk_img.GetSpacing())
    print("Spacing (x,y,z): ", spacing)
    print("Rescale: ", spacing / variables.TARGET_VOXEL_MM)

    direction = numpy.array(itk_img.GetDirection())
    print("Direction: ", direction)
    # Elements 0 and 4 of the flattened direction are checked for the x and
    # y axes; a value of -1 is normalised to +1 and the origin is mirrored.
    flip_direction_x = round(direction[0]) == -1
    if flip_direction_x:
        origin[0] *= -1
        direction[0] = 1
        print("Swappint x origin")
    flip_direction_y = round(direction[4]) == -1
    if flip_direction_y:
        origin[1] *= -1
        direction[4] = 1
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    candidates = pandas.read_csv(
        "/media/pikachu/Seagate Backup Plus Drive/LC nod Detection/resources/luna16_annotations/"
        + "candidates_V2.csv")
    candidates = candidates[candidates["seriesuid"] == patient_id]
    candidates = candidates[candidates["class"] == 0]
    patient_imgs = operators.load_patient_imgs(
        patient_id, variables.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

    candidate_diameter = 6
    candidate_list = []

    for _, candiate_row in candidates.iterrows():
        node_x = candiate_row["coordX"]
        node_y = candiate_row["coordY"]
        node_z = candiate_row["coordZ"]
        if flip_direction_x:
            node_x *= -1
        if flip_direction_y:
            node_y *= -1

        center_float = numpy.array([node_x, node_y, node_z])
        # Voxel index in the source scan; computed but not used below.
        _center_int = numpy.rint((center_float - origin) / spacing)
        center_float_rescaled = (center_float -
                                 origin) / variables.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / patient_imgs.swapaxes(
            0, 2).shape
        coord_x, coord_y, coord_z = (center_float_rescaled[0],
                                     center_float_rescaled[1],
                                     center_float_rescaled[2])

        rejected = False

        for _, anno in lidc_annos.iterrows():
            pos_coord_x = anno["coord_x"] * patient_imgs.shape[2]
            pos_coord_y = anno["coord_y"] * patient_imgs.shape[1]
            pos_coord_z = anno["coord_z"] * patient_imgs.shape[0]
            diameter = anno["diameter"] * patient_imgs.shape[2]
            dist = math.sqrt(
                math.pow(pos_coord_x - coord_x, 2) +
                math.pow(pos_coord_y - coord_y, 2) +
                math.pow(pos_coord_z - coord_z, 2))
            if dist < (diameter + 64):
                rejected = True
                print("CANNOT", (coord_x, coord_y, coord_z))
                break

        if manual_annos is not None and not rejected:
            for _, anno in manual_annos.iterrows():
                pos_coord_x = anno["x"] * patient_imgs.shape[2]
                pos_coord_y = anno["y"] * patient_imgs.shape[1]
                pos_coord_z = anno["z"] * patient_imgs.shape[0]
                diameter = anno["d"] * patient_imgs.shape[2]
                print((pos_coord_x, pos_coord_y, pos_coord_z))
                print(center_float_rescaled)
                dist = math.sqrt(
                    math.pow(pos_coord_x - center_float_rescaled[0], 2) +
                    math.pow(pos_coord_y - center_float_rescaled[1], 2) +
                    math.pow(pos_coord_z - center_float_rescaled[2], 2))
                if dist < (diameter + 72):
                    rejected = True
                    print("CANNOT", center_float_rescaled)
                    break

        if rejected:
            continue

        candidate_list.append([
            len(candidate_list),
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(candidate_diameter / patient_imgs.shape[0], 4), 0
        ])

    df_candidates = pandas.DataFrame(candidate_list,
                                     columns=[
                                         "anno_index", "coord_x", "coord_y",
                                         "coord_z", "diameter", "malscore"
                                     ])
    df_candidates.to_csv(labels_dir + patient_id + "_candidates_luna.csv",
                         index=False)
Beispiel #10
0
def _is_near_annotation(center, annos_df, columns, volume_shape, margin):
    """Return True if *center* is closer than ``diameter + margin`` to a row.

    Args:
        center: indexable (x, y, z) position, in the same units as the
            de-normalised row coordinates.
        annos_df: DataFrame whose rows hold coordinates and a diameter that
            are multiplied by the volume dimensions before comparison.
        columns: names of the (x, y, z, diameter) columns in *annos_df*.
        volume_shape: shape of the image stack; index 2 scales x and the
            diameter, index 1 scales y, index 0 scales z.
        margin: extra distance added to the row's scaled diameter.

    Returns:
        True as soon as one row is too close, otherwise False.
    """
    col_x, col_y, col_z, col_d = columns
    for _, row in annos_df.iterrows():
        pos_coord_x = row[col_x] * volume_shape[2]
        pos_coord_y = row[col_y] * volume_shape[1]
        pos_coord_z = row[col_z] * volume_shape[0]
        diameter = row[col_d] * volume_shape[2]
        # Diagnostic output kept identical to the historical log format.
        print((pos_coord_x, pos_coord_y, pos_coord_z))
        print(center)
        dist = math.sqrt(
            math.pow(pos_coord_x - center[0], 2) +
            math.pow(pos_coord_y - center[1], 2) +
            math.pow(pos_coord_z - center[2], 2))
        if dist < (diameter + margin):
            print("CANNOT", center)
            return True
    return False


def proc_excluded_annotations_patient(
        src_path,
        patient_id,
        annotations_csv="/media/pikachu/Seagate Backup Plus Drive/LC nod Detection/resources/luna16_annotations/annotations_excluded.csv"
):
    """Write the patient's excluded annotations as a normalised CSV.

    Rows of *annotations_csv* whose ``seriesuid`` equals *patient_id* are
    converted from world coordinates to fractions of the extracted image
    volume. A row is dropped when it lies within ``diameter + 64`` of an
    entry in ``<patient_id>_annos_pos.csv``, or within ``diameter + 72`` of
    an entry in the optional manual label file. The remaining rows are
    written to ``<LUNA16_EXTRACTED_IMAGE_DIR>_labels/<patient_id>_annos_excluded.csv``.

    Args:
        src_path: path of the scan, passed to ``SimpleITK.ReadImage``.
        patient_id: series uid used to select rows and to build file names.
        annotations_csv: CSV providing the ``seriesuid``, ``coordX``,
            ``coordY`` and ``coordZ`` columns. Defaults to the location that
            was previously hard-coded.

    Returns:
        ``[patient_id, spacing[0], spacing[1], spacing[2]]`` where ``spacing``
        is the value reported by ``itk_img.GetSpacing()``.

    Raises:
        AssertionError: if the direction values do not sum to ~3 after the
            x / y sign corrections.
    """
    df_node = pandas.read_csv(annotations_csv)
    labels_dir = variables.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    # One call creates both the labels directory and the per-patient
    # sub-directory, and tolerates them already existing.
    os.makedirs(labels_dir + patient_id + "/", exist_ok=True)

    pos_annos_df = pandas.read_csv(labels_dir + patient_id + "_annos_pos.csv")
    pos_annos_manual = None
    manual_path = variables.EXTRA_DATA_DIR + "luna16_manual_labels/" + patient_id + ".csv"
    if os.path.exists(manual_path):
        pos_annos_manual = pandas.read_csv(manual_path)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)
    df_patient = df_node[df_node["seriesuid"] == patient_id]
    print("Annos: ", len(df_patient))

    origin = numpy.array(itk_img.GetOrigin())
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(itk_img.GetSpacing())
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / variables.TARGET_VOXEL_MM
    print("Rescale: ", rescale)

    direction = numpy.array(itk_img.GetDirection())
    print("Direction: ", direction)
    # When an axis direction is -1, mirror both the origin and (later) the
    # annotation coordinate on that axis.
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
        print("Swappint x origin")
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    patient_imgs = operators.load_patient_imgs(
        patient_id, variables.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

    # Loop invariants: volume dimensions and the fixed diameter assigned to
    # every excluded annotation (6 / TARGET_VOXEL_MM; presumably 6 mm -
    # confirm against the settings module).
    volume_shape = patient_imgs.shape
    shape_xyz = patient_imgs.swapaxes(0, 2).shape
    diameter_pixels = 6 / variables.TARGET_VOXEL_MM
    diameter_percent = diameter_pixels / float(volume_shape[1])

    neg_annos = []
    for _, annotation in df_patient.iterrows():
        node_x = annotation["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = annotation["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = annotation["coordZ"]

        center_float = numpy.array([node_x, node_y, node_z])
        center_float_rescaled = (center_float -
                                 origin) / variables.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / shape_xyz

        if _is_near_annotation(center_float_rescaled, pos_annos_df,
                               ("coord_x", "coord_y", "coord_z", "diameter"),
                               volume_shape, 64):
            continue
        if pos_annos_manual is not None and _is_near_annotation(
                center_float_rescaled, pos_annos_manual,
                ("x", "y", "z", "d"), volume_shape, 72):
            continue

        # anno_index counts only the annotations that were kept.
        neg_annos.append([
            len(neg_annos),
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(diameter_percent, 4), 1
        ])

    df_annos = pandas.DataFrame(neg_annos,
                                columns=[
                                    "anno_index", "coord_x", "coord_y",
                                    "coord_z", "diameter", "malscore"
                                ])
    df_annos.to_csv(labels_dir + patient_id + "_annos_excluded.csv",
                    index=False)
    return [patient_id, spacing[0], spacing[1], spacing[2]]
Beispiel #11
0
def proc_pos_annotations_patient(
        src_path,
        patient_id,
        annotations_csv="/media/pikachu/Seagate Backup Plus Drive/LC nod Detection/resources/luna16_annotations/annotations.csv"
):
    """Write the patient's positive annotations as a normalised CSV.

    Rows of *annotations_csv* whose ``seriesuid`` equals *patient_id* are
    converted from world coordinates to fractions of the extracted image
    volume and written to
    ``<LUNA16_EXTRACTED_IMAGE_DIR>_labels/<patient_id>_annos_pos.csv``.

    Args:
        src_path: path of the scan, passed to ``SimpleITK.ReadImage``.
        patient_id: series uid used to select rows and to build file names.
        annotations_csv: CSV providing the ``seriesuid``, ``coordX``,
            ``coordY``, ``coordZ`` and ``diameter_mm`` columns. Defaults to
            the location that was previously hard-coded.

    Returns:
        ``[patient_id, spacing[0], spacing[1], spacing[2]]`` where ``spacing``
        is the value reported by ``itk_img.GetSpacing()``.

    Raises:
        AssertionError: if the direction values do not sum to ~3 after the
            x / y sign corrections.
    """
    df_node = pandas.read_csv(annotations_csv)
    labels_dir = variables.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    # One call creates both the labels directory and the per-patient
    # sub-directory, and tolerates them already existing.
    os.makedirs(labels_dir + patient_id + "/", exist_ok=True)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)
    df_patient = df_node[df_node["seriesuid"] == patient_id]
    print("Annos: ", len(df_patient))

    origin = numpy.array(itk_img.GetOrigin())
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(itk_img.GetSpacing())
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / variables.TARGET_VOXEL_MM
    print("Rescale: ", rescale)

    direction = numpy.array(itk_img.GetDirection())
    print("Direction: ", direction)
    # When an axis direction is -1, mirror both the origin and (later) the
    # annotation coordinate on that axis.
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
        print("Swappint x origin")
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True

    assert abs(sum(direction) - 3) < 0.01

    patient_imgs = operators.load_patient_imgs(
        patient_id, variables.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

    # Loop invariants: volume dimensions used for normalisation.
    shape_xyz = patient_imgs.swapaxes(0, 2).shape
    volume_height = float(patient_imgs.shape[1])

    pos_annos = []
    # Every row is kept, so the enumerate counter is the annotation index.
    for anno_index, (_, annotation) in enumerate(df_patient.iterrows()):
        node_x = annotation["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = annotation["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = annotation["coordZ"]
        diam_mm = annotation["diameter_mm"]

        center_float = numpy.array([node_x, node_y, node_z])
        center_float_rescaled = (center_float -
                                 origin) / variables.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / shape_xyz

        diameter_pixels = diam_mm / variables.TARGET_VOXEL_MM
        diameter_percent = diameter_pixels / volume_height

        pos_annos.append([
            anno_index,
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(diameter_percent, 4), 1
        ])

    df_annos = pandas.DataFrame(pos_annos,
                                columns=[
                                    "anno_index", "coord_x", "coord_y",
                                    "coord_z", "diameter", "malscore"
                                ])
    df_annos.to_csv(labels_dir + patient_id + "_annos_pos.csv",
                    index=False)
    return [patient_id, spacing[0], spacing[1], spacing[2]]