def make_negative_train_data_based_on_predicted_luna_nodules():
    """Mark LUNA predictions that lie near annotated nodules and export the rest.

    For every ``*.csv`` found under ``settings.LUNA_NODULE_DETECTION_DIR``:

    * the predictions are loaded and handed to
      ``filter_patient_nodules_predictions`` (its return value is not used here);
    * ``<patient_id>_annos_pos_lidc.csv`` is read from
      ``settings.LUNA_NODULE_LABELS_DIR`` and, when it exists,
      ``<patient_id>.csv`` from ``settings.MANUAL_ANNOTATIONS_LABELS_DIR``;
    * each prediction whose ``diameter_mm`` is not negative gets that value
      negated when its pixel position is closer than ``keep_dist`` to a LIDC
      label, or closer than ``d * patient_imgs[0].shape[1] + 72`` to a manual
      annotation;
    * the prediction CSV is overwritten in place, and the rows that still have
      ``diameter_mm >= 0`` are written to ``<patient_id>_candidates_falsepos.csv``.

    The number of exported rows, summed over all patients, is logged at the end.
    """

    def _to_pixels(row, x_key, y_key, z_key, images):
        # Delegates to the project helper for the three named columns of ``row``.
        return helpers.percentage_to_pixels(row[x_key], row[y_key], row[z_key], images)

    def _separation(first, second):
        # Euclidean distance between two (x, y, z) triples.
        ax, ay, az = first
        bx, by, bz = second
        return math.sqrt(math.pow(ax - bx, 2) + math.pow(ay - by, 2) + math.pow(az - bz, 2))

    detections_dir = settings.LUNA_NODULE_DETECTION_DIR
    labels_dir = settings.LUNA_NODULE_LABELS_DIR
    keep_dist = CUBE_SIZE + CUBE_SIZE / 2
    exported_total = 0

    for csv_index, csv_path in enumerate(glob.glob(detections_dir + "*.csv")):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        predictions = pandas.read_csv(csv_path)

        manual_path = settings.MANUAL_ANNOTATIONS_LABELS_DIR + patient_id + ".csv"
        manual_annos = pandas.read_csv(manual_path) if os.path.exists(manual_path) else None

        # NOTE(review): the result is discarded, so this call only matters if the
        # helper modifies ``predictions`` in place - confirm against its definition.
        filter_patient_nodules_predictions(predictions, patient_id, CUBE_SIZE, luna16=True)
        lidc_labels = pandas.read_csv(labels_dir + patient_id + "_annos_pos_lidc.csv")
        logger.info("csv_index {0} : patient_id {1} , pos {2}".format(csv_index, patient_id, len(lidc_labels)))
        patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA_16_TRAIN_DIR2D2, "*_m.png")

        for pred_label, pred in predictions.iterrows():
            # Rows that already carry a negative diameter are left untouched.
            if pred["diameter_mm"] < 0:
                continue
            nx, ny, nz = _to_pixels(pred, "coord_x", "coord_y", "coord_z", patient_imgs)
            pred_center = (nx, ny, nz)
            diam_mm = pred["diameter_mm"]

            # Stops at the first LIDC label that is close enough.
            overlaps_lidc = any(
                _separation(
                    pred_center,
                    _to_pixels(lidc_row, "coord_x", "coord_y", "coord_z", patient_imgs),
                ) < keep_dist
                for _, lidc_row in lidc_labels.iterrows()
            )
            if overlaps_lidc:
                if diam_mm >= 0:
                    diam_mm = diam_mm * -1
                predictions.loc[pred_label, "diameter_mm"] = diam_mm

            if manual_annos is None:
                continue

            for _, manual_row in manual_annos.iterrows():
                mx, my, mz = _to_pixels(manual_row, "x", "y", "z", patient_imgs)
                diameter = manual_row["d"] * patient_imgs[0].shape[1]
                if _separation((mx, my, mz), pred_center) < (diameter + 72):  # make sure we have a big margin
                    if diam_mm >= 0:
                        diam_mm = diam_mm * -1
                    predictions.loc[pred_label, "diameter_mm"] = diam_mm
                    logger.info("#Too close: {0} {1} {2}".format(nx, ny, nz))
                    break

        # Persist the marks first, then export only the unmarked rows.
        predictions.to_csv(csv_path, index=False)
        unmarked = predictions[predictions["diameter_mm"] >= 0]
        unmarked.to_csv(labels_dir + patient_id + "_candidates_falsepos.csv", index=False)
        exported_total += len(unmarked)

    logger.info("Total false pos: {0}".format(exported_total))
def make_negative_train_data_based_on_predicted_luna_nodules():
    """Build per-patient false-positive candidate files from LUNA predictions.

    Walks the ``*.csv`` files in ``settings.LUNA_NODULE_DETECTION_DIR``. For each
    patient it loads the predictions, calls ``filter_patient_nodules_predictions``
    on them (ignoring what it returns), loads ``<patient_id>_annos_pos_lidc.csv``
    and - if the file exists - the manual annotations for that patient.

    A prediction with a non-negative ``diameter_mm`` has that value negated when
    its pixel position is within ``keep_dist`` of a LIDC label, or within
    ``d * patient_imgs[0].shape[1] + 72`` of a manual annotation. The prediction
    CSV is then rewritten, the rows still at ``diameter_mm >= 0`` are saved as
    ``<patient_id>_candidates_falsepos.csv``, and the running row count is
    printed once all patients are processed.
    """

    def _flipped(diam):
        # Negate only values that are currently >= 0; anything else passes through.
        return diam * -1 if diam >= 0 else diam

    src_dir = settings.LUNA_NODULE_DETECTION_DIR
    labels_dir = settings.LUNA_NODULE_LABELS_DIR
    keep_dist = CUBE_SIZE + CUBE_SIZE / 2
    running_total = 0

    for csv_index, csv_path in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        candidates = pandas.read_csv(csv_path)

        manual_csv = settings.MANUAL_ANNOTATIONS_LABELS_DIR + patient_id + ".csv"
        manual_annos = None
        if os.path.exists(manual_csv):
            manual_annos = pandas.read_csv(manual_csv)

        # NOTE(review): return value unused; presumably relied on for in-place
        # changes to ``candidates`` - verify against the helper.
        filter_patient_nodules_predictions(candidates, patient_id, CUBE_SIZE, luna16=True)
        lidc_annos = pandas.read_csv(labels_dir + patient_id + "_annos_pos_lidc.csv")
        print(csv_index, ": ", patient_id, ", pos", len(lidc_annos))
        patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA_16_TRAIN_DIR2D2, "*_m.png")

        for row_label, candidate in candidates.iterrows():
            diam_mm = candidate["diameter_mm"]
            if diam_mm < 0:
                # Already marked; nothing to do for this row.
                continue

            nx, ny, nz = helpers.percentage_to_pixels(
                candidate["coord_x"], candidate["coord_y"], candidate["coord_z"], patient_imgs
            )

            # Pass 1: LIDC labels, fixed distance threshold.
            for _, lidc_row in lidc_annos.iterrows():
                px, py, pz = helpers.percentage_to_pixels(
                    lidc_row["coord_x"], lidc_row["coord_y"], lidc_row["coord_z"], patient_imgs
                )
                offsets = (nx - px, ny - py, nz - pz)
                dist = math.sqrt(sum(math.pow(offset, 2) for offset in offsets))
                if dist < keep_dist:
                    diam_mm = _flipped(diam_mm)
                    candidates.loc[row_label, "diameter_mm"] = diam_mm
                    break

            # Pass 2: manual annotations, threshold scaled by the annotation's own size.
            if manual_annos is not None:
                for _, manual_row in manual_annos.iterrows():
                    px, py, pz = helpers.percentage_to_pixels(
                        manual_row["x"], manual_row["y"], manual_row["z"], patient_imgs
                    )
                    diameter = manual_row["d"] * patient_imgs[0].shape[1]
                    offsets = (px - nx, py - ny, pz - nz)
                    dist = math.sqrt(sum(math.pow(offset, 2) for offset in offsets))
                    if dist < (diameter + 72):  # make sure we have a big margin
                        diam_mm = _flipped(diam_mm)
                        candidates.loc[row_label, "diameter_mm"] = diam_mm
                        print("#Too close",  (nx, ny, nz))
                        break

        # Write the marked table back, then export the rows that stayed unmarked.
        candidates.to_csv(csv_path, index=False)
        survivors = candidates[candidates["diameter_mm"] >= 0]
        survivors.to_csv(labels_dir + patient_id + "_candidates_falsepos.csv", index=False)
        running_total += len(survivors)

    print("Total false pos:", running_total)
# Example #3
# 0
def make_predicted_luna_nodules():
    """Suppress overlapping nodule candidates per patient and export the confident ones.

    For each ``*.csv`` in
    ``settings.TEST_NODULE_DETECTION_DIR + 'predictions10_luna16_fs/'``:

    1. The predictions are passed through ``filter_patient_nodules_predictions``
       and sorted by ``nodule_chance`` in descending order.
    2. Every candidate whose ``diameter_mm`` was non-negative at the start of the
       pass is compared with every candidate. A neighbour whose pixel distance
       is strictly between 1 and ``candidate_diameter + 48`` and whose
       ``nodule_chance`` is strictly lower has a positive ``diameter_mm`` negated.
    3. The whole table is written to ``<patient_id>_candidates_1.csv``. Rows with
       ``diameter_mm >= 0`` and ``nodule_chance >= 0.9`` are written to
       ``<patient_id>_candidates.csv`` without the ``diameter``, ``diameter_mm``
       and ``anno_index`` columns and with a ``seriesuid`` column holding the
       patient id.

    Both output files go to ``settings.TEST_NODULE_DETECTION_DIR``. The number of
    rows exported in step 3, summed over all patients, is printed at the end.

    Raises:
        KeyError: if one of the expected columns is missing from a prediction CSV.
    """
    src_dir = settings.TEST_NODULE_DETECTION_DIR + 'predictions10_luna16_fs/'
    out_dir = settings.TEST_NODULE_DETECTION_DIR
    candidate_diameter = 6
    suppress_radius = candidate_diameter + 48  # make sure we have a big margin
    total_false_pos = 0

    for csv_path in glob.glob(src_dir + "*.csv"):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        df_nodule_predictions = pandas.read_csv(csv_path)
        df_nodule_predictions = filter_patient_nodules_predictions(
            df_nodule_predictions, patient_id, CUBE_SIZE, luna16=False)

        patient_imgs = helpers.load_patient_images(
            patient_id, settings.TEST_EXTRACTED_IMAGE_DIR, "*_m.png")
        df_nodule_predictions = df_nodule_predictions.sort_values(
            by='nodule_chance', ascending=False)

        # The coordinates are never modified below, so convert each row once
        # instead of once per candidate pair.
        # Assumes helpers.percentage_to_pixels is deterministic for identical
        # arguments - TODO confirm.
        pixel_centers = [
            helpers.percentage_to_pixels(row["coord_x"], row["coord_y"],
                                         row["coord_z"], patient_imgs)
            for _, row in df_nodule_predictions.iterrows()
        ]

        # The outer loop reads from a snapshot taken before any suppression, so
        # a candidate suppressed during this pass still suppresses its own
        # weaker neighbours.
        # NOTE(review): confirm this is the intended suppression semantics.
        snapshot = df_nodule_predictions.copy()
        for (_, strong_row), (nx, ny, nz) in zip(snapshot.iterrows(),
                                                 pixel_centers):
            if strong_row["diameter_mm"] < 0:
                continue
            for (index, row), (x, y, z) in zip(
                    df_nodule_predictions.iterrows(), pixel_centers):
                dist = math.sqrt(
                    math.pow(nx - x, 2) + math.pow(ny - y, 2) +
                    math.pow(nz - z, 2))
                # dist > 1 skips the candidate itself (and any candidate that
                # lands on practically the same pixel).
                if not (1 < dist < suppress_radius):
                    continue
                print("# Too close")
                if strong_row['nodule_chance'] > row['nodule_chance']:
                    mal_score = row["diameter_mm"]
                    if mal_score > 0:
                        mal_score *= -1
                    df_nodule_predictions.loc[index,
                                              "diameter_mm"] = mal_score

        df_nodule_predictions.to_csv(out_dir + patient_id +
                                     "_candidates_1.csv",
                                     index=False)

        keep_mask = ((df_nodule_predictions["diameter_mm"] >= 0) &
                     (df_nodule_predictions["nodule_chance"] >= 0.9))
        # Work on an explicit copy so the column changes below do not act on a
        # filtered slice of the full table.
        confident = df_nodule_predictions[keep_mask].copy()
        confident = confident.drop(["diameter", "diameter_mm", "anno_index"],
                                   axis=1)
        confident['seriesuid'] = patient_id
        confident.to_csv(out_dir + patient_id + "_candidates.csv",
                         index=False)
        total_false_pos += len(confident)

    print("Total false pos:", total_false_pos)