def make_negative_train_data_based_on_predicted_luna_nodules():
    """Derive false-positive (negative) candidates from LUNA nodule predictions.

    For every prediction CSV found in ``settings.LUNA_NODULE_DETECTION_DIR``
    the predicted nodules are compared with the patient's positive labels
    (``<patient_id>_annos_pos_lidc.csv``) and, when such a file exists, with
    the manual annotations. A prediction that lies too close to one of those
    positions gets its ``diameter_mm`` negated. The annotated predictions are
    written back over the source CSV, and the rows whose ``diameter_mm`` is
    still non-negative are saved as ``<patient_id>_candidates_falsepos.csv``
    in ``settings.LUNA_NODULE_LABELS_DIR``. Progress is reported through
    ``logger``.
    """
    # NOTE(review): a function with this exact name is defined again later in
    # this file (a print-based variant). If both live in the same module, the
    # later definition replaces this one at import time.

    def pixel_distance(ax, ay, az, bx, by, bz):
        """Return the Euclidean distance between two 3-D positions."""
        return math.sqrt(
            math.pow(ax - bx, 2) + math.pow(ay - by, 2) + math.pow(az - bz, 2))

    src_dir = settings.LUNA_NODULE_DETECTION_DIR
    pos_labels_dir = settings.LUNA_NODULE_LABELS_DIR
    keep_dist = CUBE_SIZE + CUBE_SIZE / 2
    total_false_pos = 0

    for csv_index, csv_path in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        # Debug aid: uncomment to restrict the run to a single patient.
        # if not "273525289046256012743471155680" in patient_id:
        #     continue
        df_nodule_predictions = pandas.read_csv(csv_path)

        manual_path = settings.MANUAL_ANNOTATIONS_LABELS_DIR + patient_id + ".csv"
        pos_annos_manual = (
            pandas.read_csv(manual_path) if os.path.exists(manual_path) else None)

        # The return value is not used here; presumably the helper edits the
        # frame in place - TODO confirm.
        filter_patient_nodules_predictions(
            df_nodule_predictions, patient_id, CUBE_SIZE, luna16=True)

        pos_labels = pandas.read_csv(
            pos_labels_dir + patient_id + "_annos_pos_lidc.csv")
        logger.info("csv_index {0} : patient_id {1} , pos {2}".format(csv_index, patient_id, len(pos_labels)))

        patient_imgs = helpers.load_patient_images(
            patient_id, settings.LUNA_16_TRAIN_DIR2D2, "*_m.png")

        for pred_index, pred_row in df_nodule_predictions.iterrows():
            # Rows that are already negative have been rejected earlier.
            if pred_row["diameter_mm"] < 0:
                continue

            nx, ny, nz = helpers.percentage_to_pixels(
                pred_row["coord_x"], pred_row["coord_y"], pred_row["coord_z"],
                patient_imgs)
            diam_mm = pred_row["diameter_mm"]

            # Pass 1: reject predictions near a positive label.
            for _, pos_row in pos_labels.iterrows():
                px, py, pz = helpers.percentage_to_pixels(
                    pos_row["coord_x"], pos_row["coord_y"], pos_row["coord_z"],
                    patient_imgs)
                if not pixel_distance(nx, ny, nz, px, py, pz) < keep_dist:
                    continue
                if diam_mm >= 0:
                    diam_mm *= -1
                df_nodule_predictions.loc[pred_index, "diameter_mm"] = diam_mm
                break

            if pos_annos_manual is None:
                continue

            # Pass 2: reject predictions near a manual annotation.
            for _, manual_row in pos_annos_manual.iterrows():
                px, py, pz = helpers.percentage_to_pixels(
                    manual_row["x"], manual_row["y"], manual_row["z"],
                    patient_imgs)
                diameter = manual_row["d"] * patient_imgs[0].shape[1]
                # make sure we have a big margin
                if not pixel_distance(px, py, pz, nx, ny, nz) < (diameter + 72):
                    continue
                if diam_mm >= 0:
                    diam_mm *= -1
                df_nodule_predictions.loc[pred_index, "diameter_mm"] = diam_mm
                logger.info("#Too close: {0} {1} {2}".format(nx, ny, nz))
                break

        # Persist the annotated predictions, then export only the survivors.
        df_nodule_predictions.to_csv(csv_path, index=False)
        false_positives = df_nodule_predictions[
            df_nodule_predictions["diameter_mm"] >= 0]
        false_positives.to_csv(
            pos_labels_dir + patient_id + "_candidates_falsepos.csv", index=False)
        total_false_pos += len(false_positives)

    logger.info("Total false pos: {0}".format(total_false_pos))
def make_negative_train_data_based_on_predicted_luna_nodules():
    """Derive false-positive (negative) candidates from LUNA nodule predictions.

    Each prediction CSV in ``settings.LUNA_NODULE_DETECTION_DIR`` is loaded and
    its predictions are checked against the patient's positive labels
    (``<patient_id>_annos_pos_lidc.csv``) and, if the file exists, against the
    manual annotations. Predictions that fall within the safety margin of one
    of those positions have ``diameter_mm`` negated. The annotated table
    overwrites the source CSV; rows still carrying a non-negative
    ``diameter_mm`` are written to ``<patient_id>_candidates_falsepos.csv`` in
    ``settings.LUNA_NODULE_LABELS_DIR``. Progress is printed to stdout.
    """
    # NOTE(review): an earlier definition with this exact name (a
    # logger-based variant) appears above in this file. If both live in the
    # same module, this later definition is the one that takes effect.

    def pixel_distance(ax, ay, az, bx, by, bz):
        """Return the Euclidean distance between two 3-D positions."""
        return math.sqrt(
            math.pow(ax - bx, 2) + math.pow(ay - by, 2) + math.pow(az - bz, 2))

    src_dir = settings.LUNA_NODULE_DETECTION_DIR
    pos_labels_dir = settings.LUNA_NODULE_LABELS_DIR
    keep_dist = CUBE_SIZE + CUBE_SIZE / 2
    total_false_pos = 0

    for csv_index, csv_path in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")
        # Debug aid: uncomment to restrict the run to a single patient.
        # if not "273525289046256012743471155680" in patient_id:
        #     continue
        df_nodule_predictions = pandas.read_csv(csv_path)

        manual_path = settings.MANUAL_ANNOTATIONS_LABELS_DIR + patient_id + ".csv"
        pos_annos_manual = (
            pandas.read_csv(manual_path) if os.path.exists(manual_path) else None)

        # The return value is not used here; presumably the helper edits the
        # frame in place - TODO confirm.
        filter_patient_nodules_predictions(
            df_nodule_predictions, patient_id, CUBE_SIZE, luna16=True)

        pos_labels = pandas.read_csv(
            pos_labels_dir + patient_id + "_annos_pos_lidc.csv")
        print(csv_index, ": ", patient_id, ", pos", len(pos_labels))

        patient_imgs = helpers.load_patient_images(
            patient_id, settings.LUNA_16_TRAIN_DIR2D2, "*_m.png")

        for pred_index, pred_row in df_nodule_predictions.iterrows():
            # Rows that are already negative have been rejected earlier.
            if pred_row["diameter_mm"] < 0:
                continue

            nx, ny, nz = helpers.percentage_to_pixels(
                pred_row["coord_x"], pred_row["coord_y"], pred_row["coord_z"],
                patient_imgs)
            diam_mm = pred_row["diameter_mm"]

            # Pass 1: reject predictions near a positive label.
            for _, pos_row in pos_labels.iterrows():
                px, py, pz = helpers.percentage_to_pixels(
                    pos_row["coord_x"], pos_row["coord_y"], pos_row["coord_z"],
                    patient_imgs)
                if not pixel_distance(nx, ny, nz, px, py, pz) < keep_dist:
                    continue
                if diam_mm >= 0:
                    diam_mm *= -1
                df_nodule_predictions.loc[pred_index, "diameter_mm"] = diam_mm
                break

            if pos_annos_manual is None:
                continue

            # Pass 2: reject predictions near a manual annotation.
            for _, manual_row in pos_annos_manual.iterrows():
                px, py, pz = helpers.percentage_to_pixels(
                    manual_row["x"], manual_row["y"], manual_row["z"],
                    patient_imgs)
                diameter = manual_row["d"] * patient_imgs[0].shape[1]
                # make sure we have a big margin
                if not pixel_distance(px, py, pz, nx, ny, nz) < (diameter + 72):
                    continue
                if diam_mm >= 0:
                    diam_mm *= -1
                df_nodule_predictions.loc[pred_index, "diameter_mm"] = diam_mm
                print("#Too close", (nx, ny, nz))
                break

        # Persist the annotated predictions, then export only the survivors.
        df_nodule_predictions.to_csv(csv_path, index=False)
        false_positives = df_nodule_predictions[
            df_nodule_predictions["diameter_mm"] >= 0]
        false_positives.to_csv(
            pos_labels_dir + patient_id + "_candidates_falsepos.csv", index=False)
        total_false_pos += len(false_positives)

    print("Total false pos:", total_false_pos)
def make_predicted_luna_nodules():
    """Suppress overlapping nodule predictions and export candidate lists.

    For every prediction CSV in
    ``settings.TEST_NODULE_DETECTION_DIR + 'predictions10_luna16_fs/'``:

    1. The predictions are passed through
       ``filter_patient_nodules_predictions`` and sorted by ``nodule_chance``
       in descending order.
    2. Every pair of predictions whose distance (in the units returned by
       ``helpers.percentage_to_pixels``) is greater than 1 and smaller than
       54 (= 6 + 48) is reported as "# Too close". Of such a pair, the
       prediction with the strictly lower ``nodule_chance`` has a positive
       ``diameter_mm`` negated.
    3. The complete table is written to ``<patient_id>_candidates_1.csv``.
       Rows with ``diameter_mm >= 0`` and ``nodule_chance >= 0.9`` are written
       to ``<patient_id>_candidates.csv`` without the ``diameter``,
       ``diameter_mm`` and ``anno_index`` columns and with a ``seriesuid``
       column holding the patient id.

    Both output files go to ``settings.TEST_NODULE_DETECTION_DIR``. The total
    number of exported candidates is printed at the end.

    Raises:
        KeyError: if one of the columns to be removed is missing from the
            filtered predictions.
    """
    src_dir = settings.TEST_NODULE_DETECTION_DIR + 'predictions10_luna16_fs/'
    pos_labels_dir = settings.TEST_NODULE_DETECTION_DIR
    # Loop-invariant suppression radius: candidate size plus a big margin.
    candidate_diameter = 6
    suppress_dist = candidate_diameter + 48
    total_false_pos = 0

    for csv_path in glob.glob(src_dir + "*.csv"):
        patient_id = ntpath.basename(csv_path).replace(".csv", "")

        df_nodule_predictions = pandas.read_csv(csv_path)
        df_nodule_predictions = filter_patient_nodules_predictions(
            df_nodule_predictions, patient_id, CUBE_SIZE, luna16=False)
        patient_imgs = helpers.load_patient_images(
            patient_id, settings.TEST_EXTRACTED_IMAGE_DIR, "*_m.png")

        df_nodule_predictions = df_nodule_predictions.sort_values(
            by='nodule_chance', ascending=False)

        # Iterate over a snapshot so the outer loop is unaffected by the
        # diameter_mm updates made to the live frame below.
        # NOTE(review): because of the snapshot, a prediction suppressed
        # during this pass still suppresses its own neighbours - confirm
        # that this is intended.
        df_snapshot = df_nodule_predictions.copy()
        for _, nod_pred_row in df_snapshot.iterrows():
            if nod_pred_row["diameter_mm"] < 0:
                continue
            nx, ny, nz = helpers.percentage_to_pixels(
                nod_pred_row["coord_x"], nod_pred_row["coord_y"],
                nod_pred_row["coord_z"], patient_imgs)

            for index, row in df_nodule_predictions.iterrows():
                x, y, z = helpers.percentage_to_pixels(
                    row["coord_x"], row["coord_y"], row["coord_z"],
                    patient_imgs)
                dist = math.sqrt(
                    math.pow(nx - x, 2) + math.pow(ny - y, 2) +
                    math.pow(nz - z, 2))
                # "dist > 1" skips the prediction itself (distance 0) and
                # near-identical positions.
                if dist < suppress_dist and dist > 1:
                    print("# Too close")
                    if nod_pred_row['nodule_chance'] > row['nodule_chance']:
                        # The weaker prediction of the pair is marked as
                        # rejected by a negative diameter_mm.
                        mal_score = row["diameter_mm"]
                        if mal_score > 0:
                            mal_score *= -1
                        df_nodule_predictions.loc[index, "diameter_mm"] = mal_score

        # Full table, including the rejected (negative diameter_mm) rows.
        df_nodule_predictions.to_csv(
            pos_labels_dir + patient_id + "_candidates_1.csv", index=False)

        keep_mask = ((df_nodule_predictions["diameter_mm"] >= 0) &
                     (df_nodule_predictions["nodule_chance"] >= 0.9))
        # drop()/assign() return new frames, so nothing is deleted from or
        # assigned into a filtered slice (avoids SettingWithCopyWarning).
        df_candidates = (
            df_nodule_predictions[keep_mask]
            .drop(['diameter', 'diameter_mm', 'anno_index'], axis=1)
            .assign(seriesuid=patient_id)
        )
        df_candidates.to_csv(
            pos_labels_dir + patient_id + "_candidates.csv", index=False)
        total_false_pos += len(df_candidates)

    print("Total false pos:", total_false_pos)