def make_candidate_auto_images(candidate_types=None):
    """Generate 48^3 training cube images for auto-detected candidates.

    For each candidate type, reads the per-patient candidate CSVs, cuts a
    48x48x48 cube around each fractional coordinate and saves it as a tiled
    PNG (6x8 grid) in the working train-data directory.

    Args:
        candidate_types: iterable of type names (e.g. "falsepos", "luna",
            "white", "edge"). Defaults to no types, i.e. nothing is done.
    """
    # Bug fix: the original used a mutable default argument (candidate_types=[]),
    # which is shared across calls; use the None sentinel instead.
    candidate_types = [] if candidate_types is None else candidate_types
    dst_dir = settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_auto/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    # Remove previously generated cubes for the requested types only, so other
    # types' output is untouched.
    for candidate_type in candidate_types:
        for file_path in glob.glob(dst_dir + "*_" + candidate_type + ".png"):
            os.remove(file_path)
    for candidate_type in candidate_types:
        # False positives come from a dedicated resources dir; every other
        # candidate type lives next to the extracted LUNA16 images.
        if candidate_type == "falsepos":
            src_dir = "resources/luna16_falsepos_labels/"
        else:
            src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
        for patient_index, csv_file in enumerate(glob.glob(src_dir + "*_candidates_" + candidate_type + ".csv")):
            patient_id = ntpath.basename(csv_file).replace("_candidates_" + candidate_type + ".csv", "")
            settings.log.info("{0}, patient: {1}, type:{2} ".format(patient_index, patient_id, candidate_type))
            df_annos = pandas.read_csv(csv_file)
            if len(df_annos) == 0:
                continue
            images = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*" + CUBE_IMGTYPE_SRC + ".png", exclude_wildcards=[])
            row_no = 0
            # Renamed from "index" to avoid shadowing the outer enumerate index.
            for row_index, row in df_annos.iterrows():
                # CSV coordinates are stored as fractions of the volume extent.
                coord_x = int(row["coord_x"] * images.shape[2])
                coord_y = int(row["coord_y"] * images.shape[1])
                coord_z = int(row["coord_z"] * images.shape[0])
                anno_index = int(row["anno_index"])
                cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 48)
                if cube_img.sum() < 10:
                    # Essentially empty cube (outside the scan content) - skip.
                    settings.log.info(" ***** Skipping {0} {1} {2} ".format(coord_x, coord_y, coord_z))
                    continue
                try:
                    save_cube_img(dst_dir + patient_id + "_" + str(anno_index) + "_0_" + candidate_type + ".png", cube_img, 6, 8)
                except Exception as ex:
                    # Best-effort: log and continue with the next candidate.
                    settings.log.exception(ex)
                row_no += 1
                # Cap the number of cubes per patient, per candidate type.
                max_item = 240 if candidate_type == "white" else 200
                if candidate_type == "luna":
                    max_item = 500
                if row_no > max_item:
                    break
def make_pos_annotation_images_manual():
    """Generate 64^3 training cubes from the manually labelled LUNA16 annotations."""
    src_dir = "resources/luna16_manual_labels/"
    dst_dir = settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_manual/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    # Start from a clean slate for the manual cubes.
    for old_path in glob.glob(dst_dir + "*_manual.*"):
        os.remove(old_path)
    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        # Only DICOM-UID style (LUNA16) patient ids are processed here.
        if "1.3.6.1.4" not in patient_id:
            continue
        print(patient_id)
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*" + CUBE_IMGTYPE_SRC + ".png")
        for index, row in df_annos.iterrows():
            # Fractional coordinates -> voxel coordinates.
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            diameter = int(row["d"] * images.shape[2])
            node_type = int(row["id"])
            # Malignancy score: pixel diameter clamped to the [16, 25] range.
            malscore = max(16, min(25, int(diameter)))
            anno_index = index
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                settings.log.info(" ***** Skipping {0} {1} {2} ".format(coord_x, coord_y, coord_z))
                continue
            if cube_img.mean() < 10:
                settings.log.info(" ***** Suspicious {0} {1} {2} ".format(coord_x, coord_y, coord_z))
            if cube_img.shape != (64, 64, 64):
                settings.log.info(" ***** incorrect shape !!! {0} - {1} {2} {3} ".format(str(anno_index), coord_x, coord_y, coord_z))
                continue
            pos_neg = "pos" if node_type == 0 else "neg"
            file_name = patient_id + "_" + str(anno_index) + "_" + str(malscore) + "_1_" + pos_neg + ".png"
            save_cube_img(dst_dir + file_name, cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
def make_pos_annotation_images_manual_ndsb3():
    """Generate 64^3 training cubes from the manually labelled NDSB3 annotations."""
    src_dir = "resources/ndsb3_manual_labels/"
    dst_dir = settings.WORKING_DIR + "generated_traindata/ndsb3_train_cubes_manual/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    train_label_df = pandas.read_csv("resources/stage1_labels.csv")
    train_label_df.set_index(["id"], inplace=True)
    # Remove all previously generated output.
    for old_path in glob.glob(dst_dir + "*.*"):
        os.remove(old_path)
    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        # Skip DICOM-UID style ids: only NDSB3 patients belong here.
        if "1.3.6.1.4.1" in patient_id:
            continue
        cancer_label = train_label_df.loc[patient_id]["cancer"]
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR, "*" + CUBE_IMGTYPE_SRC + ".png")
        anno_index = 0
        for index, row in df_annos.iterrows():
            pos_neg = "pos" if row["id"] == 0 else "neg"
            # Fractional coordinates -> voxel coordinates.
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            malscore = int(round(row["dmm"]))
            anno_index += 1
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                settings.log.info(" ***** Skipping {0} {1} {2} ".format(coord_x, coord_y, coord_z))
                continue
            if cube_img.mean() < 10:
                settings.log.info(" ***** Suspicious {0} {1} {2} ".format(coord_x, coord_y, coord_z))
            if cube_img.shape != (64, 64, 64):
                settings.log.info(" ***** incorrect shape !!! {0} - {1} {2} {3} ".format(str(anno_index), coord_x, coord_y, coord_z))
                continue
            print(patient_id)
            # A positive annotation must carry a positive malignancy score.
            assert malscore > 0 or pos_neg == "neg"
            file_name = ("ndsb3manual_" + patient_id + "_" + str(anno_index) + "_" + pos_neg + "_"
                         + str(cancer_label) + "_" + str(malscore) + "_1_pn.png")
            save_cube_img(dst_dir + file_name, cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
def make_annotation_images_lidc():
    """Generate 64^3 positive training cubes from the LIDC annotation CSVs."""
    src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"
    dst_dir = settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    # Clear any output from a previous run.
    for old_path in glob.glob(dst_dir + "*.*"):
        os.remove(old_path)
    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*_annos_pos_lidc.csv")):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos_lidc.csv", "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*" + CUBE_IMGTYPE_SRC + ".png")
        for index, row in df_annos.iterrows():
            # Fractional coordinates -> voxel coordinates.
            coord_x = int(row["coord_x"] * images.shape[2])
            coord_y = int(row["coord_y"] * images.shape[1])
            coord_z = int(row["coord_z"] * images.shape[0])
            malscore = int(row["malscore"])
            # Escape characters that would break the underscore-delimited
            # filename convention used by the training data loader.
            anno_index = str(row["anno_index"]).replace(" ", "xspacex").replace(".", "xpointx").replace("_", "xunderscorex")
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                settings.log.info(" ***** Skipping {0} {1} {2} ".format(coord_x, coord_y, coord_z))
                continue
            if cube_img.mean() < 10:
                settings.log.info(" ***** Suspicious {0} {1} {2} ".format(coord_x, coord_y, coord_z))
            if cube_img.shape != (64, 64, 64):
                settings.log.info(" ***** incorrect shape !!! {0} - {1} {2} {3} ".format(str(anno_index), coord_x, coord_y, coord_z))
                continue
            # The filename label field stores malscore squared.
            file_name = patient_id + "_" + str(anno_index) + "_" + str(malscore * malscore) + "_1_pos.png"
            save_cube_img(dst_dir + file_name, cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
def filter_patient_nodules_predictions(self, df_nodule_predictions, patient_id, image_dir, view_size):
    """Post-process nodule predictions against the patient's lung mask.

    Predictions whose view window does not overlap the lung mask, or that sit
    at an implausible z position, get their "diameter_mm" negated in place as
    a rejection flag. The (mutated) DataFrame is returned.

    Args:
        df_nodule_predictions: DataFrame with fractional coord_x/y/z and diameter_mm.
        patient_id: id used to locate the "*_m.png" mask slices.
        image_dir: directory holding the extracted mask images.
        view_size: side length (voxels) of the window checked against the mask.
    """
    patient_mask = helpers.load_patient_images(patient_id, image_dir, "*_m.png")
    # NOTE(review): delete_indices is never appended to, so the drop() at the
    # end is a no-op; rejection is signalled only via negative diameter_mm.
    # Preserved as-is -- confirm whether hard deletion was ever intended.
    delete_indices = []
    for index, row in df_nodule_predictions.iterrows():
        z_perc = row["coord_z"]
        y_perc = row["coord_y"]
        center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * patient_mask.shape[0]))
        mal_score = row["diameter_mm"]
        # Top-left corner of the window centered on the prediction.
        start_y = int(center_y - view_size / 2)
        start_x = int(center_x - view_size / 2)
        roi_size = int(view_size)
        nodule_in_mask = False
        # Check the center slice and its two neighbours.
        for z_offset in (-1, 0, 1):
            mask_slice = patient_mask[z_offset + center_z]
            mask_roi = mask_slice[start_y:start_y + roi_size, start_x:start_x + roi_size]
            if mask_roi.sum() > 255:  # more than 1 pixel of mask.
                self.logger.info("More than 1 pixel of mask. nodule_in_mask is true")
                nodule_in_mask = True
        if not nodule_in_mask:
            self.logger.info("Nodule not in mask: {0} {1} {2}".format(center_x, center_y, center_z))
            if mal_score > 0:
                df_nodule_predictions.loc[index, "diameter_mm"] = -mal_score
        else:
            if center_z < 30:
                self.logger.info("Z < 30: {0} center z: {1} y_perc: {2} ".format(patient_id, center_z, y_perc))
                if mal_score > 0:
                    df_nodule_predictions.loc[index, "diameter_mm"] = -mal_score
            if (z_perc > 0.75 or z_perc < 0.25) and y_perc > 0.85:
                self.logger.info("SUSPICIOUS FALSEPOSITIVE: {0} center z: {1} y_perc: {2}".format(patient_id, center_z, y_perc))
            if center_z < 50 and y_perc < 0.30:
                self.logger.info("SUSPICIOUS FALSEPOSITIVE OUT OF RANGE: {0} center z: {1} y_perc: {2}".format(patient_id, center_z, y_perc))
    df_nodule_predictions.drop(df_nodule_predictions.index[delete_indices], inplace=True)
    return df_nodule_predictions
def make_pos_annotation_images():
    """Generate 64^3 positive training cubes from the LUNA16 positive annotations.

    Reads the per-patient "*_annos_pos.csv" metadata, cuts a 64x64x64 cube
    around each fractional coordinate and saves it as a tiled PNG (8x8 grid).
    """
    src_dir = settings.LUNA_16_TRAIN_DIR2D2 + "metadata/"
    dst_dir = settings.WORKING_DIR + "luna16_train_cubes_pos/"
    # Consistency fix: every sibling generator creates its output directory,
    # but this one did not, so save_cube_img failed on a fresh working dir.
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)
    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*_annos_pos.csv")):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos.csv", "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(patient_id, settings.LUNA_16_TRAIN_DIR2D2, "*" + CUBE_IMGTYPE_SRC + ".png")
        for index, row in df_annos.iterrows():
            # Fractional coordinates -> voxel coordinates.
            coord_x = int(row["coord_x"] * images.shape[2])
            coord_y = int(row["coord_y"] * images.shape[1])
            coord_z = int(row["coord_z"] * images.shape[0])
            diam_mm = int(row["diameter"] * images.shape[2])
            anno_index = int(row["anno_index"])
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                # Essentially empty cube (outside the scan content) - skip.
                settings.log.info(" ***** Skipping {0} {1} {2}".format(coord_x, coord_y, coord_z))
                continue
            if cube_img.mean() < 10:
                settings.log.info(" ***** Suspicious {0} {1} {2}".format(coord_x, coord_y, coord_z))
            save_cube_img(dst_dir + patient_id + "_" + str(anno_index) + "_" + str(diam_mm) + "_1_" + "pos.png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id, len(df_annos)], [5, 64, 8])
def predict_patient(self, patient_id, image_dir, result_dir, magnification=1, flip=False):
    """Slide a CUBE_SIZE window over the patient volume and predict nodules per cube.

    Cubes with almost no lung-mask content are skipped; the rest are batched
    through the model. Predictions above the P_TH threshold are written, after
    filter_patient_nodules_predictions(), to ``result_dir + patient_id + ".csv"``
    with columns anno_index / coord_x / coord_y / coord_z / diameter /
    nodule_chance / diameter_mm (coords and diameter as volume fractions).

    Args:
        patient_id: id used to locate the extracted "*_i.png" / "*_m.png" slices.
        image_dir: directory holding the extracted slice images.
        result_dir: output directory for the CSV (created if missing).
        magnification: rescale factor applied to image and mask before predicting.
        flip: when True, mirror each cube along the x axis before prediction.

    Returns:
        None (returns early with None if the model has not been generated).
    """
    if self.model is None:
        self.logger.error("The model is None. Please call generate_model() to generate the model at first.")
        return None
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    sw = helpers.Stopwatch.start_new()
    csv_target_path = result_dir + patient_id + ".csv"
    all_predictions_csv = []
    patient_img = helpers.load_patient_images(patient_id, image_dir, "*_i.png", [])
    if magnification != 1:
        patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1), magnification)
    patient_mask = helpers.load_patient_images(patient_id, image_dir, "*_m.png", [])
    if magnification != 1:
        patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)

    step = self.PREDICT_STEP
    CROP_SIZE = self.CUBE_SIZE

    # Number of full window positions along each axis at the given stride.
    predict_volume_shape_list = [0, 0, 0]
    for dim in range(3):
        dim_indent = 0
        while dim_indent + CROP_SIZE < patient_img.shape[dim]:
            predict_volume_shape_list[dim] += 1
            dim_indent += step
    predict_volume_shape = (predict_volume_shape_list[0], predict_volume_shape_list[1], predict_volume_shape_list[2])
    predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
    self.logger.info("Predict volume shape: {0}".format(predict_volume.shape))

    done_count = 0
    skipped_count = 0
    batch_size = 128
    batch_list = []
    batch_list_coords = []
    patient_predictions_csv = []
    annotation_index = 0

    def flush_batch():
        # Run the model on the queued cubes, record every prediction and
        # reset the batch buffers.
        nonlocal annotation_index, batch_list, batch_list_coords
        if not batch_list:
            return
        batch_data = numpy.vstack(batch_list)
        p = self.model.predict(batch_data, batch_size=batch_size)
        for i in range(len(p[0])):
            p_z, p_y, p_x = batch_list_coords[i]
            nodule_chance = p[0][i][0]
            predict_volume[p_z, p_y, p_x] = nodule_chance
            if nodule_chance > self.P_TH:
                self.logger.info("Cube x {0} y {1} z {2} is possible nodule, nodule_chance is {3}!!!".format(p_x, p_y, p_z, nodule_chance))
                # Grid position -> cube center in voxel coordinates.
                p_z = p_z * step + CROP_SIZE / 2
                p_y = p_y * step + CROP_SIZE / 2
                p_x = p_x * step + CROP_SIZE / 2
                self.logger.info("Cube center x {0} y {1} z {2} ".format(p_x, p_y, p_z))
                p_z_perc = round(p_z / patient_img.shape[0], 4)
                p_y_perc = round(p_y / patient_img.shape[1], 4)
                p_x_perc = round(p_x / patient_img.shape[2], 4)
                diameter_mm = round(p[1][i][0], 4)
                self.logger.info("Cube diameter_mm {0} ".format(diameter_mm))
                # Diameter as a fraction of the volume width. (The original
                # computed two step-based estimates first and immediately
                # overwrote them -- dead code, removed.)
                diameter_perc = round(diameter_mm / patient_img.shape[2], 4)
                self.logger.info("Cube center percentage x {0} y {1} z {2} diamm {3} ".format(p_x_perc, p_y_perc, p_z_perc, diameter_mm))
                nodule_chance = round(nodule_chance, 4)
                patient_predictions_csv_line = [annotation_index, p_x_perc, p_y_perc, p_z_perc, diameter_perc, nodule_chance, diameter_mm]
                patient_predictions_csv.append(patient_predictions_csv_line)
                all_predictions_csv.append([patient_id] + patient_predictions_csv_line)
                annotation_index += 1
        batch_list = []
        batch_list_coords = []

    for z in range(0, predict_volume_shape[0]):
        for y in range(0, predict_volume_shape[1]):
            for x in range(0, predict_volume_shape[2]):
                cube_img = patient_img[z * step:z * step + CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step + CROP_SIZE]
                cube_mask = patient_mask[z * step:z * step + CROP_SIZE, y * step:y * step + CROP_SIZE, x * step:x * step + CROP_SIZE]
                if cube_mask.sum() < 2000:
                    # Almost no lung mask inside this cube: not worth predicting.
                    skipped_count += 1
                    self.logger.info("Cube x {0} y {1} z {2} is skipped!!!".format(x, y, z))
                else:
                    if flip:
                        cube_img = cube_img[:, :, ::-1]
                    if CROP_SIZE != self.CUBE_SIZE:
                        cube_img = helpers.rescale_patient_images2(cube_img, (self.CUBE_SIZE, self.CUBE_SIZE, self.CUBE_SIZE))
                    batch_list.append(self.prepare_image_for_net3D(cube_img))
                    batch_list_coords.append((z, y, x))
                    if len(batch_list) % batch_size == 0:
                        flush_batch()
                done_count += 1
                if done_count % 10000 == 0:
                    self.logger.info("Done: {0} skipped: {1}".format(done_count, skipped_count))

    # Bug fix: the original never predicted the final partial batch
    # (fewer than batch_size cubes), silently dropping predictions at the
    # end of the volume sweep.
    flush_batch()

    df = pandas.DataFrame(patient_predictions_csv, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "nodule_chance", "diameter_mm"])
    self.filter_patient_nodules_predictions(df, patient_id, image_dir, CROP_SIZE * magnification)
    df.to_csv(csv_target_path, index=False)
    self.logger.info("predict_volume mean is {0}".format(predict_volume.mean()))
    self.logger.info("Done in {0} seconds".format(sw.get_elapsed_seconds()))