import math
import os
import shutil
from pathlib import Path

import cv2
import numpy as np
import tensorflow as tf

# Repo-local helpers such as get_locations_from_csv, tuple_2_csv, get_image_tf,
# normalize, dbscan_clustering, get_location_from_file_name, is_inside,
# visualize, NucleusDataset, and RESULTS_DIR are assumed to be imported or
# defined elsewhere in this module.


def label_detected_nucleus(nucleus_dir, ground_truth_dir):
  """Label each detected nucleus as mitosis or not, depending on whether it
  falls within 32 pixels of a ground-truth location."""
  ground_truth_csvs = [str(f) for f in Path(ground_truth_dir).glob('*/*.csv')]
  for ground_truth_csv in ground_truth_csvs:
    csv_dir, base = os.path.split(ground_truth_csv)  # renamed to avoid shadowing the parameter
    sub_dir = os.path.split(csv_dir)[1]
    inference_csv = os.path.join(nucleus_dir, "{}-{}".format(sub_dir, base))
    label_csv = os.path.join(nucleus_dir, "label-{}-{}".format(sub_dir, base))
    ground_truth_locations = get_locations_from_csv(
        ground_truth_csv, hasHeader=False, hasProb=False)
    inference_locations = get_locations_from_csv(
        inference_csv, hasHeader=True, hasProb=True)
    label_output = []
    for (y1, x1, prob) in inference_locations:
      inside = False
      for (y2, x2) in ground_truth_locations:
        if is_inside(x1, y1, x2, y2, 32):
          inside = True
          label_output.append((y1, x1, prob, True))
          break
      if not inside:
        label_output.append((y1, x1, prob, False))
    # Every inference location must receive exactly one label.
    print(len(label_output), len(inference_locations))
    assert len(label_output) == len(inference_locations)
    tuple_2_csv(label_output, label_csv, ['Y', 'X', 'prob', 'is_mitosis'])
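
# `is_inside` is defined elsewhere in the repo. A minimal sketch of what it
# presumably checks, assuming a square (Chebyshev) neighbourhood of the given
# radius; the upstream version may use Euclidean distance instead.
def is_inside(x1, y1, x2, y2, radius):
  """Return True if (x1, y1) lies within `radius` pixels of (x2, y2)."""
  return abs(x1 - x2) <= radius and abs(y1 - y2) <= radius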
def combine_csvs(input_dir, output_dir, hasHeader=True, hasProb=True,
                 clean_output_dir=False):
  """Combine the per-tile CSV files under `input_dir` into one CSV per slide,
  translating each point by the tile's (y, x) offset encoded in the file name
  (`<basename>_<y_offset>_<x_offset>.csv`)."""
  if clean_output_dir:
    shutil.rmtree(output_dir, ignore_errors=True)  # tolerate a missing dir
  input_files = [str(f) for f in Path(input_dir).glob('**/*.csv')]
  combined = {}  # renamed from `combine_csvs` to avoid shadowing the function
  for input_file in input_files:
    points = get_locations_from_csv(input_file, hasHeader=hasHeader,
                                    hasProb=hasProb)
    basename, y_offset, x_offset = \
        os.path.basename(input_file).split('.')[0].split("_")
    if basename not in combined:
      combined[basename] = []
    y_offset = int(y_offset)
    x_offset = int(x_offset)
    # Translate tile-local coordinates into slide-level coordinates.
    points = [(y + y_offset, x + x_offset, prob) for (y, x, prob) in points]
    combined[basename].extend(points)
  os.makedirs(output_dir, exist_ok=True)
  for basename, points in combined.items():
    tuple_2_csv(points, os.path.join(output_dir, basename) + '.csv',
                columns=['Y', 'X', 'prob'])
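
# The two CSV helpers used throughout this module live elsewhere in the repo.
# Minimal sketches of their assumed behaviour: the names and signatures are
# taken from the call sites above, but the parsing details are guesses.
import csv

def get_locations_from_csv(csv_path, hasHeader=True, hasProb=True):
  """Read (y, x[, prob]) tuples from a CSV file."""
  with open(csv_path) as f:
    reader = csv.reader(f)
    if hasHeader:
      next(reader)  # skip the header row
    if hasProb:
      return [(int(r[0]), int(r[1]), float(r[2])) for r in reader]
    return [(int(r[0]), int(r[1])) for r in reader]

def tuple_2_csv(tuples, csv_path, columns=('Y', 'X', 'prob')):
  """Write a list of tuples to a CSV file with a header row."""
  with open(csv_path, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(columns)
    writer.writerows(tuples)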
def run_inference(model, sess, batch_size, input_dir_path, output_dir_path,
                  num_parallel_calls=1, prob_thres=0.5, eps=64, min_samples=1,
                  isWeightedAvg=False):
  """Run the classifier over every patch PNG in `input_dir_path` and write
  the raw and DBSCAN-clustered detections to CSV files."""
  input_file_paths = [str(f) for f in Path(input_dir_path).glob('*.png')]
  input_files = np.asarray(input_file_paths, dtype=str)
  input_file_dataset = tf.data.Dataset.from_tensor_slices(input_files)
  img_dataset = input_file_dataset.map(
      lambda file: get_image_tf(file),
      num_parallel_calls=num_parallel_calls)  # was hard-coded to 1
  img_dataset = img_dataset \
      .map(lambda img: normalize(img, "resnet_custom")) \
      .batch(batch_size=batch_size)
  img_iterator = img_dataset.make_one_shot_iterator()
  next_batch = img_iterator.get_next()
  prob_result = np.empty((0, 1))
  while True:
    try:
      img_batch = sess.run(next_batch)
      pred_np = model.predict(img_batch, batch_size)
      prob_result = np.concatenate((prob_result, pred_np), axis=0)
    except tf.errors.OutOfRangeError:
      # The one-shot iterator is exhausted once every patch has been scored.
      print("prediction result size: {}".format(prob_result.shape))
      break
  assert prob_result.shape[0] == input_files.shape[0]
  # Keep only the patches whose predicted probability exceeds the threshold.
  mitosis_probs = prob_result[prob_result > prob_thres]
  input_files = input_files.reshape(-1, 1)
  mitosis_patch_files = input_files[prob_result > prob_thres]
  inference_result = []
  for i in range(mitosis_patch_files.size):
    row, col = get_location_from_file_name(mitosis_patch_files[i])
    prob = mitosis_probs[i]
    inference_result.append((row, col, prob))
  if len(inference_result) > 0:
    clustered_pred_locations = dbscan_clustering(
        inference_result, eps=eps, min_samples=min_samples,
        isWeightedAvg=isWeightedAvg)
    tuple_2_csv(inference_result,
                os.path.join(output_dir_path, 'mitosis_locations.csv'))
    tuple_2_csv(
        clustered_pred_locations,
        os.path.join(output_dir_path, 'clustered_mitosis_locations.csv'))
  else:
    print("No mitosis detected in {}".format(input_dir_path))
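
# `dbscan_clustering` is defined elsewhere in the repo. A sketch of the
# assumed behaviour using scikit-learn's DBSCAN: cluster the (row, col, prob)
# detections and reduce each cluster to a single point, optionally weighting
# the centroid by probability. The repo's own implementation may differ, e.g.
# in how the cluster probability is reported.
from sklearn.cluster import DBSCAN

def dbscan_clustering(points, eps=64, min_samples=1, isWeightedAvg=False):
  """Collapse nearby detections into one (y, x, prob) tuple per cluster."""
  data = np.asarray(points, dtype=np.float64)
  coords, probs = data[:, :2], data[:, 2]
  labels = DBSCAN(eps=eps, min_samples=min_samples).fit(coords).labels_
  clustered = []
  for label in np.unique(labels):
    if label == -1:  # noise points (cannot occur when min_samples=1)
      continue
    mask = labels == label
    weights = probs[mask] if isWeightedAvg else None
    y, x = np.average(coords[mask], axis=0, weights=weights)
    clustered.append((y, x, float(probs[mask].mean())))
  return clustered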
def detect(model, dataset_dir, subset, submit_dir):
  """Run detection on images in the given directory."""
  print("Running on {}".format(dataset_dir))

  # Create directory
  if not os.path.exists(RESULTS_DIR):
    os.makedirs(RESULTS_DIR)
  submit_dir = os.path.join(RESULTS_DIR, submit_dir)
  print("Inference results are saved to ", submit_dir)
  os.makedirs(submit_dir)

  # Read dataset
  dataset = NucleusDataset()
  dataset.load_nucleus(dataset_dir, subset)
  dataset.prepare()

  # Loop over images
  submission = []
  for image_id in dataset.image_ids:
    # Load image and run detection
    image = dataset.load_image(image_id)
    r = model.detect([image], verbose=0)[0]
    source_id = dataset.image_info[image_id]["id"]
    # Optionally encode the masks to RLE for a Kaggle-style submission:
    # rle = mask_to_rle(source_id, r["masks"], r["scores"])
    # submission.append(rle)
    # Save the image with the predicted nucleus centers drawn on it, plus a
    # CSV of the center coordinates.
    image_cv, nucleus_centers = visualize.visualize_instances(
        image, r['rois'], r['masks'], r['class_ids'], dataset.class_names,
        r['scores'], show_bbox=False, show_mask=False, title="Predictions")
    cv2.imwrite("{}/{}.png".format(submit_dir, source_id), image_cv)
    tuple_2_csv(nucleus_centers,
                "{}/{}.csv".format(submit_dir, source_id),
                columns=['Y', 'X'])
  # Optionally save the RLE submission to a CSV file:
  # submission = "ImageId,EncodedPixels\n" + "\n".join(submission)
  # with open(os.path.join(submit_dir, "submit.csv"), "w") as f:
  #   f.write(submission)
  return submit_dir
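
# The commented-out submission path in `detect` relies on a `mask_to_rle`
# helper defined elsewhere. A minimal sketch of Kaggle-style run-length
# encoding for a single binary mask (column-major pixel order, 1-based
# indices, as in the Data Science Bowl format); the repo's helper presumably
# also joins the image id and handles multiple instance masks.
def rle_encode(mask):
  """Encode a 2-D binary mask as a run-length string: 'start length ...'."""
  pixels = np.concatenate([[0], mask.T.flatten(), [0]])  # pad both ends
  runs = np.where(pixels[1:] != pixels[:-1])[0] + 1      # 1-based change points
  runs[1::2] -= runs[::2]                                # convert ends to lengths
  return ' '.join(str(v) for v in runs)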
def run_mitosis_classification(model, sess, batch_size, input_dir_path,
                               output_dir_path, augmentation_number,
                               mitosis_tile_size=64, num_parallel_calls=1,
                               prefetch=32, prob_thres=0.5, eps=64,
                               min_samples=1, isWeightedAvg=False):
  """Classify every patch PNG in `input_dir_path`, optionally averaging the
  predictions over `augmentation_number` test-time augmentations, and write
  the raw and DBSCAN-clustered detections to CSV files."""
  input_file_paths = [str(f) for f in Path(input_dir_path).glob('*.png')]
  input_files = np.asarray(input_file_paths, dtype=str)
  input_file_dataset = tf.data.Dataset.from_tensor_slices(input_files)
  img_dataset = input_file_dataset.map(
      lambda file: get_image_tf(file),
      num_parallel_calls=num_parallel_calls)  # was hard-coded to 1
  if augmentation_number == 1:
    img_dataset = img_dataset \
        .map(lambda img: normalize(img, "resnet_custom"),
             num_parallel_calls=num_parallel_calls) \
        .batch(batch_size) \
        .prefetch(prefetch)
    # Make sure every file in the dataset is fed into inference, including a
    # final partial batch; this is just ceil(len / batch_size).
    steps = math.ceil(len(input_file_paths) / batch_size)
  else:
    # Each input tile becomes its own batch of `augmentation_number` variants.
    img_dataset = img_dataset \
        .map(lambda img: create_augmented_batch(img, augmentation_number,
                                                mitosis_tile_size),
             num_parallel_calls=num_parallel_calls) \
        .map(lambda img: normalize(img, "resnet_custom"),
             num_parallel_calls=num_parallel_calls) \
        .prefetch(prefetch)
    steps = len(input_file_paths)
  img_iterator = img_dataset.make_one_shot_iterator()
  next_batch = img_iterator.get_next()
  # `model.predict` consumes the one-shot iterator exactly once; if `steps`
  # overshoots the dataset size it raises OutOfRangeError partway through.
  try:
    pred_np = model.predict(next_batch, steps=steps)
    print("Prediction result shape: ", pred_np.shape)
  except tf.errors.OutOfRangeError:
    raise ValueError("Please check the steps parameter. steps = {}, "
                     "batch_size = {}, input_tile_size = {}, "
                     "augmentation_number = {}"
                     .format(steps, batch_size, input_files.shape,
                             augmentation_number))
  # Average the predictions over the augmented copies of each tile.
  prob_result = np.average(pred_np.reshape(-1, augmentation_number), axis=1)
  print("Finished inference on {} with {} input tiles"
        .format(input_dir_path, prob_result.shape))
  assert prob_result.shape[0] == input_files.shape[0]
  # Keep only the tiles whose averaged probability exceeds the threshold.
  mitosis_probs = prob_result[prob_result > prob_thres]
  input_files = input_files.reshape(-1, 1)
  mitosis_patch_files = input_files[prob_result > prob_thres]
  inference_result = []
  for i in range(mitosis_patch_files.size):
    row, col = get_location_from_file_name(mitosis_patch_files[i])
    prob = mitosis_probs[i]
    inference_result.append((row, col, prob))
  if len(inference_result) > 0:
    clustered_pred_locations = dbscan_clustering(
        inference_result, eps=eps, min_samples=min_samples,
        isWeightedAvg=isWeightedAvg)
    tuple_2_csv(inference_result,
                os.path.join(output_dir_path, 'mitosis_locations.csv'))
    tuple_2_csv(
        clustered_pred_locations,
        os.path.join(output_dir_path, 'clustered_mitosis_locations.csv'))
  else:
    print("No mitosis detected in {}".format(input_dir_path))
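
# `create_augmented_batch` comes from the repo's preprocessing code. A minimal
# sketch of the assumed test-time augmentation, producing up to 8 variants
# (4 rotations x 2 flips) of a center-cropped tile; the augmentations actually
# used upstream may differ.
def create_augmented_batch(img, augmentation_number, tile_size):
  """Return a batch of `augmentation_number` augmented copies of `img`."""
  img = tf.image.resize_image_with_crop_or_pad(img, tile_size, tile_size)
  variants = [tf.image.rot90(img, k) for k in range(4)]
  flipped = tf.image.flip_left_right(img)
  variants += [tf.image.rot90(flipped, k) for k in range(4)]
  return tf.stack(variants[:augmentation_number])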