def cluster_prediction_result(pred_dir, eps, min_samples, hasHeader, isWeightedAvg=False,
                              prob_threshold=0):
  """Cluster the prediction results to avoid the duplicated predictions
  introduced by the small stride.

  Reads each csv prediction file in `pred_dir`, drops low-probability
  locations, clusters the remainder with DBSCAN, and writes the cluster
  centers to a sibling `<pred_dir>_clustered/` directory (recreated on
  each run).

  Args:
    pred_dir: directory for the prediction result
    eps: maximum distance between two samples for them to be considered
      as in the same neighborhood.
    min_samples: number of samples (or total weight) in a neighborhood
      for a point to be considered as a core point.
    hasHeader: boolean value to indicate if the csv file has the header
    isWeightedAvg: boolean value to indicate if add the prediction
      probabilities as the weight to compute the averaged coordinates of
      each cluster.
    prob_threshold: probability threshold over which the location is
      considered a positive prediction for the purposes of clustering.
  """
  pred_files = list_files(pred_dir, "*.csv")
  pred_files = get_file_id(pred_files, GROUND_TRUTH_FILE_ID_RE)

  clustered_dir = os.path.dirname(pred_dir + "/") + "_clustered/"
  # start from a clean output directory so stale results never mix in
  if os.path.exists(clustered_dir):
    print(clustered_dir)
    shutil.rmtree(clustered_dir)

  for k, pred_file in pred_files.items():
    print(pred_file)
    pred_locations = get_locations_from_csv(pred_file, hasHeader=hasHeader, hasProb=True)
    # keep only locations whose probability (3rd field) exceeds the threshold
    pred_locations = [p for p in pred_locations if float(p[2]) > prob_threshold]

    # apply dbscan clustering on each prediction file
    if len(pred_locations) > 0:
      clustered_pred_locations = dbscan_clustering(
          pred_locations, eps=eps, min_samples=min_samples, isWeightedAvg=isWeightedAvg)

      # save the prediction results.
      # BUGFIX: `columns` was a set literal, whose iteration order is
      # arbitrary and could scramble the output column order; use an
      # ordered list instead.
      clustered_file_name = pred_file.replace(pred_dir, clustered_dir)
      tuple_2_csv(clustered_pred_locations, clustered_file_name,
                  columns=['row', 'col', 'avg_prob'])
def detect_prediction_results(pred_dir, img_dir, radius, prob_thresh, hasHeader):
  """Detect mitoses from probability maps through an iterative procedure.

  This will read csv prediction files, and output csv prediction files
  containing coordinates of the predicted mitoses centers, written to a
  sibling `<pred_dir>_detected` directory.

  Args:
    pred_dir: Directory containing the prediction results
    img_dir: Directory containing the images
    radius: Integer value for the radius of the disk kernel.
    prob_thresh: A floating value representing the lower bound on the
      probability values.
    hasHeader: Boolean value to indicate if the csv file has the header
  """
  pred_files = list_files(pred_dir, "*.csv")
  pred_files = get_file_id(pred_files, GROUND_TRUTH_FILE_ID_RE)
  img_files = list_files(img_dir, "*.tif")
  img_files = get_file_id(img_files, GROUND_TRUTH_FILE_ID_RE)

  # loop-invariant: compute the output directory once, not per file
  detected_dir = os.path.dirname(pred_dir + "/") + "_detected"

  for k, pred_file in pred_files.items():
    # convert ijv predictions to prob maps.
    # BUGFIX: PIL's Image.size is (width, height); the original unpacked
    # it as (h, w), transposing the map for non-square images.
    img_file = img_files[k]
    w, h = Image.open(img_file).size
    probs = csv_2_arr(pred_file, h, w, hasHeader=hasHeader)

    # detect the centers of the mitoses
    preds_detected = identify_mitoses(probs, radius, prob_thresh)

    # save the prediction results.
    # BUGFIX: `columns` was a set literal (arbitrary order); use an
    # ordered list so the csv column order is deterministic.
    detected_file_name = pred_file.replace(pred_dir, detected_dir)
    tuple_2_csv(preds_detected, detected_file_name, columns=['row', 'col', 'prob'])
def smooth_prediction_results(pred_dir, img_dir, radius, hasHeader):
  """Smooth the probability maps by convolving them with a disk kernel.

  This will read csv prediction files, and output smoothed csv prediction
  files to a sibling `<pred_dir>_smoothed` directory.

  Args:
    pred_dir: Directory containing the prediction results
    img_dir: Directory containing the images
    radius: Integer value for the radius of the disk kernel.
    hasHeader: Boolean value to indicate if the csv file has the header
  """
  pred_files = list_files(pred_dir, "*.csv")
  pred_files = get_file_id(pred_files, GROUND_TRUTH_FILE_ID_RE)
  img_files = list_files(img_dir, "*.tif")
  img_files = get_file_id(img_files, GROUND_TRUTH_FILE_ID_RE)

  # create conv smoothing graph once; reuse it for every prob map
  probs_tf = tf.placeholder(shape=[None, None], dtype=tf.float32)
  probs_smooth_tf = conv_smooth(probs_tf, radius)

  # loop-invariant: compute the output directory once, not per file
  smooth_dir = os.path.dirname(pred_dir + "/") + "_smoothed"

  with tf.Session() as sess:
    for k, pred_file in pred_files.items():
      # convert ijv predictions to prob maps.
      # BUGFIX: PIL's Image.size is (width, height); the original unpacked
      # it as (h, w), transposing the map for non-square images.
      img_file = img_files[k]
      w, h = Image.open(img_file).size
      probs = csv_2_arr(pred_file, h, w, hasHeader=hasHeader)

      # smooth the probability maps
      probs_smooth = sess.run(probs_smooth_tf, feed_dict={probs_tf: probs})

      # save the prediction results
      smooth_file_name = pred_file.replace(pred_dir, smooth_dir)
      arr_2_csv(probs_smooth, smooth_file_name)