def label_detected_nucleus(nucleus_dir, ground_truth_dir):
    """Label each detected nucleus as mitosis or not.

    For every ground-truth csv found one sub-directory deep under
    ``ground_truth_dir``, reads the matching inference csv from
    ``nucleus_dir`` (named ``<sub_dir>-<base>``) and writes a
    ``label-<sub_dir>-<base>`` csv in which each detected point carries a
    boolean ``is_mitosis`` flag: True when the point lies within a
    32-pixel radius of at least one ground-truth point.

    Args:
      nucleus_dir: directory containing the inference csv files; the
        labeled csv files are written back into this directory.
      ground_truth_dir: directory containing the ground-truth csv files,
        laid out as <ground_truth_dir>/<sub_dir>/<base>.csv.
    """
    ground_truth_csvs = [str(f) for f in Path(ground_truth_dir).glob('*/*.csv')]
    for ground_truth_csv in ground_truth_csvs:
        # Use a fresh local name here; the original shadowed the
        # ground_truth_dir parameter, which broke later loop iterations'
        # intent of reading from the caller-supplied directory name.
        gt_parent, base = os.path.split(ground_truth_csv)
        sub_dir = os.path.split(gt_parent)[1]
        inference_csv = os.path.join(nucleus_dir, "{}-{}".format(sub_dir, base))
        label_csv = os.path.join(nucleus_dir,
                                 "label-{}-{}".format(sub_dir, base))
        ground_truth_locations = get_locations_from_csv(
            ground_truth_csv, hasHeader=False, hasProb=False)
        inference_locations = get_locations_from_csv(
            inference_csv, hasHeader=True, hasProb=True)
        # Build a fresh list per file instead of clearing one shared list.
        label_output = []
        for (y1, x1, prob) in inference_locations:
            # for/else: the else branch runs only when no ground-truth
            # point matched (replaces the manual `inside` flag).
            for (y2, x2) in ground_truth_locations:
                if is_inside(x1, y1, x2, y2, 32):
                    label_output.append((y1, x1, prob, True))
                    break
            else:
                label_output.append((y1, x1, prob, False))
        print(len(label_output), len(inference_locations))
        # Sanity check: every inference point received exactly one label.
        assert len(label_output) == len(inference_locations)
        tuple_2_csv(label_output, label_csv, ['Y', 'X', 'prob', 'is_mitosis'])
def check_nucleus_inference(inference_dir, ground_truth_dir):
    """Report how many ground-truth points the inference recovered.

    Walks every ground-truth csv one sub-directory deep under
    ``ground_truth_dir``, pairs it with the inference csv named
    ``<sub_dir>-<base>`` in ``inference_dir``, counts the ground-truth
    points that lie within a 32-pixel radius of some detected point, and
    prints the totals.
    """
    gt_files = [str(f) for f in Path(ground_truth_dir).glob('*/*.csv')]
    matched_count, total_count = 0, 0
    for gt_file in gt_files:
        parent, base = os.path.split(gt_file)
        sub_dir = os.path.split(parent)[1]
        inference_csv = os.path.join(inference_dir,
                                     "{}-{}".format(sub_dir, base))
        gt_points = get_locations_from_csv(gt_file,
                                           hasHeader=False,
                                           hasProb=False)
        detections = get_locations_from_csv(inference_csv,
                                            hasHeader=True,
                                            hasProb=False)
        for (x1, y1) in gt_points:
            total_count += 1
            # any() short-circuits on the first detection close enough,
            # matching the original break-on-first-match behavior.
            if any(is_inside(x2, y2, x1, y1, 32) for (x2, y2) in detections):
                matched_count += 1
    print("There are {} ground truth points, found {} of them.".format(
        total_count, matched_count))
def combine_csvs(input_dir, output_dir, hasHeader=True, hasProb=True,
                 clean_output_dir=False):
    """Merge per-tile point csv files into one csv per base image.

    Each input file is named ``<basename>_<y_offset>_<x_offset>.csv``.
    The points inside it are shifted by the offsets encoded in the file
    name and accumulated per basename; one combined csv per basename is
    then written to ``output_dir`` with columns (Y, X, prob).

    Args:
      input_dir: directory searched recursively for the per-tile csvs.
      output_dir: directory the combined csv files are written into
        (created if missing).
      hasHeader: True if the input csv files start with a header row.
      hasProb: True if each input row carries a probability column.
      clean_output_dir: if True, remove any existing output_dir first.
    """
    if clean_output_dir and os.path.exists(output_dir):
        # Guard the rmtree: removing a non-existent directory would raise.
        shutil.rmtree(output_dir)
    input_files = [str(f) for f in Path(input_dir).glob('**/**/*.csv')]
    # Renamed from `combine_csvs`, which shadowed this function's own name.
    combined = {}
    for input_file in input_files:
        points = get_locations_from_csv(input_file, hasHeader=hasHeader,
                                        hasProb=hasProb)
        basename, y_offset, x_offset = \
            os.path.basename(input_file).split('.')[0].split("_")
        y_offset = int(y_offset)
        x_offset = int(x_offset)
        # Shift every point into the coordinate frame of the full image.
        shifted = [(p[0] + y_offset, p[1] + x_offset, p[2]) for p in points]
        combined.setdefault(basename, []).extend(shifted)

    os.makedirs(output_dir, exist_ok=True)
    for basename, rows in combined.items():
        tuple_2_csv(rows,
                    os.path.join(output_dir, basename) + '.csv',
                    columns=['Y', 'X', 'prob'])
# Example #4
def cluster_prediction_result(pred_dir,
                              eps,
                              min_samples,
                              hasHeader,
                              isWeightedAvg=False,
                              prob_threshold=0):
    """Cluster the prediction results to avoid the duplicated
    predictions introduced by the small stride.

    Args:
      pred_dir: directory for the prediction result.
      eps: maximum distance between two samples for them to be considered
        as in the same neighborhood.
      min_samples: number of samples (or total weight) in a neighborhood
        for a point to be considered as a core point.
      hasHeader: boolean value to indicate if the csv file has the header.
      isWeightedAvg: boolean value to indicate if add the prediction
        probabilities as the weight to compute the averaged coordinates
        of each cluster.
      prob_threshold: probability threshold over which the location is
        considered a positive prediction for the purposes of clustering.
    """
    pred_files = list_files(pred_dir, "*.csv")
    pred_files = get_file_id(pred_files, GROUND_TRUTH_FILE_ID_RE)
    # Output goes to a sibling "<pred_dir>_clustered/" directory, which is
    # wiped first so stale results never mix with the new run.
    clustered_dir = os.path.dirname(pred_dir + "/") + "_clustered/"
    if os.path.exists(clustered_dir):
        print(clustered_dir)
        shutil.rmtree(clustered_dir)

    for k, pred_file in pred_files.items():
        print(pred_file)
        pred_locations = get_locations_from_csv(pred_file,
                                                hasHeader=hasHeader,
                                                hasProb=True)

        # Keep only locations above the probability threshold.
        pred_locations = [
            p for p in pred_locations if float(p[2]) > prob_threshold
        ]

        # apply dbscan clustering on each prediction file
        if len(pred_locations) > 0:
            clustered_pred_locations = dbscan_clustering(
                pred_locations,
                eps=eps,
                min_samples=min_samples,
                isWeightedAvg=isWeightedAvg)

            # save the prediction results
            clustered_file_name = pred_file.replace(pred_dir, clustered_dir)

            # BUG FIX: columns was a set literal {...}, whose iteration
            # order is arbitrary, so the csv header order could vary per
            # run. A list keeps the column order deterministic.
            tuple_2_csv(clustered_pred_locations,
                        clustered_file_name,
                        columns=['row', 'col', 'avg_prob'])
# Example #5
def csv_2_arr(csv_file, h, w, hasHeader):
    """Convert a csv file with columns (row, col, val) into a 2D array
    where arr[row, col] = val.

    Args:
      csv_file: path of the csv file to read.
      h: number of rows in the resulting array.
      w: number of columns in the resulting array.
      hasHeader: boolean value to indicate if the input csv file has a
        header row.

    Returns:
      A 2D NumPy array of shape (h, w) in which [row, col] = val.
    """
    ijv = np.asarray(
        get_locations_from_csv(csv_file, hasHeader=hasHeader, hasProb=True))
    return ijv_2_arr(ijv, h, w)
# Example #6
def extract_patches(img_dir,
                    location_csv_dir,
                    output_patch_basedir,
                    patch_size=64):
    """Extract square image patches centered on csv-listed points.

    For every csv in ``location_csv_dir``, derives the image sub-path by
    turning '-' in the csv name into '/', loads the matching ``.tif``
    image from ``img_dir``, reverses the channel axis (OpenCV reads BGR;
    downstream code presumably expects RGB), and saves one
    ``patch_size`` x ``patch_size`` patch per (row, col) point under
    ``output_patch_basedir/<sub-path>``.

    Args:
      img_dir: base directory containing the .tif input images.
      location_csv_dir: directory containing point-location csv files.
      output_patch_basedir: base directory for the extracted patches.
      patch_size: side length in pixels of each square patch.

    Raises:
      ValueError: if no csv file is found in location_csv_dir.
    """
    location_csv_files = [str(f) for f in Path(location_csv_dir).glob('*.csv')]
    if not location_csv_files:
        raise ValueError(
            "Please check the input dir for the location csv files.")

    for location_csv_file in location_csv_files:
        print("Processing {} ......".format(location_csv_file))
        points = get_locations_from_csv(location_csv_file,
                                        hasHeader=True,
                                        hasProb=False)
        # Recover the image sub-path from the csv name: "a-b.csv" -> "a/b".
        subfolder = os.path.basename(location_csv_file) \
            .replace('-', '/') \
            .replace('.csv', '')
        img_file = os.path.join(img_dir, "{}.tif".format(subfolder))
        print("Processing {} ......".format(img_file))
        img = cv2.imread(img_file)
        # Reverse the channel axis (BGR -> RGB for OpenCV-read images).
        img = np.asarray(img)[:, :, ::-1]

        output_patch_dir = os.path.join(output_patch_basedir, subfolder)
        # exist_ok=True already tolerates an existing directory, so the
        # original os.path.exists() pre-check was redundant.
        os.makedirs(output_patch_dir, exist_ok=True)

        for (row, col) in points:
            patch = extract_patch(img, row, col, patch_size)
            save_patch(patch,
                       path=output_patch_dir,
                       lab=0,
                       case=0,
                       region=0,
                       row=row,
                       col=col,
                       rotation=0,
                       row_shift=0,
                       col_shift=0,
                       suffix=0,
                       ext="png")