Example 1
def make_candidate_auto_images(candidate_types=[]):
    dst_dir = settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_auto/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for candidate_type in candidate_types:
        for file_path in glob.glob(dst_dir + "*_" + candidate_type + ".png"):
            os.remove(file_path)

    for candidate_type in candidate_types:
        if candidate_type == "falsepos":
            src_dir = "resources/luna16_falsepos_labels/"
        else:
            src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"

        for index, csv_file in enumerate(
                glob.glob(src_dir + "*_candidates_" + candidate_type +
                          ".csv")):
            patient_id = ntpath.basename(csv_file).replace(
                "_candidates_" + candidate_type + ".csv", "")
            settings.log.info("{0}, patient: {1}, type:{2} ".format(
                index, patient_id, candidate_type))
            # if not "148229375703208214308676934766" in patient_id:
            #     continue
            df_annos = pandas.read_csv(csv_file)
            if len(df_annos) == 0:
                continue
            images = helpers.load_patient_images(
                patient_id,
                settings.LUNA16_EXTRACTED_IMAGE_DIR,
                "*" + CUBE_IMGTYPE_SRC + ".png",
                exclude_wildcards=[])

            row_no = 0
            for index, row in df_annos.iterrows():
                # the CSV stores coordinates as fractions of the volume extent
                coord_x = int(row["coord_x"] * images.shape[2])
                coord_y = int(row["coord_y"] * images.shape[1])
                coord_z = int(row["coord_z"] * images.shape[0])
                anno_index = int(row["anno_index"])
                cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z,
                                             48)
                if cube_img.sum() < 10:  # skip cubes that are essentially empty
                    settings.log.info(" ***** Skipping {0} {1} {2} ".format(
                        coord_x, coord_y, coord_z))
                    continue
                # print(cube_img.sum())
                try:
                    save_cube_img(
                        dst_dir + patient_id + "_" + str(anno_index) + "_0_" +
                        candidate_type + ".png", cube_img, 6, 8)
                except Exception as ex:
                    settings.log.exception(ex)

                row_no += 1
                # cap the number of cubes kept per patient, depending on the candidate type
                max_item = 240 if candidate_type == "white" else 200
                if candidate_type == "luna":
                    max_item = 500
                if row_no > max_item:
                    break
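
The candidate CSVs read above store nodule positions as fractions of the image volume, which the loop converts into voxel indices before cutting the cube. Below is a minimal standalone sketch of that conversion; the volume shape and fractional values are made up for illustration.

import numpy as np

# Hypothetical patient volume: 200 slices of 512x512 pixels, in (z, y, x) order as above.
images = np.zeros((200, 512, 512), dtype=np.uint8)

# Fractional coordinates as they appear in the candidate CSVs (illustrative values).
row = {"coord_x": 0.32, "coord_y": 0.57, "coord_z": 0.48}

# Same conversion as in make_candidate_auto_images: fraction * axis length, truncated to int.
coord_x = int(row["coord_x"] * images.shape[2])
coord_y = int(row["coord_y"] * images.shape[1])
coord_z = int(row["coord_z"] * images.shape[0])
print(coord_x, coord_y, coord_z)  # 163 291 96
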
Example 2
def make_pos_annotation_images_manual():
    src_dir = "resources/luna16_manual_labels/"

    dst_dir = settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_manual/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for file_path in glob.glob(dst_dir + "*_manual.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4" not in patient_id:
            continue

        print(patient_id)
        # if not "172845185165807139298420209778" in patient_id:
        #     continue
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(
            patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        for index, row in df_annos.iterrows():
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            diameter = int(row["d"] * images.shape[2])
            node_type = int(row["id"])
            malscore = int(diameter)
            malscore = min(25, malscore)  # clamp the score to the range [16, 25]
            malscore = max(16, malscore)
            anno_index = index
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                settings.log.info(" ***** Skipping {0} {1} {2} ".format(
                    coord_x, coord_y, coord_z))
                continue

            if cube_img.mean() < 10:
                settings.log.info(" ***** Suspicious {0} {1} {2} ".format(
                    coord_x, coord_y, coord_z))

            if cube_img.shape != (64, 64, 64):
                settings.log.info(
                    " ***** incorrect shape !!! {0} - {1} {2} {3} ".format(
                        str(anno_index), coord_x, coord_y, coord_z))
                continue

            save_cube_img(
                dst_dir + patient_id + "_" + str(anno_index) + "_" +
                str(malscore) + "_1_" + ("pos" if node_type == 0 else "neg") +
                ".png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id,
                              len(df_annos)], [5, 64, 8])
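
save_cube_img comes from the project's helpers and is not shown in this listing. Assuming it tiles the cube's z-slices into a rows x cols grid so the 3D cube can be stored as a single PNG, a minimal standalone version might look like the sketch below (the name save_cube_to_grid is made up).

import numpy as np

def save_cube_to_grid(cube_img, rows, cols):
    # Tile the z-slices of a (z, y, x) cube into a single rows x cols 2D image.
    assert rows * cols == cube_img.shape[0]
    size_y, size_x = cube_img.shape[1], cube_img.shape[2]
    grid = np.zeros((rows * size_y, cols * size_x), dtype=cube_img.dtype)
    for i in range(cube_img.shape[0]):
        r, c = divmod(i, cols)
        grid[r * size_y:(r + 1) * size_y, c * size_x:(c + 1) * size_x] = cube_img[i]
    return grid

# A 64x64x64 cube saved with (8, 8) maps onto an 8x8 grid of slices, i.e. a 512x512 image.
demo = save_cube_to_grid(np.random.randint(0, 255, (64, 64, 64), dtype=np.uint8), 8, 8)
print(demo.shape)  # (512, 512)
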
Example 3
def make_pos_annotation_images_manual_ndsb3():
    src_dir = "resources/ndsb3_manual_labels/"
    dst_dir = settings.WORKING_DIR + "generated_traindata/ndsb3_train_cubes_manual/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    train_label_df = pandas.read_csv("resources/stage1_labels.csv")
    train_label_df.set_index(["id"], inplace=True)
    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(glob.glob(src_dir + "*.csv")):
        patient_id = ntpath.basename(csv_file).replace(".csv", "")
        if "1.3.6.1.4.1" in patient_id:
            continue

        cancer_label = train_label_df.loc[patient_id]["cancer"]
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(
            patient_id, settings.NDSB3_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        anno_index = 0
        for index, row in df_annos.iterrows():
            pos_neg = "pos" if row["id"] == 0 else "neg"
            coord_x = int(row["x"] * images.shape[2])
            coord_y = int(row["y"] * images.shape[1])
            coord_z = int(row["z"] * images.shape[0])
            malscore = int(round(row["dmm"]))
            anno_index += 1
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                settings.log.info(" ***** Skipping {0} {1} {2} ".format(
                    coord_x, coord_y, coord_z))
                continue

            if cube_img.mean() < 10:
                settings.log.info(" ***** Suspicious {0} {1} {2} ".format(
                    coord_x, coord_y, coord_z))

            if cube_img.shape != (64, 64, 64):
                settings.log.info(
                    " ***** incorrect shape !!! {0} - {1} {2} {3} ".format(
                        str(anno_index), coord_x, coord_y, coord_z))
                continue
            print(patient_id)
            assert malscore > 0 or pos_neg == "neg"
            save_cube_img(
                dst_dir + "ndsb3manual_" + patient_id + "_" + str(anno_index) +
                "_" + pos_neg + "_" + str(cancer_label) + "_" + str(malscore) +
                "_1_pn.png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id,
                              len(df_annos)], [5, 64, 8])
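
The cancer label for each NDSB3 patient is looked up from stage1_labels.csv through a DataFrame indexed by patient id. A tiny self-contained illustration of that lookup, with made-up ids:

import pandas as pd

# Stand-in for resources/stage1_labels.csv, which has "id" and "cancer" columns.
train_label_df = pd.DataFrame({"id": ["patient_a", "patient_b"], "cancer": [1, 0]})
train_label_df.set_index(["id"], inplace=True)

cancer_label = train_label_df.loc["patient_a"]["cancer"]
print(cancer_label)  # 1
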
Example 4
def make_annotation_images_lidc():
    src_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "_labels/"

    dst_dir = settings.WORKING_DIR + "generated_traindata/luna16_train_cubes_lidc/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(
            glob.glob(src_dir + "*_annos_pos_lidc.csv")):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos_lidc.csv",
                                                       "")
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(
            patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR,
            "*" + CUBE_IMGTYPE_SRC + ".png")

        for index, row in df_annos.iterrows():
            coord_x = int(row["coord_x"] * images.shape[2])
            coord_y = int(row["coord_y"] * images.shape[1])
            coord_z = int(row["coord_z"] * images.shape[0])
            malscore = int(row["malscore"])
            anno_index = row["anno_index"]
            # encode characters that would otherwise break the "_"-separated file name
            anno_index = str(anno_index).replace(" ", "xspacex").replace(
                ".", "xpointx").replace("_", "xunderscorex")
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                settings.log.info(" ***** Skipping {0} {1} {2} ".format(
                    coord_x, coord_y, coord_z))
                continue

            if cube_img.mean() < 10:
                settings.log.info(" ***** Suspicious {0} {1} {2} ".format(
                    coord_x, coord_y, coord_z))

            if cube_img.shape != (64, 64, 64):
                settings.log.info(
                    " ***** incorrect shape !!! {0} - {1} {2} {3} ".format(
                        str(anno_index), coord_x, coord_y, coord_z))
                continue

            save_cube_img(
                dst_dir + patient_id + "_" + str(anno_index) + "_" +
                str(malscore * malscore) + "_1_pos.png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id,
                              len(df_annos)], [5, 64, 8])
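
Because the cube file names are later split on underscores, the LIDC anno_index (which can contain spaces, dots and underscores) is rewritten with placeholder tokens. The sketch below isolates that encoding and adds a hypothetical decode_anno_index helper that is not part of the original code.

def encode_anno_index(anno_index):
    # Replace characters that would break the "_"-separated cube file names.
    return (str(anno_index)
            .replace(" ", "xspacex")
            .replace(".", "xpointx")
            .replace("_", "xunderscorex"))

def decode_anno_index(encoded):
    # Hypothetical inverse, handy when parsing the file names back.
    return (encoded
            .replace("xspacex", " ")
            .replace("xpointx", ".")
            .replace("xunderscorex", "_"))

print(encode_anno_index("12.3_a b"))                     # 12xpointx3xunderscorexaxspacexb
print(decode_anno_index(encode_anno_index("12.3_a b")))  # 12.3_a b
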
Example 5
    def filter_patient_nodules_predictions(self, df_nodule_predictions, patient_id, image_dir, view_size):
        patient_mask = helpers.load_patient_images(patient_id, image_dir, "*_m.png")
        delete_indices = []
        for index, row in df_nodule_predictions.iterrows():
            z_perc = row["coord_z"]
            y_perc = row["coord_y"]
            center_x = int(round(row["coord_x"] * patient_mask.shape[2]))
            center_y = int(round(y_perc * patient_mask.shape[1]))
            center_z = int(round(z_perc * patient_mask.shape[0]))

            mal_score = row["diameter_mm"]
            start_y = center_y - view_size / 2
            start_x = center_x - view_size / 2
            nodule_in_mask = False
            for z_index in [-1, 0, 1]:
                img = patient_mask[z_index + center_z]
                start_x = int(start_x)
                start_y = int(start_y)
                view_size = int(view_size)
                img_roi = img[start_y:start_y + view_size, start_x:start_x + view_size]
                if img_roi.sum() > 255:  # more than 1 pixel of mask.
                    self.logger.info("More than 1 pixel of mask. nodule_in_mask is true")
                    nodule_in_mask = True

            if not nodule_in_mask:
                self.logger.info("Nodule not in mask: {0} {1} {2}".format(center_x, center_y, center_z))
                if mal_score > 0:
                    mal_score *= -1
                df_nodule_predictions.loc[index, "diameter_mm"] = mal_score
            else:
                if center_z < 30:
                    self.logger.info("Z < 30: {0} center z: {1}  y_perc: {2} ".format(patient_id, center_z, y_perc))
                    if mal_score > 0:
                        mal_score *= -1
                    df_nodule_predictions.loc[index, "diameter_mm"] = mal_score

                if (z_perc > 0.75 or z_perc < 0.25) and y_perc > 0.85:
                    self.logger.info("SUSPICIOUS FALSEPOSITIVE: {0}  center z: {1}  y_perc: {2}".format(
                        patient_id, center_z, y_perc))

                if center_z < 50 and y_perc < 0.30:
                    self.logger.info("SUSPICIOUS FALSEPOSITIVE OUT OF RANGE: {0} center z: {1} y_perc: {2}".format(
                        patient_id, center_z, y_perc))

        df_nodule_predictions.drop(df_nodule_predictions.index[delete_indices], inplace=True)
        return df_nodule_predictions
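
The check above treats an ROI sum greater than 255 as "more than 1 pixel of mask", which implies the *_m.png lung masks store lung pixels as 255 and background as 0. A self-contained illustration of that criterion, with a toy mask and coordinates:

import numpy as np

# Toy lung-mask slice: 0 = background, 255 = lung, as in the *_m.png images.
mask_slice = np.zeros((512, 512), dtype=np.uint8)
mask_slice[200:260, 180:240] = 255

view_size = 32
center_y, center_x = 230, 210
start_y = int(center_y - view_size / 2)
start_x = int(center_x - view_size / 2)

img_roi = mask_slice[start_y:start_y + view_size, start_x:start_x + view_size]
print(img_roi.sum() > 255)  # True: more than one mask pixel falls inside the window
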
Example 6
def make_pos_annotation_images():
    src_dir = settings.LUNA_16_TRAIN_DIR2D2 + "metadata/"
    dst_dir = settings.WORKING_DIR + "luna16_train_cubes_pos/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    for file_path in glob.glob(dst_dir + "*.*"):
        os.remove(file_path)

    for patient_index, csv_file in enumerate(
            glob.glob(src_dir + "*_annos_pos.csv")):
        patient_id = ntpath.basename(csv_file).replace("_annos_pos.csv", "")
        # print(patient_id)
        # if not "148229375703208214308676934766" in patient_id:
        #     continue
        df_annos = pandas.read_csv(csv_file)
        if len(df_annos) == 0:
            continue
        images = helpers.load_patient_images(patient_id,
                                             settings.LUNA_16_TRAIN_DIR2D2,
                                             "*" + CUBE_IMGTYPE_SRC + ".png")

        for index, row in df_annos.iterrows():
            coord_x = int(row["coord_x"] * images.shape[2])
            coord_y = int(row["coord_y"] * images.shape[1])
            coord_z = int(row["coord_z"] * images.shape[0])
            diam_mm = int(row["diameter"] * images.shape[2])  # diameter converted from a fraction of the x extent to voxels
            anno_index = int(row["anno_index"])
            cube_img = get_cube_from_img(images, coord_x, coord_y, coord_z, 64)
            if cube_img.sum() < 5:
                settings.log.info(" ***** Skipping {0} {1} {2}".format(
                    coord_x, coord_y, coord_z))
                continue

            if cube_img.mean() < 10:
                settings.log.info(" ***** Suspicious {0} {1} {2}".format(
                    coord_x, coord_y, coord_z))

            save_cube_img(
                dst_dir + patient_id + "_" + str(anno_index) + "_" +
                str(diam_mm) + "_1_" + "pos.png", cube_img, 8, 8)
        helpers.print_tabbed([patient_index, patient_id,
                              len(df_annos)], [5, 64, 8])
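
helpers.print_tabbed is not shown in this listing; the call above passes a list of values together with a list of column widths. Under the assumption that it simply left-pads each value to its column width, a stand-in could look like this (print_tabbed_sketch is a made-up name):

def print_tabbed_sketch(values, widths):
    # Left-pad each value to its column width so progress lines stay aligned.
    print("".join(str(value).ljust(width) for value, width in zip(values, widths)))

print_tabbed_sketch([3, "1.3.6.1.4.1...", 12], [5, 64, 8])
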
Example 7
    def predict_patient(self, patient_id, image_dir, result_dir, magnification=1, flip=False):
        if self.model is None:
            self.logger.error("The model is None. Please call generate_model() first.")
            return None
        
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)

        sw = helpers.Stopwatch.start_new()
        csv_target_path = result_dir + patient_id + ".csv"
        all_predictions_csv = []
        patient_img = helpers.load_patient_images(patient_id, image_dir, "*_i.png", [])
        if magnification != 1:
            patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1), magnification)

        patient_mask = helpers.load_patient_images(patient_id, image_dir, "*_m.png", [])
        if magnification != 1:
            patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)

        step = self.PREDICT_STEP
        CROP_SIZE = self.CUBE_SIZE
        # CROP_SIZE = 48

        predict_volume_shape_list = [0, 0, 0]
        for dim in range(3):
            dim_indent = 0
            while dim_indent + CROP_SIZE < patient_img.shape[dim]:
                predict_volume_shape_list[dim] += 1
                dim_indent += step

        predict_volume_shape = (predict_volume_shape_list[0], predict_volume_shape_list[1],
                                predict_volume_shape_list[2])
        predict_volume = numpy.zeros(shape=predict_volume_shape, dtype=float)
        self.logger.info("Predict volume shape: {0}".format(predict_volume.shape))
        done_count = 0
        skipped_count = 0
        batch_size = 128
        batch_list = []
        batch_list_coords = []
        patient_predictions_csv = []
        cube_img = None
        annotation_index = 0

        for z in range(0, predict_volume_shape[0]):
            for y in range(0, predict_volume_shape[1]):
                for x in range(0, predict_volume_shape[2]):
                    # if cube_img is None:
                    cube_img = patient_img[z * step:z * step + CROP_SIZE, y * step:y * step + CROP_SIZE,
                               x * step:x * step + CROP_SIZE]
                    cube_mask = patient_mask[z * step:z * step + CROP_SIZE, y * step:y * step + CROP_SIZE,
                                x * step:x * step + CROP_SIZE]

                    if cube_mask.sum() < 2000:  # skip windows that barely overlap the lung mask
                        skipped_count += 1
                        self.logger.info("Cube x {0} y  {1} z {2} is skipped!!!".format(x, y, z))
                    else:
                        if flip:
                            cube_img = cube_img[:, :, ::-1]

                        if CROP_SIZE != self.CUBE_SIZE:
                            cube_img = helpers.rescale_patient_images2(cube_img, (self.CUBE_SIZE, self.CUBE_SIZE, self.CUBE_SIZE))
                            # helpers.save_cube_img("c:/tmp/cube.png", cube_img, 8, 4)
                            # cube_mask = helpers.rescale_patient_images2(cube_mask, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

                        img_prep = self.prepare_image_for_net3D(cube_img)
                        batch_list.append(img_prep)
                        batch_list_coords.append((z, y, x))
                        if len(batch_list) % batch_size == 0:
                            batch_data = numpy.vstack(batch_list)
                            p = self.model.predict(batch_data, batch_size=batch_size)
                            for i in range(len(p[0])):
                                p_z = batch_list_coords[i][0]
                                p_y = batch_list_coords[i][1]
                                p_x = batch_list_coords[i][2]
                                nodule_chance = p[0][i][0]
                                predict_volume[p_z, p_y, p_x] = nodule_chance
                                if nodule_chance > self.P_TH:
                                    self.logger.info(
                                        "Cube x {0} y  {1} z {2} is possible nodule, nodule_chance is {3}!!!".format(
                                            p_x, p_y, p_z, nodule_chance))
                                    p_z = p_z * step + CROP_SIZE / 2
                                    p_y = p_y * step + CROP_SIZE / 2
                                    p_x = p_x * step + CROP_SIZE / 2

                                    self.logger.info("Cube center x {0} y {1} z {2} ".format(p_x, p_y, p_z))
                                    p_z_perc = round(p_z / patient_img.shape[0], 4)
                                    p_y_perc = round(p_y / patient_img.shape[1], 4)
                                    p_x_perc = round(p_x / patient_img.shape[2], 4)
                                    diameter_mm = round(p[1][i][0], 4)
                                    self.logger.info("Cube diameter_mm {0} ".format(diameter_mm))
                                    diameter_perc = round(diameter_mm / patient_img.shape[2], 4)
                                    self.logger.info(
                                        "Cube center percentage x {0} y {1} z {2} diamm {3} ".format(p_x_perc, p_y_perc,
                                                                                                     p_z_perc,
                                                                                                     diameter_mm))
                                    nodule_chance = round(nodule_chance, 4)
                                    patient_predictions_csv_line = [annotation_index, p_x_perc, p_y_perc, p_z_perc,
                                                                    diameter_perc, nodule_chance, diameter_mm]
                                    patient_predictions_csv.append(patient_predictions_csv_line)
                                    all_predictions_csv.append([patient_id] + patient_predictions_csv_line)
                                    annotation_index += 1

                            batch_list = []
                            batch_list_coords = []
                    done_count += 1
                    if done_count % 10000 == 0:
                        self.logger.info("Done: {0} skipped: {1}".format(done_count, skipped_count))

        df = pandas.DataFrame(patient_predictions_csv,
                              columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter",
                                       "nodule_chance", "diameter_mm"])

        self.filter_patient_nodules_predictions(df, patient_id, image_dir, CROP_SIZE * magnification)
        df.to_csv(csv_target_path, index=False)
        self.logger.info("predict_volume mean is {0}".format(predict_volume.mean()))
        self.logger.info("Done in {0} seconds".format(sw.get_elapsed_seconds()))