Esempio n. 1
0
def _copy_nodule_ratings(source, target):
    """Copy the nine radiologist ratings from ``source`` onto ``target`` as ints."""
    for name in ('subtlety', 'internalStructure', 'calcification',
                 'sphericity', 'margin', 'lobulation', 'spiculation',
                 'texture', 'malignancy'):
        setattr(target, name, int(getattr(source, name)))


def getAtts(nods, pid):
    """Attach radiologist ratings to the candidate nodules of one patient.

    For every annotation cluster of the patient's scan, a consensus mask is
    built, resampled with ``reshape_mask`` and cropped to the lung bounding
    box. Each candidate in ``nods`` whose ``array`` overlaps that mask
    sufficiently receives the ratings of the cluster's first annotation.

    Matching rule:
      * if the number of clusters equals ``len(nods)``, the candidate must
        cover the consensus mask completely;
      * otherwise a message is printed and 90% coverage is enough.

    Args:
        nods: dict indexed by ``0 .. len(nods) - 1`` whose values expose an
            ``array`` attribute that can be combined with the cropped mask
            via ``&``. The values are mutated in place.
        pid: patient number; zero-padded to four digits to build the
            ``LIDC-IDRI-XXXX`` patient id.

    Returns:
        None. Results are written onto the objects stored in ``nods``.
    """
    assert isinstance(nods, dict)
    scan = pl.query(
        pl.Scan).filter(pl.Scan.patient_id == 'LIDC-IDRI-{}'.format(
            str(pid).zfill(4))).first()
    image = preprocess(scan)
    lung_mask = segment_lung_mask(image, True)
    x1, x2, y1, y2, z1, z2 = create_bounding_box(lung_mask)
    shape = scan.to_volume().shape

    allNodules = scan.cluster_annotations()

    # The original used a failing assert inside a bare ``except`` to choose
    # between the two matching rules; make that decision explicit so genuine
    # errors are no longer swallowed and misreported.
    strict = len(allNodules) == len(nods)
    if not strict:
        print('pid: {}, nodules numbers do not match.'.format(pid))

    for annotations in allNodules:
        # Weakness: only the first annotation's ratings are used even when
        # several radiologists annotated the nodule.
        reference = annotations[0]

        nod_mask, nod_bbox, _ = consensus(annotations)
        scan_vol = np.zeros(shape)
        scan_vol[nod_bbox] = nod_mask
        temp_vol = reshape_mask(scan_vol, image)
        final_vol = temp_vol[x1:x2, y1:y2, z1:z2]

        total = np.sum(final_vol)
        if total == 0:
            # An empty cropped mask would trivially "match" every candidate
            # (0 == 0) and overwrite all of their ratings; skip it.
            continue

        for b in range(len(nods)):
            overlap = np.sum(final_vol & nods[b].array)
            if strict:
                matched = overlap == total
            else:
                matched = overlap >= 0.9 * total
            if matched:
                _copy_nodule_ratings(reference, nods[b])
Esempio n. 2
0
def main():
    """Export every scan and its nodule segmentation mask as NIfTI files.

    Reads ``savedir``, ``debug`` and ``overwrite`` from the parsed command
    line arguments. In debug mode only the first scan is converted and the
    output goes to a ``debug`` sub-directory. If the target directory already
    exists the function stops unless ``overwrite`` is set.

    For each scan two files are written into the save directory:
    ``<patient_id>_volume.nii.gz`` and ``<patient_id>_segmask.nii.gz``.
    """
    args = parser.parse_args()
    path = pathlib.Path(args.savedir)
    if args.debug:
        path = path / 'debug'
    print(f"Using {str(path)} as save directory")
    if path.exists() and path.is_dir():
        warnings.warn(f"Directory {str(path)} already exists.")
        if args.overwrite:
            print("Overwrite has been set. Continuing...")
        else:
            print("Terminating execution.")
            return
    else:
        path.mkdir(parents=True, exist_ok=True)

    if args.debug:
        scans = [pl.query(pl.Scan).first()]
    else:
        scans = pl.query(pl.Scan).all()

    for scan in scans:
        print(f"Converting patient {scan.patient_id}")
        vol = scan.to_volume()  # (numpy array)
        mask = np.zeros(vol.shape, dtype=bool)
        # Pad by the full extent on every axis so that the consensus mask
        # spans the whole volume and can be OR-ed directly into ``mask``.
        full_pad = [(dim, dim) for dim in vol.shape]
        for nod in scan.cluster_annotations():
            cmask, _, _ = consensus(nod, clevel=0.5, pad=full_pad)
            mask = np.logical_or(mask, cmask)

        numpy_to_nifti(vol, path / f"{scan.patient_id}_volume.nii.gz")
        # Fix: the segmentation file previously received ``vol`` again, so
        # the mask built above was never saved. NIfTI has no boolean
        # datatype, hence the cast to uint8.
        # NOTE(review): confirm numpy_to_nifti accepts uint8 input.
        numpy_to_nifti(mask.astype(np.uint8),
                       path / f"{scan.patient_id}_segmask.nii.gz")
def get_anns_con(scan):
    """Build one volume-sized mask from the consensus of every nodule.

    Args:
        scan: object providing ``to_volume()`` and ``cluster_annotations()``
            (a pylidc scan in the surrounding examples).

    Returns:
        A float array with the shape of the scan volume, 1 inside any
        nodule's consensus mask and 0 elsewhere.
    """
    vol = scan.to_volume()
    mask = np.zeros(vol.shape)
    for nod in scan.cluster_annotations():
        nod_mask, nod_bbox, _ = consensus(nod)
        # Merge rather than assign: bounding boxes of neighbouring nodules
        # can overlap, and a plain assignment would let the later nodule's
        # background zero out voxels of the earlier one.
        mask[nod_bbox] = np.maximum(mask[nod_bbox], np.multiply(nod_mask, 1))
    return mask
Esempio n. 4
0
def get_image(nodule, vol):
    """Return the processed central slice of a nodule's padded bounding box.

    Args:
        nodule: sequence of annotations belonging to one nodule; the first
            one's ``malignancy`` and ``Malignancy`` are printed.
        vol: scan volume indexed with the consensus bounding box.

    Returns:
        The result of ``zero_conter(normalize(...))`` applied to the slice
        at half the bounding-box depth.
    """
    first = nodule[0]
    print(first.malignancy, first.Malignancy)

    # Only the bounding box of the 50% consensus is needed; pad in-plane by
    # 20 voxels on each side and not at all along the third axis.
    bbox = consensus(nodule, clevel=0.5,
                     pad=[(20, 20), (20, 20), (0, 0)])[1]

    # Index of the middle slice, relative to the start of the bounding box.
    depth_range = bbox[2]
    mid = int(0.5 * (depth_range.stop - depth_range.start))

    patch = vol[bbox]
    return zero_conter(normalize(patch[:, :, mid]))
Esempio n. 5
0
def prepare(pid):
    """Copy a patient's CT NRRD and write one consensus-mask NRRD per nodule.

    Everything is written below ``cf.raw_data_dir/<pid>``. The CT file is
    located with a glob under ``lidc_path``; each nodule mask is saved as
    ``<pid>_nod_<index>.nrrd`` using the header read from that CT file.

    Args:
        pid: patient id used for the database query and all file names.
    """
    out_dir = os.path.join(cf.raw_data_dir, pid)
    os.makedirs(out_dir, exist_ok=True)

    # Fetch the scan from pylidc.
    scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid).first()
    print("processing:", scan.patient_id)
    vol_shape = scan.to_volume().shape

    # Locate the source CT NRRD (first glob hit) and copy it over.
    ct_pattern = os.path.join(lidc_path, pid, "*", "*", f"{pid}_CT.nrrd")
    scan_path = glob.glob(ct_pattern)[0]
    copyfile(scan_path, os.path.join(out_dir, f"{pid}_CT.nrrd"))

    # One output file per annotation cluster.
    for nodule_ix, nodule_anns in enumerate(scan.cluster_annotations()):
        # 50% consensus mask, placed into an otherwise empty full volume.
        cmask, cbbox, _ = consensus(nodule_anns, clevel=0.5)
        full_mask = np.zeros(vol_shape)
        full_mask[cbbox] = cmask

        # Reuse the CT header for the mask file.
        header = nrrd.read_header(scan_path)

        # The first two axes are swapped before writing.
        swapped = np.swapaxes(full_mask, 0, 1)
        target = os.path.join(out_dir, f"{pid}_nod_{nodule_ix}.nrrd")
        nrrd.write(target, swapped, header=header)
Esempio n. 6
0
        ct = 'LIDC-IDRI-' + str(i + 1)

    print("Pacient ID: ", str(ct))

    try:
        # Query for a scan, and convert it to an array volume.
        scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == ct).first()
        # scan1 = pl.query(pl.Annotation).filter(pl.Annotation.texture == 1)

        vol = scan.to_volume()
        nodules = scan.cluster_annotations()

        for l in range(len(nodules)):
            annotations = nodules[l]
            consensus_mask, consensus_bbox, _ = consensus(annotations,
                                                          clevel=0.5,
                                                          pad=[(0, 0), (0, 0),
                                                               (0, 0)])

            k = consensus_mask.shape[-1] // 2

            # Save image and mask
            image = np.asarray(vol[consensus_bbox][:, :, k])
            mask = np.float32(np.array(consensus_mask[:, :, k]))

            img_sitk = sitk.GetImageFromArray(image)
            sitk.WriteImage(
                img_sitk,
                f"/home/anatielsantos/mestrado/bases/cortes-lidc/image/__vol{i}_nod{l}.nii"
            )

            mask_sitk = sitk.GetImageFromArray(mask)
plt.show()

# --- Annotation consensus -------------------------------------------------

# Look up the patient's scan and load it as an array volume.
scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid).first()
vol = scan.to_volume()

# Group the annotations into nodules and work with the first group.
nods = scan.cluster_annotations()
anns = nods[0]

# Consolidate the annotations at a 50% agreement level. The in-plane
# padding adds surrounding context for viewing.
consensus_pad = [(20, 20), (20, 20), (0, 0)]
cmask, cbbox, masks = consensus(anns, clevel=0.5, pad=consensus_pad)

# Middle slice of the bounding box, relative to its start.
k = int(0.5 * (cbbox[2].stop - cbbox[2].start))

# Show the CT patch for that slice.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.imshow(vol[cbbox][:, :, k], cmap=plt.cm.gray, alpha=0.5)

# Overlay each annotator's contour on the kth slice, one colour each.
colors = ['r', 'g', 'b', 'y']
for j, ann_mask in enumerate(masks):
    for c in find_contours(ann_mask[:, :, k].astype(float), 0.5):
        label = f"Annotation {j + 1}"
        plt.plot(c[:, 1], c[:, 0], colors[j], label=label)
import SimpleITK as sitk
import numpy as np

i = 0
for scan in pl.query(pl.Scan):
    # annotation_groups is a list of of lists of Annotation's
    annotation_groups = scan.cluster_annotations()
    vol = scan.to_volume()

    # Next, for each annotation group, implement your criteria of what qualifies as GGO. E.g.,
    for nodule_annotations in annotation_groups:
        # Only consider nodules with 4 annotators and have >= 50% indicating GGO
        if (len(nodule_annotations) >= 2
                and sum([a.texture == 1 for a in nodule_annotations]) >= 1):
            consensus_mask, consensus_bbox, _ = consensus(nodule_annotations,
                                                          clevel=0.5,
                                                          pad=[(5, 5), (5, 5),
                                                               (0, 0)])

            image = np.asarray(vol[consensus_bbox][:, :, :]).transpose(2, 0, 1)
            mask_image = np.float32(np.array(
                consensus_mask[:, :, :])).transpose(2, 0, 1)

            img_sitk = sitk.GetImageFromArray(image)
            sitk.WriteImage(
                img_sitk,
                f"/home/anatielsantos/mestrado/bases/cortes-lidc/3d/image/ggo/nod{i}.nii"
            )

            mask_sitk = sitk.GetImageFromArray(mask_image)
            sitk.WriteImage(
                mask_sitk,
Esempio n. 9
0
    def prepare_dataset(self):
        """Export per-slice nodule images and masks plus a metadata CSV.

        For every patient id in ``self.IDRI_list``:

        * Patients with annotated nodules: for each nodule a consensus mask
          is computed with ``self.c_level`` and ``self.padding``; every
          slice whose mask sum exceeds ``self.mask_threshold`` is passed
          through ``segment_lung`` and saved under ``self.img_path/<pid>``
          with its mask under ``self.mask_path/<pid>``.
        * Patients without nodules: up to the first 51 slices are segmented
          and saved with an all-zero mask under the clean directories.

        One metadata row per saved slice is recorded with ``self.save_meta``
        and ``self.meta`` is finally written to ``meta_info.csv``.
        """
        # Zero-padded three-digit labels used in file names.
        prefix = [str(x).zfill(3) for x in range(1000)]

        # Make output directories (exist_ok avoids the check-then-create race).
        for directory in (self.img_path, self.mask_path, self.clean_path_img,
                          self.clean_path_mask, self.meta_path):
            os.makedirs(directory, exist_ok=True)

        IMAGE_DIR = Path(self.img_path)
        MASK_DIR = Path(self.mask_path)
        CLEAN_DIR_IMAGE = Path(self.clean_path_img)
        CLEAN_DIR_MASK = Path(self.clean_path_mask)

        for pid in tqdm(self.IDRI_list):  # pid looks like LIDC-IDRI-0001
            scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid).first()
            nodules_annotation = scan.cluster_annotations()
            vol = scan.to_volume()
            print(
                "Patient ID: {} Dicom Shape: {} Number of Annotated Nodules: {}"
                .format(pid, vol.shape, len(nodules_annotation)))

            patient_image_dir = IMAGE_DIR / pid
            patient_mask_dir = MASK_DIR / pid
            Path(patient_image_dir).mkdir(parents=True, exist_ok=True)
            Path(patient_mask_dir).mkdir(parents=True, exist_ok=True)

            if len(nodules_annotation) > 0:
                # Patients with nodules: iterate over each nodule (cluster
                # of annotations) of this patient.
                for nodule_idx, nodule in enumerate(nodules_annotation):
                    mask, cbbox, _ = consensus(nodule, self.c_level,
                                               self.padding)
                    lung_np_array = vol[cbbox]

                    # Malignancy information is computed once per nodule.
                    malignancy, cancer_label = self.calculate_malignancy(
                        nodule)

                    # Iterate over the slices of this single nodule.
                    for nodule_slice in range(mask.shape[2]):
                        # Very small masks may hinder training; skip them.
                        if np.sum(mask[:, :,
                                       nodule_slice]) <= self.mask_threshold:
                            continue
                        # Segment lung part only.
                        lung_segmented_np_array = segment_lung(
                            lung_np_array[:, :, nodule_slice])
                        # Some values are stored as -0.; normalise them to 0
                        # (the original author suspected dtype issues in
                        # PyTorch training otherwise).
                        lung_segmented_np_array[lung_segmented_np_array ==
                                                -0] = 0
                        # File naming: NI = Nodule Image, MA = Mask Original.
                        nodule_name = "{}_NI{}_slice{}".format(
                            pid[-4:], prefix[nodule_idx], prefix[nodule_slice])
                        mask_name = "{}_MA{}_slice{}".format(
                            pid[-4:], prefix[nodule_idx], prefix[nodule_slice])
                        meta_list = [
                            pid[-4:], nodule_idx, prefix[nodule_slice],
                            nodule_name, mask_name, malignancy, cancer_label,
                            False
                        ]

                        self.save_meta(meta_list)
                        np.save(patient_image_dir / nodule_name,
                                lung_segmented_np_array)
                        np.save(patient_mask_dir / mask_name,
                                mask[:, :, nodule_slice])
            else:
                # Patients without any nodule form the "clean" dataset,
                # intended for validation.
                print("Clean Dataset", pid)
                patient_clean_dir_image = CLEAN_DIR_IMAGE / pid
                patient_clean_dir_mask = CLEAN_DIR_MASK / pid
                Path(patient_clean_dir_image).mkdir(parents=True,
                                                    exist_ok=True)
                Path(patient_clean_dir_mask).mkdir(parents=True, exist_ok=True)

                # Only slices 0..50 are exported.
                for slice_idx in range(min(vol.shape[2], 51)):
                    lung_segmented_np_array = segment_lung(
                        vol[:, :, slice_idx])
                    lung_segmented_np_array[lung_segmented_np_array == -0] = 0
                    lung_mask = np.zeros_like(lung_segmented_np_array)

                    # CN = CleanNodule, CM = CleanMask. These names already
                    # start with "<pid>/".
                    nodule_name = "{}/{}_CN001_slice{}".format(
                        pid, pid[-4:], prefix[slice_idx])
                    mask_name = "{}/{}_CM001_slice{}".format(
                        pid, pid[-4:], prefix[slice_idx])
                    meta_list = [
                        pid[-4:], slice_idx, prefix[slice_idx], nodule_name,
                        mask_name, 0, False, True
                    ]
                    self.save_meta(meta_list)
                    # Fix: join onto the clean root, not the per-patient
                    # directory. The old join produced "<root>/<pid>/<pid>/..."
                    # whose inner directory is never created, so np.save
                    # could not open the file.
                    np.save(CLEAN_DIR_IMAGE / nodule_name,
                            lung_segmented_np_array)
                    np.save(CLEAN_DIR_MASK / mask_name, lung_mask)

        print("Saved Meta data")
        self.meta.to_csv(self.meta_path + 'meta_info.csv', index=False)
Esempio n. 10
0
            if (nods[i][int(len(nods[i]) / 2)].malignancy == 2):
                print("unlikely cancerous")
                savefile = savefile + r"\unlikely cancerous"

            if (nods[i][int(len(nods[i]) / 2)].malignancy == 1):
                print("safe")
                savefile = savefile + r"\safe"

            if os.path.isdir(savefile) == False:
                os.makedirs(savefile)

            print(savefile)

            cmask, cbbox, masks = consensus(nods[i],
                                            clevel=0.5,
                                            pad=[(20, 20), (20, 20), (0, 0)])
            k = int(0.5 * (cbbox[2].stop - cbbox[2].start))
            fig, ax = plt.subplots(1, 1, figsize=(5, 5))
            ax.imshow(vol[cbbox][:, :, k], cmap=plt.cm.gray, alpha=1.0)

            # Plot the annotation contours for the kth slice.
            colors = ['r', 'g', 'b', 'y']
            for j in range(len(masks)):
                for c in find_contours(masks[j][:, :, k].astype(float), 0.5):
                    label = "Annotation %d" % (j + 1)
                    #plt.plot(c[:,1], c[:,0], colors[j], label=label)

            ax.axis('off')
            #ax.legend()
            plt.tight_layout()
Esempio n. 11
0
def preprocess_lidc(src: Path,
                    dest: Path,
                    sample: Union[Sequence[str], bool] = False,
                    nod_size: Tuple[int] = (100, 100, 60)):
    """Convert downloaded LIDC-IDRI scans into volumes, masks and metadata.

    Writes below ``dest``:
      * ``images/<pid>.npy``    - scan volume as int16
      * ``masks/<pid>.npy``     - union of all consensus masks as uint8
      * ``nodules/<pid>_<i>.npy`` - padded sub-volume around nodule ``i``
      * ``meta/scans.csv`` and ``meta/nodules.csv``

    Args:
        src (Path): Directory passed to ``get_pids`` to discover patient ids.
        dest (Path): Root directory for all outputs.
        sample: If truthy, used as the list of patient ids instead of the
            ids discovered under ``src``. Mainly for testing.
        nod_size (Tuple[int]): Target size of the extracted nodule volumes;
            the bounding box is padded by the difference to this size.
    """
    out_dirs = {
        name: dest / name
        for name in ("images", "masks", "nodules", "meta")
    }
    for directory in out_dirs.values():
        directory.mkdir(parents=True, exist_ok=True)
    img_path = out_dirs["images"]
    mask_path = out_dirs["masks"]
    nod_path = out_dirs["nodules"]
    meta_path = out_dirs["meta"]

    scan_rows = []
    nodule_rows = []

    pids = get_pids(src)
    if sample:
        pids = sample

    for pid in tqdm(pids):
        scan = get_scan(pid)
        scan_rows.append(get_scan_meta(scan))

        vol = scan.to_volume(verbose=False)
        np.save(img_path / f"{pid}.npy", vol.astype(np.int16))
        ann_clusters = scan.cluster_annotations(verbose=False)

        # Start from an empty mask so patients without nodules still get one.
        masks = [np.zeros(vol.shape, dtype=np.uint8)]
        # Padding by the largest volume dimension makes the consensus mask
        # cover the whole image.
        whole_image_pad = int(np.max(vol.shape))

        for idx, cluster in enumerate(ann_clusters):
            _, bbox = consensus(cluster, ret_masks=False)
            full_mask, _ = consensus(cluster,
                                     ret_masks=False,
                                     pad=whole_image_pad)

            # Split the gap between the target size and the unpadded bounding
            # box across both sides of each axis (larger half first).
            size_gap = np.array(nod_size) - np.array(vol[bbox].shape)
            nod_pad_sz = [(math.ceil(gap / 2), math.floor(gap / 2))
                          for gap in size_gap]
            _, padded_bbox = consensus(cluster,
                                       ret_masks=False,
                                       pad=nod_pad_sz)

            nod_vol = vol[padded_bbox]
            np.save(nod_path / f"{pid}_{idx}.npy", nod_vol.astype(np.int16))
            nodule_rows.append(get_nod_meta(scan, cluster, idx, bbox))
            masks.append(full_mask)

        combined = reduce(np.logical_or, masks)
        np.save(mask_path / f"{pid}.npy", combined.astype(np.uint8))

    pd.DataFrame(data=scan_rows).to_csv(meta_path / "scans.csv", index=False)
    pd.DataFrame(data=nodule_rows).to_csv(meta_path / "nodules.csv",
                                          index=False)
Esempio n. 12
0
    def __prepare_nodule_list(self,
                              cluster_list: List[List[pylidc.Annotation]]):
        """Build the list of ``LIDCNodule`` objects, or load it from cache.

        The cached pickle at ``self.nodule_list_pickle_path`` is reused only
        when it exists and ``config_snapshot`` reports an existing snapshot
        for the current configuration; otherwise the list is rebuilt from
        ``cluster_list`` and pickled again.

        A cluster is skipped when its annotations come from more than one
        scan, when its mean diameter lies outside
        ``[diam_interval[0], diam_interval[1])``, or when
        ``not_valid_score`` rejects its texture scores.

        Args:
            cluster_list: annotation clusters, one list of annotations per
                nodule.

        Returns:
            The list of ``LIDCNodule`` objects.
        """
        lidc_nodule_config = {
            "diam_interval": self.diam_interval,
            "extract_size_mm": self.extract_size_mm,
            "mask_dilation_iters": self.mask_dilation_iters,
        }
        nodule_pickle_exists = os.path.exists(self.nodule_list_pickle_path)
        # Called unconditionally, as before, so any bookkeeping it performs
        # still happens on a cache hit.
        snapshot_exists = config_snapshot(
            "lidc_nodule", lidc_nodule_config,
            "./src/data/aux/.lidcnod_config_snapshot.json")

        if nodule_pickle_exists and snapshot_exists:
            # NOTE: pickle.load executes arbitrary code from the file; this
            # is only acceptable because the cache is expected to be
            # produced locally by this same method.
            with open(self.nodule_list_pickle_path, "rb") as f:
                return pickle.load(f)

        nodule_list = []
        _tqdm_kwargs = {
            "desc": "Preparing LIDC nodule list",
            "total": len(cluster_list)
        }
        for cluster in tqdm(cluster_list, **_tqdm_kwargs):
            # Check if all annotations belong to the same scan.
            if len(np.unique([ann.scan.id for ann in cluster])) != 1:
                logger.warning("annotations not from the same scans! skip")
                continue

            # Computed once and reused for both the filter and the nodule.
            nodule_diam = np.mean([ann.diameter for ann in cluster])
            texture_scores = [ann.texture for ann in cluster]
            # Skip nodules outside the diameter interval or with ambiguous
            # texture scores.
            if (nodule_diam < self.diam_interval[0]
                    or nodule_diam >= self.diam_interval[1]
                    or not_valid_score(texture_scores)):
                continue

            # Minimal possible bbox size (in mm).
            minsize = max(max(ann.bbox_dims(pad=None)) for ann in cluster)
            pad_mm = max(float(self.extract_size_mm), minsize)
            nodule_mask, nodule_bbox = consensus(cluster,
                                                 clevel=0.8,
                                                 pad=pad_mm,
                                                 ret_masks=False)
            dilated_nodule_mask = binary_dilation(
                nodule_mask, iterations=self.mask_dilation_iters)
            nodule_coords = np.mean([ann.centroid for ann in cluster],
                                    axis=0)
            nodule_texture = mode(texture_scores).mode.item()

            nodule_list.append(
                LIDCNodule(
                    pylidc_scan=cluster[0].scan,
                    bbox=nodule_bbox,
                    mask=dilated_nodule_mask,
                    centroid=nodule_coords,
                    diameter=nodule_diam,
                    texture=nodule_texture,
                ))

        logger.info("pickling LIDC nodule list for future use")
        with open(self.nodule_list_pickle_path, "wb") as f:
            pickle.dump(nodule_list, f)
        return nodule_list
Esempio n. 13
0
# Export every patient that has at least one sufficiently large nodule as an
# HDF5 file holding the CT volume ('image') and a binary nodule mask ('label').
for i in range(1, 1011):
    pid = pid_prefix + str(i).zfill(4)

    # get scan
    scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid).first()
    if scan is None:
        # No scan registered for this patient id.
        continue

    # get image
    try:
        vol = scan.to_volume().astype(np.int16)
    except Exception as err:
        # Best effort: skip scans whose volume cannot be loaded, but no
        # longer swallow KeyboardInterrupt / SystemExit as a bare except did.
        print('%s skipped: %s' % (pid, err))
        continue
    print(vol.shape)
    print(np.max(vol), np.min(vol))

    # get nodule annotation
    nods = scan.cluster_annotations()

    label = np.zeros_like(vol, dtype=np.uint8)
    if len(nods) != 0:
        for nod in nods:
            cmask, cbbox, masks = consensus(nod, clevel=0.5)
            if np.sum(cmask) > 114:  # filter the nodules with a radius < 3mm
                # OR into the label instead of assigning, so overlapping
                # bounding boxes cannot erase a previously written nodule.
                label[cbbox] |= cmask.astype(np.uint8)
        if np.sum(label) != 0:
            # Move the third axis to the front before saving.
            vol = np.transpose(vol, (2, 0, 1))
            label = np.transpose(label, (2, 0, 1))
            save_as_hdf5(vol, os.path.join(save_path, pid + '.hdf5'), 'image')
            save_as_hdf5(label, os.path.join(save_path, pid + '.hdf5'),
                         'label')
            assert list(np.unique(label)) == [0, 1]
            print('%s done !' % pid)