def __prepare_tensor_dataset(self):
    """Materialize the generic dataset on disk as saved torch tensors.

    When the current ``dataset_params`` config snapshot is unchanged the
    existing dataset is reused; otherwise every sample is written under
    ``<ws_path>/tensor_datasets/<name>/<texture>/nodule_<i>.pt``.

    Returns:
        str: path to the tensor dataset root directory.
    """
    tensor_dataset_path = os.path.join(
        self.metaconf["ws_path"],
        "tensor_datasets",
        self.dataset_params.tensor_dataset_name,
    )
    # Compare configs; only an unchanged snapshot lets us skip the rebuild.
    snapshot_is_current = H.config_snapshot(
        "dataset_params",
        self.dataset_params.params,
        "src/data/aux/.dataset_config_snapshot.json",
    )
    if snapshot_is_current:
        return tensor_dataset_path

    H.makedirs(tensor_dataset_path)
    progress = tqdm(
        enumerate(self.generic_dataset),
        desc="Preparing TensorDataset",
        total=len(self.generic_dataset),
    )
    for idx, sample in progress:
        # Samples are grouped into one folder per texture label.
        texture_dir = os.path.join(tensor_dataset_path, f"{sample['texture']}")
        H.makedirs(texture_dir)
        target_file = os.path.join(texture_dir, f"nodule_{idx}.pt")
        torch.save(
            {"nodule": sample["nodule"], "texture": sample["texture"]},
            target_file,
        )
    return tensor_dataset_path
def __prepare_nodules_annotations(self):
    """Collect pylidc annotation clusters, one cluster per physical nodule.

    The cluster list is rebuilt (and re-pickled) when either the cached
    pickle is missing or the annotation config snapshot has changed;
    otherwise the previously pickled list is loaded and returned.

    Returns:
        list: flat list of annotation clusters, each cluster holding at
        least ``self.annotations_number_required`` annotations.
    """
    lidc_ann_config = {
        "annotations_number_required": self.annotations_number_required
    }
    snapshot_unchanged = config_snapshot(
        "lidc_ann", lidc_ann_config,
        "./src/data/aux/.lidcann_config_snapshot.json")
    pickle_on_disk = os.path.exists(self.cluster_list_pickle_path)

    # Fast path: cached pickle is present and still matches the config.
    if pickle_on_disk and snapshot_unchanged:
        with open(self.cluster_list_pickle_path, "rb") as f:
            return pickle.load(f)

    cluster_list = []
    all_scans = pylidc.query(pylidc.Scan).all()
    for series in tqdm(all_scans, desc="Preparing LIDC annotations list"):
        clusters = series.cluster_annotations(verbose=False)
        # We take only nodules with >=3 annotations for robustness.
        robust_clusters = [
            c for c in clusters
            if len(c) >= self.annotations_number_required
        ]
        if robust_clusters:
            cluster_list.append(robust_clusters)
    # Flatten the per-scan lists into one flat list of clusters.
    cluster_list = [c for per_scan in cluster_list for c in per_scan]
    # Dump cluster_list for future use.
    logger.info("pickling LIDC annotation list for future use")
    with open(self.cluster_list_pickle_path, "wb") as f:
        pickle.dump(cluster_list, f)
    return cluster_list
def __prepare_nodule_list(self,
                          cluster_list: List[List[pylidc.Annotation]]):
    """Build (or load from cache) the list of LIDCNodule objects.

    Rebuilds when the cached pickle is missing or the nodule config
    snapshot changed. Each annotation cluster is validated (single scan,
    diameter inside ``self.diam_interval``, unambiguous texture scores),
    then a consensus mask/bbox is computed and dilated.

    Args:
        cluster_list: annotation clusters, one cluster per nodule.

    Returns:
        list: LIDCNodule instances, one per accepted cluster.
    """
    lidc_nodule_config = {
        "diam_interval": self.diam_interval,
        "extract_size_mm": self.extract_size_mm,
        "mask_dilation_iters": self.mask_dilation_iters,
    }
    nodule_pickle_exists = os.path.exists(self.nodule_list_pickle_path)
    snapshot_exists = config_snapshot(
        "lidc_nodule", lidc_nodule_config,
        "./src/data/aux/.lidcnod_config_snapshot.json")
    if not nodule_pickle_exists or not snapshot_exists:
        nodule_list = []
        _tqdm_kwargs = {
            "desc": "Preparing LIDC nodule list",
            "total": len(cluster_list),
        }
        # NOTE: the enumerate index was unused in the original loop;
        # iterate the clusters directly.
        for cluster in tqdm(cluster_list, **_tqdm_kwargs):
            # All annotations in a cluster must come from the same scan.
            if len(np.unique([ann.scan.id for ann in cluster])) != 1:
                # Plain string: the original used an f-string with no
                # placeholders.
                logger.warning("annotations not from the same scans! skip")
                continue
            # Mean diameter over annotators; computed once and reused
            # below (the original recomputed it identically).
            nodule_diam = np.mean([ann.diameter for ann in cluster])
            texture_scores = [ann.texture for ann in cluster]
            # Skip nodules out of diam interval and with ambiguous
            # texture scores.
            if (nodule_diam < self.diam_interval[0]
                    or nodule_diam >= self.diam_interval[1]
                    or not_valid_score(texture_scores)):
                continue
            # Minimal possible bbox size (in mm): largest bbox dimension
            # among the cluster's annotations.
            minsize = max(max(ann.bbox_dims(pad=None)) for ann in cluster)
            pad_mm = max(float(self.extract_size_mm), minsize)
            nodule_mask, nodule_bbox = consensus(cluster,
                                                 clevel=0.8,
                                                 pad=pad_mm,
                                                 ret_masks=False)
            dilated_nodule_mask = binary_dilation(
                nodule_mask, iterations=self.mask_dilation_iters)
            nodule_coords = np.mean([ann.centroid for ann in cluster],
                                    axis=0)
            # Majority texture score across annotators.
            nodule_texture = mode(texture_scores).mode.item()
            nodule_list.append(
                LIDCNodule(
                    pylidc_scan=cluster[0].scan,
                    bbox=nodule_bbox,
                    mask=dilated_nodule_mask,
                    centroid=nodule_coords,
                    diameter=nodule_diam,
                    texture=nodule_texture,
                ))
        logger.info("pickling LIDC nodule list for future use")
        with open(self.nodule_list_pickle_path, "wb") as f:
            pickle.dump(nodule_list, f)
    else:
        with open(self.nodule_list_pickle_path, "rb") as f:
            nodule_list = pickle.load(f)
    return nodule_list