def __getitem__(self, idx):
    """Return ``(inputs, targets, info)`` for the exam at position ``idx``.

    The report is tokenized, truncated to ``max_len``, wrapped with
    sentence markers, and masked for masked-language-model training.
    """
    h5 = H5Dataset(self.dataset_name, self.data_dir, mode="read")
    row = self.exams_df.iloc[int(idx)]

    tokens = self.vocab.tokenize(self._get_report(row, h5))
    # Trim over-long reports before adding the sentence wrappers.
    if self.max_len is not None and len(tokens) > self.max_len:
        tokens = tokens[: self.max_len]
    tokens = self.vocab.wrap_sentence(tokens)
    tokens, mask_labels = self._mask_inputs(tokens)

    inputs = {"report": tokens}
    targets = {"mlm": mask_labels, "abnorm": torch.tensor(row["label"])}
    info = {"exam_id": row["exam_id"], "patient_id": row["patient_id"]}
    return inputs, targets, info
def __getitem__(self, idx):
    """Return ``(inputs, targets, info)`` for the exam at position ``idx``.

    Applies the configured pre-training tasks: ``scan_match`` operates
    on the raw report text and therefore must run before tokenization,
    while ``scan_mlm`` operates on the wrapped token sequence.
    """
    h5 = H5Dataset(self.dataset_name, self.data_dir, mode="read")
    row = self.exams_df.iloc[int(idx)]
    label = row["label"]  # kept from original; read but not used below
    exam_id = row["exam_id"]
    patient_id = row["patient_id"]

    report = self._get_report(row, h5)
    images = None if self.skip_scans else self._get_images(row, h5)

    targets = {}
    # scan_match must be applied to the raw report, before tokenization.
    if "scan_match" in self.task_configs:
        match_args = self.task_configs["scan_match"]["args"]
        report, match_labels = self.scan_match(exam_id, report, h5, **match_args)
        targets["scan_match"] = match_labels

    # Tokenize, trim to max_len, and add sentence wrappers.
    report = self.vocab.tokenize(report)
    if self.max_len is not None and len(report) > self.max_len:
        report = report[: self.max_len]
    report = self.vocab.wrap_sentence(report)

    if "scan_mlm" in self.task_configs:
        mlm_args = self.task_configs["scan_mlm"]["args"]
        report, mlm_labels = self.scan_mlm(exam_id, report, **mlm_args)
        targets["scan_mlm"] = mlm_labels

    info = {"exam_id": exam_id, "patient_id": patient_id}
    inputs = {"report": report, "scan": images}
    return inputs, targets, info
def get_targets(self, tasks=None):
    """Yield per-exam targets for every exam in ``exams_df``.

    Args:
        tasks: task names to fetch targets for. Defaults to
            ``["abnormality_detection", "report_generation"]``.

    Yields:
        The result of ``self._get_targets`` for each exam row.
    """
    # Fix: the original used a mutable list as the default argument,
    # which is shared across all calls (classic Python pitfall).
    if tasks is None:
        tasks = ["abnormality_detection", "report_generation"]
    dataset = H5Dataset(self.dataset_name, self.data_dir, mode="read")
    # Row position is unused; iterate values only.
    for _, exam in self.exams_df.iterrows():
        yield self._get_targets(exam, dataset, tasks=tasks)
def get_report(self, exam_id: str, split: str = "valid"):
    """Read the raw report for ``exam_id`` from the dataset of ``split``."""
    split_dataset = self.datasets[split]
    h5 = H5Dataset(
        split_dataset.dataset_name,
        split_dataset.data_dir,
        mode="read",
    )
    return h5.read_reports(exam_id)
def __getitem__(self, idx):
    """Return ``(label, report, info)`` for the exam at position ``idx``."""
    h5 = H5Dataset(self.dataset_name, self.data_dir, mode="read")
    row = self.exams_df.iloc[int(idx)]
    report = self._get_report(row, h5)
    info = {"exam_id": row["exam_id"], "patient_id": row["patient_id"]}
    return row["label"], report, info
def __getitem__(self, idx):
    """Return ``(images, label tensor, info)`` for the exam at position ``idx``."""
    h5 = H5Dataset(self.dataset_name, self.data_dir, mode="read")
    row = self.exams_df.iloc[int(idx)]
    info = {"exam_id": row["exam_id"], "patient_id": row["patient_id"]}
    inputs = self._get_images(row, h5)
    targets = torch.tensor(row["label"])
    # Drop the dataset handle before returning (original used `del`;
    # presumably to release the underlying HDF5 file promptly — confirm).
    del h5
    return inputs, targets, info
def __getitem__(self, idx):
    """Return ``(images, targets, info)`` for the exam with index label ``idx``.

    NOTE(review): this variant uses ``exams_df.loc`` (index label) where
    the sibling datasets use ``iloc`` (position) — presumably the frame
    here is indexed by an integer exam key; confirm against callers.
    """
    h5 = H5Dataset(self.dataset_name, self.data_dir, mode="read")
    row = self.exams_df.loc[int(idx)]
    label = row["label"]  # kept from original; read but not used below
    images = self._get_images(row, h5)
    targets = self._get_targets(row)
    info = {
        "exam_id": row["exam_id"],
        "patient_id": row["patient_id"],
        "idx": idx,
    }
    return images, targets, info
def get_targets(self, tasks=None):
    """Yield per-exam targets for every exam in ``exams_df``.

    Args:
        tasks: task names to fetch targets for; defaults to an empty list.

    Yields:
        The result of ``self._get_targets`` for each exam row.
    """
    # Fix: the original used a mutable list as the default argument,
    # which is shared across all calls (classic Python pitfall).
    tasks = [] if tasks is None else tasks
    dataset = H5Dataset(self.dataset_name, self.data_dir, mode="read")
    # Row position is unused; iterate values only.
    for _, exam in self.exams_df.iterrows():
        yield self._get_targets(exam, dataset, tasks=tasks)
def show_saliency_slice(
    exam_id: str,
    image_slice: int,
    show_pet: bool = False,
    saliency: np.ndarray = None,
    data_dir: str = "/data/fdg-pet-ct",
    dataset_name: str = "pet_ct_dataset",
    ct_hu_range: tuple = 0,
    pet_percentile: float = 0,
    saliency_percentile: float = 0,
    process_dir: str = None,
):
    """Plot one axial slice of a PET/CT exam, optionally overlaying the
    PET signal and a saliency map on top of the windowed CT image.

    Args:
        exam_id: exam identifier used to look up images in the H5 dataset.
        image_slice: slice index into the exam's image volume.
        show_pet: if True, overlay the PET slice with intensity-based alpha.
        saliency: optional saliency volume; reduced over axis 4, min-max
            normalized, then indexed as ``saliency[0, image_slice]``
            (so presumably shaped (1, slices, H, W, C) — TODO confirm).
        data_dir: root directory of the H5 dataset.
        dataset_name: name of the H5 dataset.
        ct_hu_range: (vmin, vmax) Hounsfield window for the CT display.
            NOTE(review): the default ``0`` is not a tuple and would crash
            at ``ct_hu_range[0]`` — callers must always pass a 2-tuple.
        pet_percentile: percentile used as vmin when normalizing the PET overlay.
        saliency_percentile: percentile used as vmin for the saliency alpha mask.
        process_dir: if given, the figure is saved there as ``scan.png``.

    ### <<EXPERIMENT SPEC>> ###
    # axillary
    #experiment_dir = "experiments/manuscript/_seed/single_task/08-01_12-42_st-pretrain_mt_full_26_e/candidates/exp_15"
    #cervical
    #experiment_dir = "experiments/manuscript/_seed/single_task/08-01_12-42_st-pretrain_mt_full_26_a/candidates/exp_19"
    #inguinal
    experiment_dir = "experiments/manuscript/_seed/single_task/08-01_12-42_st-pretrain_mt_full_26_a/candidates/exp_5"
    """
    dataset = H5Dataset(dataset_name, data_dir, mode="read")
    # Pull the requested slice from each modality's volume.
    ct = np.array(dataset.read_images(exam_id, "CT Images"))[image_slice]
    pet = np.array(dataset.read_images(exam_id, "PET_BODY_CTAC"))[image_slice]
    plt.figure(figsize=(10, 10))
    # Debug output of the CT slice's intensity range.
    print(ct.max())
    print(ct.min())
    # Plot CT: window to ct_hu_range and render with the bone colormap.
    plt.imshow(
        plt.cm.bone(
            plt.Normalize(vmin=ct_hu_range[0], vmax=ct_hu_range[1], clip=True,)(ct)
        )
    )
    # Plot PET overlay; alpha is driven by PET intensity so low-uptake
    # regions stay transparent.
    if show_pet:
        # NOTE(review): cv2.resize takes dsize as (width, height); passing
        # ct.shape (rows, cols) is only correct for square slices — confirm.
        pet = cv2.resize(pet, ct.shape)
        alphas = plt.Normalize(clip=True)(np.abs(pet))
        pet = plt.cm.plasma(
            plt.Normalize(vmin=np.percentile(pet, pet_percentile), clip=True)(pet)
        )
        # Replace the colormap's alpha channel with the intensity mask.
        pet[..., -1] = alphas
        plt.imshow(pet)
    # Plot saliency overlay, min-max normalized over the whole volume so
    # slices are comparable, with percentile-thresholded alpha.
    if saliency is not None:
        saliency = saliency.max(axis=4, keepdims=False)
        saliency = saliency - saliency.min()
        saliency /= saliency.max()
        saliency = saliency[0, image_slice]
        saliency = cv2.resize(saliency, ct.shape)
        alphas = plt.Normalize(
            vmin=np.percentile(saliency, saliency_percentile), clip=True
        )(np.abs(saliency))
        saliency = plt.cm.viridis(plt.Normalize()(saliency))
        # Replace the colormap's alpha channel with the thresholded mask.
        saliency[..., -1] = alphas
        plt.imshow(saliency)
    if process_dir is not None:
        plt.savefig(os.path.join(process_dir, "scan.png"))