def run(self, output_filename: str):
    """Gather per-subject T1/T2/Labels file paths and append one CSV row per
    subject containing the paths plus T1/T2 intensity statistics
    (min/max/mean/std).

    :param output_filename: Name of the CSV file created/appended to inside
        ``self._output_dir``.
    """
    source_paths_t1 = []
    source_paths_t2 = []
    target_paths = []

    for subject in sorted(os.listdir(os.path.join(self._source_dir))):
        source_paths_t1.append(
            extract_file_paths(os.path.join(self._source_dir, subject, "T1")))
        source_paths_t2.append(
            extract_file_paths(os.path.join(self._source_dir, subject, "T2")))
        target_paths.append(
            extract_file_paths(os.path.join(self._source_dir, subject, "Labels")))

    source_paths_t1 = natural_sort(
        [item for sublist in source_paths_t1 for item in sublist])
    source_paths_t2 = natural_sort(
        [item for sublist in source_paths_t2 for item in sublist])
    target_paths = natural_sort(
        [item for sublist in target_paths for item in sublist])

    # Subject ids are 1-based. Derive the count from the discovered files
    # instead of hard-coding 10 subjects (previously np.arange(1, 11)).
    subjects = np.arange(1, len(source_paths_t1) + 1)

    # newline='' is the documented way to open a file for csv.writer
    # (prevents spurious blank lines on Windows).
    with open(os.path.join(self._output_dir, output_filename), mode='a+',
              newline='') as output_file:
        writer = csv.writer(output_file, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow([
            "T1", "T2", "labels", "subject", "T1_min", "T1_max", "T1_mean",
            "T1_std", "T2_min", "T2_max", "T2_mean", "T2_std"
        ])

        for source_path, source_path_t2, target_path, subject in zip(
                source_paths_t1, source_paths_t2, target_paths, subjects):
            self.LOGGER.info("Processing file {}".format(source_path))
            t1 = ToNumpyArray()(source_path)
            t2 = ToNumpyArray()(source_path_t2)

            # Write the statistics row directly instead of stacking the
            # scalars into a (12, 1) array and iterating its single column.
            writer.writerow([
                source_path, source_path_t2, target_path, subject,
                str(t1.min()), str(t1.max()), str(t1.mean()), str(t1.std()),
                str(t2.min()), str(t2.max()), str(t2.mean()), str(t2.std())
            ])
        # The `with` block closes the file; no explicit close() needed.
def run(self):
    """Pair every T1 image with its label file and process the pairs."""
    t1_images = natural_sort(
        extract_file_paths(os.path.join(self._root_dir, "T1")))
    label_files = natural_sort(
        extract_file_paths(os.path.join(self._root_dir, "label")))

    # One (T1, label) pair per row.
    pairs = np.stack((np.array(t1_images), np.array(label_files)), axis=1)

    # Parallel alternative kept for reference:
    # self._dispatch_jobs(pairs, 5)
    self._do_job(pairs)
def run(self, prefix: str = ""): images_T1 = natural_sort( extract_file_paths(os.path.join(self._root_dirs["iSEG"], "T1"))) labels = natural_sort( extract_file_paths(os.path.join(self._root_dirs["iSEG"], "label"))) files = np.stack((np.array(images_T1), np.array(labels)), axis=1) self._dataset_mean_iSEG = np.mean( self._dispatch_jobs_in_pool(files, 5, self._get_mean_iseg)) self._dataset_std_iSEG = np.mean( self._dispatch_jobs_in_pool(files, 5, self._get_std_iseg)) files = list() for subject in sorted( os.listdir(os.path.join(self._root_dirs["MRBrainS"]))): files.append( extract_file_paths( os.path.join(self._root_dirs["MRBrainS"], subject))) self._dataset_mean_MRBrainS = np.mean( self._dispatch_jobs_in_pool(files, 5, self._get_mean_mrbrains)) self._dataset_std_MRBrainS = np.mean( self._dispatch_jobs_in_pool(files, 5, self._get_std_mrbrains)) files = pandas.read_csv(self._root_dirs["ABIDE"]) images_T1 = np.asarray(files["T1"]) labels = np.asarray(files["labels"]) files = np.stack((np.array(images_T1), np.array(labels)), axis=1) self._dataset_mean_ABIDE = np.mean( self._dispatch_jobs_in_pool(files, 8, self._get_mean_abide)) self._dataset_std_ABIDE = np.mean( self._dispatch_jobs_in_pool(files, 8, self._get_std_abide)) print("Triple Dataset mean: {}".format( np.mean([ self._dataset_mean_iSEG, self._dataset_mean_MRBrainS, self._dataset_mean_ABIDE ]))) print("Triple Dataset std: {}".format( np.sqrt( np.mean([ self._dataset_std_iSEG, self._dataset_std_MRBrainS, self._dataset_std_ABIDE ])))) print("Dual Dataset mean: {}".format( np.mean([self._dataset_mean_iSEG, self._dataset_mean_MRBrainS]))) print("Dual Dataset std: {}".format( np.sqrt( np.mean([self._dataset_std_iSEG, self._dataset_std_MRBrainS]))))
def get_mrbrains_train_valid_test_paths(self, modalities):
    """Split the MRBrainS CSV into train/valid/test/reconstruction path sets.

    :param modalities: Iterable of modality identifiers matching CSV columns.
    :return: Four ``(source_paths, target_paths)`` tuples, in order: train,
        valid, test, reconstruction.
    """
    subjects = np.array(self._csv["subjects"].drop_duplicates().tolist())

    # NOTE(review): reuses iSEGSegmentationFactory.shuffle_split inside a
    # MRBrainS method — confirm intentional and not a copy-paste slip.
    train_subjects, valid_subjects = iSEGSegmentationFactory.shuffle_split(
        subjects, self._test_size)
    valid_subjects, test_subjects = iSEGSegmentationFactory.shuffle_split(
        valid_subjects, self._test_size)

    # np.random.choice(n, n, replace=False) is a full permutation, so this
    # selects ALL test subjects in random order.
    # TODO(review): confirm whether a single subject was intended here.
    reconstruction_subject = test_subjects[np.random.choice(
        np.arange(0, len(test_subjects)), len(test_subjects), replace=False)]

    # Keep only rows whose center_class is 1, 2 or 3 for the learning splits.
    filtered_csv = self._csv.loc[self._csv["center_class"].isin([1, 2, 3])]

    train_csv = filtered_csv[filtered_csv["subjects"].isin(train_subjects)]
    valid_csv = filtered_csv[filtered_csv["subjects"].isin(valid_subjects)]
    test_csv = filtered_csv[filtered_csv["subjects"].isin(test_subjects)]
    # Reconstruction uses the UNFILTERED csv (all center classes).
    reconstruction_csv = self._csv[self._csv["subjects"].isin(
        reconstruction_subject)]

    train_source_paths, train_target_paths = self._stack_split_paths(
        train_csv, modalities)
    valid_source_paths, valid_target_paths = self._stack_split_paths(
        valid_csv, modalities)
    test_source_paths, test_target_paths = self._stack_split_paths(
        test_csv, modalities)
    reconstruction_source_paths, reconstruction_target_paths = \
        self._stack_split_paths(reconstruction_csv, modalities)

    return (train_source_paths, train_target_paths), \
           (valid_source_paths, valid_target_paths), \
           (test_source_paths, test_target_paths), \
           (reconstruction_source_paths, reconstruction_target_paths)

@staticmethod
def _stack_split_paths(split_csv, modalities):
    """Build ``(source_paths, target_paths)`` for one CSV split.

    Each modality column is natural-sorted and stacked along axis 1; the
    "LabelsForTesting" column is natural-sorted into the target array.

    NOTE(review): sorting each column independently assumes the natural sort
    preserves row correspondence between modalities and labels — verify.
    """
    source_paths = np.stack(
        [
            natural_sort(list(split_csv[str(modality)]))
            for modality in modalities
        ],
        axis=1)
    target_paths = np.array(
        natural_sort(list(split_csv["LabelsForTesting"])))
    return source_paths, target_paths
def setUp(self) -> None:
    """Prepare the dataset, reconstructor and padded full image fixtures."""
    file_paths = extract_file_paths(self.PATH)
    self._dataset = MRBrainSSegmentationFactory.create(
        natural_sort(file_paths),
        None,
        modalities=Modality.T1,
        dataset_id=0)

    self._reconstructor = ImageReconstructor(
        [256, 256, 192], [1, 32, 32, 32], [1, 8, 8, 8])

    pipeline = Compose([
        ToNumpyArray(),
        PadToPatchShape([1, 32, 32, 32], [1, 8, 8, 8]),
    ])
    self._full_image = pipeline(self.FULL_IMAGE_PATH)