def main(hdf_file: str, data_dir: str):
    """Create the dataset file from the subjects found under data_dir.

    Crawls data_dir for subject files, removes any pre-existing dataset
    file, and writes all subjects to hdf_file with intensity-normalized
    images.
    """
    file_keys = [FileTypes.T1, FileTypes.T2, FileTypes.GT, FileTypes.MASK,
                 FileTypes.AGE, FileTypes.GPA, FileTypes.SEX]
    crawler = pymia_load.FileSystemDataCrawler(data_dir,
                                               file_keys,
                                               DataSetFilePathGenerator(),
                                               DirectoryFilter(),
                                               '.mha')

    subjects = []
    for subject_id, subject_files in crawler.data.items():
        subjects.append(Subject(subject_id, subject_files))

    # start from a clean slate: drop a previously created dataset file
    if os.path.exists(hdf_file):
        os.remove(hdf_file)

    with pymia_crt.get_writer(hdf_file) as writer:
        callbacks = pymia_crt.get_default_callbacks(writer)

        # normalize the image intensities while writing
        transform = pymia_tfm.IntensityNormalization(loop_axis=3, entries=('images', ))

        traverser = pymia_crt.SubjectFileTraverser()
        traverser.traverse(subjects, callback=callbacks, load=LoadData(), transform=transform)
def main(hdf_file: str, data_dir: str):
    """Build the dataset file for all subjects found under data_dir.

    Any existing dataset file is deleted first. While writing, image
    intensities are normalized and the label/mask volumes receive a
    trailing channel dimension.
    """
    file_keys = [FileTypes.T1, FileTypes.T2, FileTypes.GT, FileTypes.MASK,
                 FileTypes.AGE, FileTypes.GPA, FileTypes.SEX]
    crawler = pymia_load.FileSystemDataCrawler(data_dir,
                                               file_keys,
                                               DataSetFilePathGenerator(),
                                               DirectoryFilter(),
                                               '.mha')

    subjects = []
    for subject_id, subject_files in crawler.data.items():
        subjects.append(Subject(subject_id, subject_files))

    # write from scratch: remove a previously created dataset file
    if os.path.exists(hdf_file):
        os.remove(hdf_file)

    with pymia_crt.get_writer(hdf_file) as writer:
        callbacks = pymia_crt.get_default_callbacks(writer)

        # normalize the images and unsqueeze the labels and mask.
        # The unsqueeze is needed due to the convention of keeping the number
        # of channels as the last dimension: a 10 x 256 x 256 volume becomes
        # 10 x 256 x 256 x 1 after the operation.
        transform = pymia_tfm.ComposeTransform([
            pymia_tfm.IntensityNormalization(loop_axis=3, entries=('images', )),
            pymia_tfm.UnSqueeze(entries=('labels', 'mask')),
        ])

        traverser = pymia_crt.SubjectFileTraverser()
        traverser.traverse(subjects, callback=callbacks, load=LoadData(), transform=transform)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction

    The extracted feature matrix and label vector are pickled to
    'data_train.pckl' and 'labels_train.pckl' in the working directory.

    Args:
        result_dir: directory for results (unused by this preprocessing step).
        data_atlas_dir: directory containing the atlas images.
        data_train_dir: directory containing the training images.
        data_test_dir: directory containing the test images (unused here).
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    pre_process_params = {'zscore_pre': True,
                          'registration_pre': False,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True,
                          'hog_feature': True,
                          'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    # store preprocessed features/labels to file; context managers guarantee
    # the files are closed even if pickling raises
    with open('data_train.pckl', 'wb') as file_id:
        pickle.dump(data_train, file_id)
    with open('labels_train.pckl', 'wb') as file_id:
        pickle.dump(labels_train, file_id)

    print('-' * 5, 'Preprocessed images stored')
def collect_image_paths(data_dir):
    """Create a data crawler that collects the T1-weighted and ground-truth image paths.

    Args:
        data_dir: root directory containing one sub-directory per subject.

    Returns:
        A load.FileSystemDataCrawler over the T1w and ground-truth images.
    """
    image_keys = [structure.BrainImageTypes.T1w, structure.BrainImageTypes.GroundTruth]

    class MyFilePathGenerator(load.FilePathGenerator):
        """Maps an image key to its file name within a subject directory."""

        @staticmethod
        def get_full_file_path(id_: str, root_dir: str, file_key, file_extension: str) -> str:
            if file_key == structure.BrainImageTypes.T1w:
                file_name = 'T1native'
            elif file_key == structure.BrainImageTypes.GroundTruth:
                file_name = 'labels_native'
            else:
                raise ValueError('Unknown key')
            return os.path.join(root_dir, file_name + file_extension)

    dir_filter = futil.DataDirectoryFilter()

    # pass the directory filter so only valid data directories are crawled
    # (previously created but never used, leaving the todo unresolved)
    crawler = load.FileSystemDataCrawler(data_dir,
                                         image_keys,
                                         MyFilePathGenerator(),
                                         dir_filter)
    return crawler
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    Trains a random forest on the training images, segments the test images,
    evaluates the segmentations with and without post-processing, and writes
    the resulting label maps to a timestamped sub-directory of result_dir.

    Args:
        result_dir: directory to write evaluation results and segmentations to.
        data_atlas_dir: directory containing the atlas images.
        data_train_dir: directory containing the training images.
        data_test_dir: directory containing the test images.
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    pre_process_params = {'zscore_pre': True,
                          'registration_pre': False,
                          'coordinates_feature': False,
                          'intensity_feature': False,
                          'gradient_intensity_feature': False,
                          'hog_feature': False,
                          'canny_feature': False,
                          'secondOrder_feature': True,
                          'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    forest = sk_ensemble.RandomForestClassifier(
        max_features=images[0].feature_matrix[0].shape[1],
        n_estimators=20,
        max_depth=25)

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    print(forest.feature_importances_)

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()

        # sanity check: warn about non-finite feature values before prediction
        # (replaces the previous per-row/per-column NaN/Inf debug dumps)
        features = img.feature_matrix[0]
        num_nan = int(np.isnan(features).sum())
        num_inf = int(np.isinf(features).sum())
        if num_nan or num_inf:
            print('  WARNING:', num_nan, 'NaN and', num_inf,
                  'Inf values in feature matrix of shape', features.shape)

        predictions = forest.predict(features)
        probabilities = forest.predict_proba(features)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images;
        # uint8 suffices for the small label range and is consistent with the
        # sibling pipelines (previously uint64)
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities,
                                                                           img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'crf_post': False}
    images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                     post_process_params, multi_process=True)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i],
                        os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(images_post_processed[i],
                        os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # fix the random seeds for reproducible results
    seed = 42
    random.seed(seed)
    np.random.seed(seed)

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)
    #atlas_creation()
    #putil.load_atlas_custom_images(data_train_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         LOADING_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    pre_process_params = {'skullstrip_pre': True,
                          'normalization_pre': True,
                          'registration_pre': True,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    # warnings.warn('Random forest parameters not properly set.')
    # NOTE(review): the forest uses 5 trees with maximum depth 10;
    # these settings might not be the optimal ones...
    forest = sk_ensemble.RandomForestClassifier(
        max_features=images[0].feature_matrix[0].shape[1],
        n_estimators=5,
        max_depth=10)

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir,
                                         LOADING_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint8), img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
            probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'simple_post': True}
    images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                     post_process_params, multi_process=True)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i],
                        os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(images_post_processed[i],
                        os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using a pre-trained SVM (RBF kernel).

    The main routine executes the medical image analysis pipeline:

        - Segmentation using a pre-trained SVM classifier on unseen images
        - Evaluation of the segmentation

    Post-processing is intentionally disabled in this pipeline because the
    SVM does not produce the class probabilities it would require.

    Args:
        result_dir: directory to write evaluation results and segmentations to.
        data_atlas_dir: directory containing the atlas images.
        data_train_dir: directory containing the training images (unused here).
        data_test_dir: directory containing the test images.
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    pre_process_params = {'zscore_pre': True,
                          'registration_pre': False,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True,
                          'second_oder_coordinate_feature': False,
                          'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]}

    print('-' * 5, 'Testing...')

    # load the pre-trained classifier and its feature scaler.
    # NOTE(review): pickle.load must only be used on trusted files.
    with open('svm_rbf_fullset_C15_G5_lotofpointspersample.pckl', 'rb') as file_id:
        svm_rbf_classifier = pickle.load(file_id)
    with open('scaler_rbf_fullset_C15_G5_lotofpointspersample.pckl', 'rb') as file_id:
        scaler = pickle.load(file_id)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    images_prediction = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        # scale the features with the scaler fitted during training
        scaled_features, _ = util.scale_features(img.feature_matrix[0], scaler)

        start_time = timeit.default_timer()
        predictions = svm_rbf_classifier.predict(scaled_features)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction back to a SimpleITK image
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)

    # save results
    for i, img in enumerate(images_test):
        sitk.WriteImage(images_prediction[i],
                        os.path.join(result_dir,
                                     images_test[i].id_ + '_SEG_SVM_fullset_C15-_G5_lotofpointspersample.mha'),
                        True)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str, ml_method: str,
         verbose: bool):
    """Brain tissue segmentation with a selectable classifier.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Classifier model building (selected via ml_method)
        - Segmentation of unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation

    Args:
        result_dir: directory to write evaluation results and segmentations to.
        data_atlas_dir: directory containing the atlas images.
        data_train_dir: directory containing the training images.
        data_test_dir: directory containing the test images.
        ml_method: one of 'random_forest', 'svm_linear', 'svm_rbf', 'logistic_regression'.
        verbose: print class counts and feature importances when True.

    Raises:
        ValueError: if ml_method is not a supported algorithm.
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training ' + ml_method + '...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    pre_process_params = {'zscore_pre': True,
                          'registration_pre': False,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True,
                          'second_oder_coordinate_feature': False,
                          'label_percentages': [0.0005, 0.005, 0.005, 0.05, 0.09, 0.022]}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    if verbose:
        util.print_class_count(labels_train)

    start_time = timeit.default_timer()

    if ml_method == 'random_forest':
        classifier = sk_ensemble.RandomForestClassifier(max_features=images[0].feature_matrix[0].shape[1],
                                                        n_estimators=20,
                                                        max_depth=25)
        data_train_scaled = data_train  # do not scale features to keep original RF
    elif ml_method == 'svm_linear':
        # probability=True is required: predict_proba is called at test time
        classifier = svm.SVC(kernel='linear', C=1, class_weight='balanced', probability=True)
        data_train_scaled, scaler = util.scale_features(data_train)
    elif ml_method == 'svm_rbf':
        # probability=True is required: predict_proba is called at test time
        classifier = svm.SVC(kernel='rbf', C=15, gamma=5, class_weight='balanced',
                             decision_function_shape='ovo', probability=True)
        data_train_scaled, scaler = util.scale_features(data_train)
    elif ml_method == 'logistic_regression':
        classifier = linear_model.LogisticRegression(class_weight='balanced')
        data_train_scaled, scaler = util.scale_features(data_train)
    else:
        # raise instead of assert: asserts are stripped under python -O
        raise ValueError("No valid segmentation algorithm selected in argument ml_method")

    classifier.fit(data_train_scaled, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # print and plot feature importance for each structure
    if verbose:
        if ml_method == 'svm_linear':
            util.print_feature_importance(classifier.coef_)
            util.plot_feature_importance(classifier.coef_, result_dir)
        if ml_method == 'random_forest':
            util.print_feature_importance(classifier.feature_importances_)
            util.plot_feature_importance(classifier.feature_importances_, result_dir)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=True)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        if ml_method == 'random_forest':
            scaled_features = img.feature_matrix[0]
        else:
            # apply the scaler fitted on the training data
            scaled_features, _ = util.scale_features(img.feature_matrix[0], scaler)
        predictions = classifier.predict(scaled_features)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)
        probabilities = classifier.predict_proba(scaled_features)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities,
                                                                           img.image_properties)
        images_probabilities.append(image_probabilities)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)
        images_prediction.append(image_prediction)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'crf_post': False}
    images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                     post_process_params, multi_process=True)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i],
                        os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(images_post_processed[i],
                        os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Pre-process training images, store features/labels, and save a label-distribution image.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction

    Additionally writes the summed ground-truth image for one structure to
    result_dir and pickles the feature matrix and label vector to the
    working directory.

    Args:
        result_dir: directory to write the summed ground-truth image to.
        data_atlas_dir: directory containing the atlas images.
        data_train_dir: directory containing the training images.
        data_test_dir: directory containing the test images (unused here).
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    pre_process_params = {'zscore_pre': True,
                          'registration_pre': False,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True,
                          'second_oder_coordinate_feature': False,
                          # alternative percentages: [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]
                          'label_percentages': [0.0005, 0.005, 0.005, 0.05, 0.09, 0.022]}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    # compute sum over all ground truth images for one structure
    label = putil.LabelImageTypes.AMYGDALA
    img_summed = util.compute_label_dist(images, label)

    # save result
    sitk.WriteImage(img_summed,
                    os.path.join(result_dir, 'groundtruth_sum_' + label.name + '.mha'),
                    True)

    # store preprocessed features/labels to file; context managers guarantee
    # the files are closed even if pickling raises
    with open('data_train_reduced2.pckl', 'wb') as file_id:
        pickle.dump(data_train, file_id)
    with open('labels_train_reduced.pckl', 'wb') as file_id:
        pickle.dump(labels_train, file_id)

    print('-' * 5, 'Preprocessed images stored')

    # print how many labels of each class were selected by the sampling mask
    util.print_class_count(labels_train)