def collect_image_paths(data_dir):
    """Crawl ``data_dir`` for the T1-weighted image and the ground-truth labels.

    Args:
        data_dir: Root directory handed to ``futil.FileSystemDataCrawler``.
            (The previous implementation ignored this argument and always
            crawled the hard-coded ``'../data/exercise/'`` directory.)

    Returns:
        The ``futil.FileSystemDataCrawler`` instance created for ``data_dir``.
    """
    image_keys = [
        structure.BrainImageTypes.T1w,
        structure.BrainImageTypes.GroundTruth,
    ]

    class MyFilePathGenerator(futil.FilePathGenerator):
        """Translate an image key into the file path below a root directory."""

        @staticmethod
        def get_full_file_path(id_: str, root_dir: str, file_key, file_extension: str) -> str:
            """Return ``root_dir/<file name><file_extension>`` for ``file_key``.

            Raises:
                ValueError: If ``file_key`` is neither the T1w nor the
                    GroundTruth image type.
            """
            if file_key == structure.BrainImageTypes.T1w:
                file_name = 'T1native'
            elif file_key == structure.BrainImageTypes.GroundTruth:
                file_name = 'labels_native'
            else:
                raise ValueError('Unknown key')

            return os.path.join(root_dir, file_name + file_extension)

    dir_filter = futil.DataDirectoryFilter()

    # crawl the directory the caller asked for, not a fixed exercise path
    crawler = futil.FileSystemDataCrawler(data_dir,
                                          image_keys,
                                          MyFilePathGenerator(),
                                          dir_filter,
                                          '.nii.gz')
    return crawler
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Pre-process the training images and pickle the features and labels.

    Steps carried out by this routine:
        - Atlas loading
        - Crawling of the training directory
        - Pre-processing / feature extraction of the training images
        - Storing the feature matrix in ``data_train.pckl`` and the label
          vector in ``labels_train.pckl`` (both relative to the current
          working directory)

    Args:
        result_dir: Not used by this routine; kept for a uniform signature.
        data_atlas_dir: Directory passed to ``putil.load_atlas_images``.
        data_train_dir: Directory crawled for the training images.
        data_test_dir: Not used by this routine; kept for a uniform signature.
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    pre_process_params = {
        'zscore_pre': True,
        'registration_pre': False,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
        'hog_feature': True,
        'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02],
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    # store the pre-processed data; the context managers close the files even
    # when pickling fails part-way through
    with open('data_train.pckl', 'wb') as file_id:
        pickle.dump(data_train, file_id)

    with open('labels_train.pckl', 'wb') as file_id:
        pickle.dump(labels_train, file_id)

    print('-' * 5, 'Preprocessed images stored')
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str, tmp_result_dir: str):
    """Post-process and evaluate segmentations that were produced beforehand.

    This is the tail of the full pipeline. Training and segmentation are not
    performed here; the predictions are read back through
    ``putil.load_prediction_images``.

    Carried out in this routine:
        - Loading of the atlas and of the test images
        - Loading of the stored predictions and probabilities
        - Evaluation of the raw segmentations
        - Post-processing of the segmentations and evaluation of the result
        - Writing of the segmentations and of the evaluation reports

    Args:
        result_dir: Parent directory; a timestamped sub-directory is created
            below it and receives all outputs.
        data_atlas_dir: Directory passed to ``putil.load_atlas_images``.
        data_train_dir: Not used by this routine.
        data_test_dir: Directory crawled for the test images.
        tmp_result_dir: Directory handed to ``putil.load_prediction_images``.
    """
    putil.load_atlas_images(data_atlas_dir)

    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
    }

    # every run writes into its own timestamped directory
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, timestamp)
    os.makedirs(result_dir, exist_ok=True)

    evaluator = putil.init_evaluator()

    # crawl the test image directories
    test_crawler = futil.FileSystemDataCrawler(data_test_dir,
                                               LOADING_KEYS,
                                               futil.BrainImageFilePathGenerator(),
                                               futil.DataDirectoryFilter())

    # the test images are needed again for evaluation and post-processing
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(test_crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    # NOTE(review): the last argument is a hard-coded identifier, presumably
    # the timestamp of the stored prediction run -- confirm before reuse.
    images_prediction, images_probabilities = putil.load_prediction_images(
        images_test, tmp_result_dir, '2020-10-30-18-31-15')

    ground_truth = structure.BrainImageTypes.GroundTruth

    # evaluate the segmentations as they were stored
    for idx, img in enumerate(images_test):
        evaluator.evaluate(images_prediction[idx], img.images[ground_truth], img.id_)

    post_process_params = {
        'simple_post': True,
        'variance': 1.0,
        'preserve_background': False,
    }
    images_post_processed = putil.post_process_batch(images_test,
                                                     images_prediction,
                                                     images_probabilities,
                                                     post_process_params,
                                                     multi_process=False)

    # evaluate the post-processed segmentations and write both variants
    for idx, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[idx], img.images[ground_truth], img.id_ + '-PP')

        raw_path = os.path.join(result_dir, img.id_ + '_SEG.mha')
        sitk.WriteImage(images_prediction[idx], raw_path, True)

        post_path = os.path.join(result_dir, img.id_ + '_SEG-PP.mha')
        sitk.WriteImage(images_post_processed[idx], post_path, True)

    # report subject-wise results to file and console
    os.makedirs(result_dir, exist_ok=True)  # make sure the result directory exists
    writer.CSVWriter(os.path.join(result_dir, 'results.csv')).write(evaluator.results)

    print('\nSubject-wise results...')
    writer.ConsoleWriter().write(evaluator.results)

    # report mean and standard deviation over all subjects as well
    functions = {'MEAN': np.mean, 'STD': np.std}
    summary_path = os.path.join(result_dir, 'results_summary.csv')
    writer.CSVStatisticsWriter(summary_path, functions=functions).write(evaluator.results)

    print('\nAggregated statistic results...')
    writer.ConsoleStatisticsWriter(functions=functions).write(evaluator.results)

    # leave the evaluator empty for a subsequent evaluation
    evaluator.clear()
def main(_):
    """Ensemble the probabilities stored by several previous runs.

    For every directory in ``RESULTS`` the file ``all_probabilities.npy`` is
    loaded. The arrays are combined element-wise, either by maximum (when
    ``ENSEMBLE_MAX`` is truthy) or by arithmetic mean. The combined
    probabilities are then turned into label images, evaluated, post-processed
    and written to a timestamped directory below ``FLAGS.result_dir``.

    Reads the module-level names ``RESULTS``, ``ENSEMBLE_MAX``, ``FLAGS``,
    ``IMAGE_KEYS``, ``TEST_BATCH_SIZE`` and ``LABEL_CLASSES``.

    Args:
        _: Ignored.

    Exits with status 1 if ``RESULTS`` is empty or if the stored arrays do not
    all have the same shape.
    """
    # load results from various previous runs
    all_probabilities = None
    for r in RESULTS:
        p = np.load(os.path.join(r, 'all_probabilities.npy'))
        if all_probabilities is None:
            all_probabilities = p
            continue

        if p.shape != all_probabilities.shape:
            print('Error: all_probabilities.npy do not match: ' + str(p.shape) + ' vs. ' +
                  str(all_probabilities.shape) + ' for ' + r)
            sys.exit(1)

        if ENSEMBLE_MAX:
            all_probabilities = np.maximum(all_probabilities, p)
        else:
            all_probabilities = all_probabilities + p

    if all_probabilities is None:
        # nothing was loaded; fail clearly instead of crashing further down
        print('Error: no results to ensemble (RESULTS is empty)')
        sys.exit(1)

    if not ENSEMBLE_MAX:
        # average over the number of runs (not over the length of a path string)
        all_probabilities = all_probabilities / len(RESULTS)

    # convert back to float32
    all_probabilities = all_probabilities.astype(np.float32)

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
    }

    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')

    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    # position of the current image along the first axis of all_probabilities;
    # it keeps counting across batches
    index = 0
    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing manages out of range; no need to worry
        batch_data = dict(data_items[batch_index:batch_index + TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            probabilities = all_probabilities[index, :, :]
            index = index + 1
            predictions = LABEL_CLASSES[probabilities.argmax(axis=1)]
            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(image_prediction,
                               img.images[structure.BrainImageTypes.GroundTruth],
                               img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(images_post_processed[i],
                               img.images[structure.BrainImageTypes.GroundTruth],
                               img.id_ + '-PP')

            # save results
            sitk.WriteImage(images_prediction[i],
                            os.path.join(result_dir, img.id_ + '_SEG.mha'), True)
            sitk.WriteImage(images_post_processed[i],
                            os.path.join(result_dir, img.id_ + '_SEG-PP.mha'), True)

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Result dir: {}'.format(result_dir))
        print('Result dir: {}'.format(result_dir), file=summary_file)
        print('Ensemble from ' + str(RESULTS), file=summary_file)
        print('ENSEMBLE_MAX ' + str(ENSEMBLE_MAX), file=summary_file)

        stats = statistics.gather_statistics(os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Train a random forest on the training images and segment the test images.

    Pipeline stages executed here:
        - Seeding of ``random`` and ``numpy.random`` with 42
        - Atlas loading, crawling and pre-processing of the training images
        - Fitting a random forest (5 trees, maximum depth 10)
        - Pre-processing and segmentation of the test images
        - Evaluation before and after post-processing
        - Writing of both segmentations to a timestamped result directory

    Args:
        result_dir: Parent directory for the timestamped result directory.
        data_atlas_dir: Directory passed to ``putil.load_atlas_images``.
        data_train_dir: Directory crawled for the training images.
        data_test_dir: Directory crawled for the test images.
    """
    seed = 42
    random.seed(seed)
    np.random.seed(seed)

    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    train_crawler = load.FileSystemDataCrawler(data_train_dir,
                                               LOADING_KEYS,
                                               futil.BrainImageFilePathGenerator(),
                                               futil.DataDirectoryFilter())

    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
    }

    images = putil.pre_process_batch(train_crawler.data, pre_process_params, multi_process=False)

    # stack the per-image features and labels into one training set
    feature_blocks = [img.feature_matrix[0] for img in images]
    label_blocks = [img.feature_matrix[1] for img in images]
    data_train = np.concatenate(feature_blocks)
    labels_train = np.concatenate(label_blocks).squeeze()

    # every split may consider all features; the forest is kept small
    # (5 trees, depth 10) and these values are not claimed to be optimal
    n_features = images[0].feature_matrix[0].shape[1]
    forest = sk_ensemble.RandomForestClassifier(max_features=n_features,
                                                n_estimators=5,
                                                max_depth=10)

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # every run writes into its own timestamped directory
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, timestamp)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    test_crawler = load.FileSystemDataCrawler(data_test_dir,
                                              LOADING_KEYS,
                                              futil.BrainImageFilePathGenerator(),
                                              futil.DataDirectoryFilter())

    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(test_crawler.data, pre_process_params, multi_process=False)

    ground_truth = structure.BrainImageTypes.GroundTruth
    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # turn the flat arrays back into SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint8), img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
            probabilities, img.image_properties)

        # evaluation of the raw segmentation
        evaluator.evaluate(image_prediction, img.images[ground_truth], img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    post_process_params = {'simple_post': True}
    images_post_processed = putil.post_process_batch(images_test,
                                                     images_prediction,
                                                     images_probabilities,
                                                     post_process_params,
                                                     multi_process=True)

    for idx, img in enumerate(images_test):
        # evaluation of the post-processed segmentation
        evaluator.evaluate(images_post_processed[idx], img.images[ground_truth], img.id_ + '-PP')

        # store both variants next to each other
        raw_path = os.path.join(result_dir, img.id_ + '_SEG.mha')
        sitk.WriteImage(images_prediction[idx], raw_path, True)

        post_path = os.path.join(result_dir, img.id_ + '_SEG-PP.mha')
        sitk.WriteImage(images_post_processed[idx], post_path, True)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Segment the test images with a previously trained, pickled RBF SVM.

    Pipeline stages executed here:
        - Atlas loading
        - Loading of the pickled classifier and feature scaler from the
          current working directory
        - Pre-processing of the test images
        - Segmentation with the SVM on the scaled features
        - Evaluation of the segmentation (no post-processing is applied)
        - Writing of the segmentations into ``result_dir``

    Args:
        result_dir: Directory handed to the evaluator and used for the
            written segmentations.
        data_atlas_dir: Directory passed to ``putil.load_atlas_images``.
        data_train_dir: Not used; the model is loaded from disk. The earlier
            crawl of this directory was discarded before use and was removed.
        data_test_dir: Directory crawled for the test images.
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    pre_process_params = {
        'zscore_pre': True,
        'registration_pre': False,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
        'second_oder_coordinate_feature': False,
        'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02],
    }

    print('-' * 5, 'Testing...')

    # load classifier and scaler; the context managers close the files even if
    # unpickling fails.
    # NOTE(review): pickle.load executes arbitrary code from the file -- only
    # load model files that come from a trusted source.
    with open('svm_rbf_fullset_C15_G5_lotofpointspersample.pckl', 'rb') as file_id:
        svm_rbf_classifier = pickle.load(file_id)

    with open('scaler_rbf_fullset_C15_G5_lotofpointspersample.pckl', 'rb') as file_id:
        scaler = pickle.load(file_id)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    images_prediction = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        # scale with the scaler that was stored together with the classifier
        scaled_features, _ = util.scale_features(img.feature_matrix[0], scaler)

        start_time = timeit.default_timer()
        predictions = svm_rbf_classifier.predict(scaled_features)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction back to a SimpleITK image
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint8), img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction,
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_)

        images_prediction.append(image_prediction)

    # save results (one prediction was appended per test image above)
    for img, image_prediction in zip(images_test, images_prediction):
        sitk.WriteImage(
            image_prediction,
            os.path.join(result_dir,
                         img.id_ + '_SEG_SVM_fullset_C15-_G5_lotofpointspersample.mha'),
            True)
def main(_):
    """Brain tissue segmentation using a k-nearest-neighbours classifier.

    Pipeline stages executed here:
        - Atlas loading, crawling and pre-processing of the training images
        - Fitting a distance-weighted kNN classifier (20 neighbours)
        - Batch-wise pre-processing and segmentation of the test images
        - Evaluation before and after CRF post-processing
        - Writing of the segmentations and of ``summary.txt``

    Reads the module-level names ``FLAGS``, ``IMAGE_KEYS``,
    ``NORMALIZE_FEATURES`` and ``TEST_BATCH_SIZE``.

    Args:
        _: Ignored.
    """
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # A timestamped model directory is still created here although nothing in
    # this routine writes into it.
    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    train_crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir,
                                               IMAGE_KEYS,
                                               futil.BrainImageFilePathGenerator(),
                                               futil.DataDirectoryFilter())
    data_items = list(train_crawler.data.items())
    train_data_size = len(data_items)

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': False,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
    }

    n_neighbors = 20

    start_time_total_train = timeit.default_timer()

    # all training images are processed in a single batch
    images = putil.pre_process_batch(dict(data_items), pre_process_params, multi_process=True)
    print('pre-processing done')

    # stack the per-image features and labels into one training set
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images])

    if NORMALIZE_FEATURES:
        # normalize data (mean 0, std 1)
        data_train = scipy_stats.zscore(data_train)

    start_time = timeit.default_timer()
    knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights='distance', algorithm='auto')
    neigh = knn.fit(data_train, labels_train[:, 0])
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    time_total_train = timeit.default_timer() - start_time_total_train

    start_time_total_test = timeit.default_timer()
    print('-' * 5, 'Testing...')

    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    test_crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir,
                                              IMAGE_KEYS,
                                              futil.BrainImageFilePathGenerator(),
                                              futil.DataDirectoryFilter())
    data_items = list(test_crawler.data.items())

    ground_truth = structure.BrainImageTypes.GroundTruth

    # NOTE(review): this array is filled below but never read or saved in this
    # routine -- confirm whether it was meant to be written to disk.
    all_probabilities = None

    for batch_start in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing past the end of the list simply yields a shorter batch
        batch_data = dict(data_items[batch_start:batch_start + TEST_BATCH_SIZE])

        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()

            features = img.feature_matrix[0]
            if NORMALIZE_FEATURES:
                # each test image is z-scored with its own statistics
                features = scipy_stats.zscore(features)

            predictions = neigh.predict(features)
            probabilities = neigh.predict_proba(features)

            if all_probabilities is None:
                all_probabilities = np.array([probabilities])
            else:
                all_probabilities = np.concatenate((all_probabilities, [probabilities]), axis=0)

            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # turn the flat arrays back into SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluation of the raw segmentation
            evaluator.evaluate(image_prediction, img.images[ground_truth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for idx, img in enumerate(images_test):
            # evaluation of the post-processed segmentation
            evaluator.evaluate(images_post_processed[idx], img.images[ground_truth], img.id_ + '-PP')

            # store both variants next to each other
            raw_path = os.path.join(result_dir, img.id_ + '_SEG.mha')
            sitk.WriteImage(images_prediction[idx], raw_path, True)

            post_path = os.path.join(result_dir, img.id_ + '_SEG-PP.mha')
            sitk.WriteImage(images_post_processed[idx], post_path, True)

    time_total_test = timeit.default_timer() - start_time_total_test

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Result dir: {}'.format(result_dir))
        print('Result dir: {}'.format(result_dir), file=summary_file)
        print('Training data size: {}'.format(train_data_size), file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train), file=summary_file)
        print('Total testing time: {:.1f}s'.format(time_total_test), file=summary_file)
        print('Voxel Filter Mask: {}'.format(putil.FeatureExtractor.VOXEL_MASK_FLT),
              file=summary_file)
        print('Normalize Features: {}'.format(NORMALIZE_FEATURES), file=summary_file)
        print('kNN', file=summary_file)
        print('n_neighbors: {}'.format(n_neighbors), file=summary_file)

        stats = statistics.gather_statistics(os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Plot the out-of-bag error of a random forest against its number of trees.

    Pipeline stages executed here:
        - Atlas loading, crawling and pre-processing of the training images
        - Fitting one random forest (maximum depth 10) for each tree count
          from 1 to 9 with out-of-bag scoring enabled
        - Printing each out-of-bag score and plotting the error curve

    Args:
        result_dir: Not used by this routine.
        data_atlas_dir: Directory passed to ``putil.load_atlas_images``.
        data_train_dir: Directory crawled for the training images.
        data_test_dir: Not used by this routine.
    """
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    train_crawler = futil.FileSystemDataCrawler(data_train_dir,
                                                LOADING_KEYS,
                                                futil.BrainImageFilePathGenerator(),
                                                futil.DataDirectoryFilter())

    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
    }

    images = putil.pre_process_batch(train_crawler.data, pre_process_params, multi_process=False)

    # stack the per-image features and labels into one training set
    feature_blocks = [img.feature_matrix[0] for img in images]
    label_blocks = [img.feature_matrix[1] for img in images]
    data_train = np.concatenate(feature_blocks)
    labels_train = np.concatenate(label_blocks).squeeze()

    warnings.warn('Random forest parameters not properly set.')

    print(np.shape(images[0].feature_matrix[0]))

    tree_counts = range(1, 10)
    error_rate = []
    for num_estimators in tree_counts:
        # every split may consider all features of the first image's matrix
        forest = sk_ensemble.RandomForestClassifier(
            max_features=images[0].feature_matrix[0].shape[1],
            n_estimators=num_estimators,
            max_depth=10,
            oob_score=True)
        forest.fit(data_train, labels_train)

        print(forest.oob_score_)
        error_rate.append(1 - forest.oob_score_)

    plt.plot(tree_counts, error_rate)
    plt.show()
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str, ml_method: str,
         verbose: bool):
    """Brain tissue segmentation with a selectable classifier.

    Pipeline stages executed here:
        - Atlas loading, crawling and pre-processing of the training images
        - Building and fitting the classifier selected by ``ml_method``
        - Pre-processing and segmentation of the test images
        - Evaluation before and after post-processing
        - Writing of both segmentations to a timestamped result directory

    Args:
        result_dir: Parent directory for the timestamped result directory.
        data_atlas_dir: Directory passed to ``putil.load_atlas_images``.
        data_train_dir: Directory crawled for the training images.
        data_test_dir: Directory crawled for the test images.
        ml_method: One of ``'random_forest'``, ``'svm_linear'``, ``'svm_rbf'``
            or ``'logistic_regression'``.
        verbose: If true, print the class counts and, for the linear SVM and
            the random forest, print and plot the feature importance.

    Raises:
        ValueError: If ``ml_method`` is not one of the supported values. The
            check runs before any data is loaded.
    """
    supported_methods = ('random_forest', 'svm_linear', 'svm_rbf', 'logistic_regression')
    if ml_method not in supported_methods:
        # an explicit exception (instead of ``assert``) survives ``python -O``
        raise ValueError("No valid segmentation algorithm selected in argument ml_method")

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training '+ ml_method + '...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    pre_process_params = {
        'zscore_pre': True,
        'registration_pre': False,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
        'second_oder_coordinate_feature': False,
        'label_percentages': [0.0005, 0.005, 0.005, 0.05, 0.09, 0.022],
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    if verbose:
        util.print_class_count(labels_train)

    start_time = timeit.default_timer()

    scaler = None  # stays None for the random forest, which uses unscaled features
    if ml_method == 'random_forest':
        classifier = sk_ensemble.RandomForestClassifier(max_features=images[0].feature_matrix[0].shape[1],
                                                        n_estimators=20,
                                                        max_depth=25)
        data_train_scaled = data_train  # do not scale features to keep original RF
    elif ml_method == 'svm_linear':
        # probability=True is required because predict_proba is called below
        classifier = svm.SVC(kernel='linear', C=1, class_weight='balanced', probability=True)
        data_train_scaled, scaler = util.scale_features(data_train)
    elif ml_method == 'svm_rbf':
        # probability=True is required because predict_proba is called below
        classifier = svm.SVC(kernel='rbf', C=15, gamma=5, class_weight='balanced',
                             decision_function_shape='ovo', probability=True)
        data_train_scaled, scaler = util.scale_features(data_train)
    else:  # 'logistic_regression' (validated at the top)
        classifier = linear_model.LogisticRegression(class_weight='balanced')
        data_train_scaled, scaler = util.scale_features(data_train)

    classifier.fit(data_train_scaled, labels_train)

    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # print and plot feature importance for each structure
    if verbose:
        if ml_method == 'svm_linear':
            util.print_feature_importance(classifier.coef_)
            util.plot_feature_importance(classifier.coef_, result_dir)
        if ml_method == 'random_forest':
            util.print_feature_importance(classifier.feature_importances_)
            util.plot_feature_importance(classifier.feature_importances_, result_dir)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=True)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        if ml_method == 'random_forest':
            scaled_features = img.feature_matrix[0]
        else:
            # reuse the scaler obtained from the training data
            scaled_features, _ = util.scale_features(img.feature_matrix[0], scaler)

        predictions = classifier.predict(scaled_features)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)

        probabilities = classifier.predict_proba(scaled_features)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities,
                                                                           img.image_properties)
        images_probabilities.append(image_probabilities)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'crf_post': False}
    images_post_processed = putil.post_process_batch(images_test,
                                                     images_prediction,
                                                     images_probabilities,
                                                     post_process_params,
                                                     multi_process=True)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i], os.path.join(result_dir, img.id_ + '_SEG.mha'), True)
        sitk.WriteImage(images_post_processed[i], os.path.join(result_dir, img.id_ + '_SEG-PP.mha'), True)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Train and test a random forest using only the second-order feature.

    Pipeline stages executed here:
        - Atlas loading, crawling and pre-processing of the training images
        - Fitting a random forest (20 trees, maximum depth 25) and printing
          its feature importances
        - Pre-processing of the test images, with per-image NaN/inf counts
          printed as diagnostics
        - Segmentation, evaluation before and after post-processing
        - Writing of both segmentations to a timestamped result directory

    Args:
        result_dir: Parent directory for the timestamped result directory.
        data_atlas_dir: Directory passed to ``putil.load_atlas_images``.
        data_train_dir: Directory crawled for the training images.
        data_test_dir: Directory crawled for the test images.
    """
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    train_crawler = load.FileSystemDataCrawler(data_train_dir,
                                               IMAGE_KEYS,
                                               futil.BrainImageFilePathGenerator(),
                                               futil.DataDirectoryFilter())

    # only 'secondOrder_feature' is switched on among the feature flags
    pre_process_params = {
        'zscore_pre': True,
        'registration_pre': False,
        'coordinates_feature': False,
        'intensity_feature': False,
        'gradient_intensity_feature': False,
        'hog_feature': False,
        'canny_feature': False,
        'secondOrder_feature': True,
        'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02],
    }

    images = putil.pre_process_batch(train_crawler.data, pre_process_params, multi_process=False)

    # stack the per-image features and labels into one training set
    feature_blocks = [img.feature_matrix[0] for img in images]
    label_blocks = [img.feature_matrix[1] for img in images]
    data_train = np.concatenate(feature_blocks)
    labels_train = np.concatenate(label_blocks).squeeze()

    n_features = images[0].feature_matrix[0].shape[1]
    forest = sk_ensemble.RandomForestClassifier(max_features=n_features,
                                                n_estimators=20,
                                                max_depth=25)

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')
    print(forest.feature_importances_)

    # every run writes into its own timestamped directory
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, timestamp)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    test_crawler = load.FileSystemDataCrawler(data_test_dir,
                                              IMAGE_KEYS,
                                              futil.BrainImageFilePathGenerator(),
                                              futil.DataDirectoryFilter())

    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(test_crawler.data, pre_process_params, multi_process=False)

    ground_truth = structure.BrainImageTypes.GroundTruth
    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()

        # diagnostics: NaN and inf counts per column and per row, with the
        # matrix shape printed in between
        for invalid_mask in (np.isnan, np.isinf):
            print(np.sum(invalid_mask(img.feature_matrix[0]), axis=0))
            print(img.feature_matrix[0].shape)
            print(np.sum(invalid_mask(img.feature_matrix[0]), axis=1))

        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # turn the flat arrays back into SimpleITK images
        # NOTE(review): labels are cast to uint64 here -- confirm that this
        # width is intentional.
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint64), img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
            probabilities, img.image_properties)

        # evaluation of the raw segmentation
        evaluator.evaluate(image_prediction, img.images[ground_truth], img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    post_process_params = {'crf_post': False}
    images_post_processed = putil.post_process_batch(images_test,
                                                     images_prediction,
                                                     images_probabilities,
                                                     post_process_params,
                                                     multi_process=True)

    for idx, img in enumerate(images_test):
        # evaluation of the post-processed segmentation
        evaluator.evaluate(images_post_processed[idx], img.images[ground_truth], img.id_ + '-PP')

        # store both variants next to each other
        raw_path = os.path.join(result_dir, img.id_ + '_SEG.mha')
        sitk.WriteImage(images_prediction[idx], raw_path, True)

        post_path = os.path.join(result_dir, img.id_ + '_SEG-PP.mha')
        sitk.WriteImage(images_post_processed[idx], post_path, True)
def main(_):
    """Brain tissue segmentation using a batch-wise trained decision forest.

    The main routine executes the medical image analysis pipeline:
        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation

    Training images are processed in chunks of ``TRAIN_BATCH_SIZE`` and the
    forest's ``train`` method is called once per chunk. When
    ``USE_PREPROCESS_CACHE`` is set, the feature matrix and label vector of
    each chunk are stored below ``<script_dir>/mia-cache`` and re-used on the
    next run. Test images are processed in chunks of ``TEST_BATCH_SIZE``.
    All directories are read from ``FLAGS``; a ``summary.txt`` is written to
    the time-stamped result directory at the end.

    Args:
        _: Unused (presumably the argv list handed over by the flag-parsing
            application runner - TODO confirm against the entry point).
    """
    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # generate a model directory (use datetime to ensure that the directory is empty)
    # we need an empty directory because TensorFlow will continue training an existing model if it is not empty
    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())
    train_data_size = len(data_items)

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # initialize decision forest parameters
    df_params = df.DecisionForestParameters()
    df_params.num_classes = 4
    df_params.num_trees = 160
    df_params.max_nodes = 3000
    df_params.model_dir = model_dir
    forest = None

    start_time_total_train = timeit.default_timer()

    for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE):
        cache_file_prefix = os.path.normpath(
            os.path.join(
                script_dir, './mia-cache/batch-' + str(batch_index) + '-' +
                str(TRAIN_BATCH_SIZE)))
        cache_file_train = cache_file_prefix + '-data_train.npy'
        cache_file_labels = cache_file_prefix + '-data_labels.npy'

        # The cache is only usable when BOTH files of the batch are present.
        cache_available = (USE_PREPROCESS_CACHE
                           and os.path.exists(cache_file_train)
                           and os.path.exists(cache_file_labels))

        if cache_available:
            print('Using cache from ', cache_file_train)
            data_train = np.load(cache_file_train)
            labels_train = np.load(cache_file_labels)
        else:
            # slicing manages out of range; no need to worry
            batch_data = dict(data_items[batch_index:batch_index +
                                         TRAIN_BATCH_SIZE])

            # load images for training and pre-process
            images = putil.pre_process_batch(batch_data,
                                             pre_process_params,
                                             multi_process=True)
            print('pre-processing done')

            # generate feature matrix and label vector
            data_train = np.concatenate(
                [img.feature_matrix[0] for img in images])
            labels_train = np.concatenate(
                [img.feature_matrix[1] for img in images])

            if NORMALIZE_FEATURES:
                # Columns 0-2 are rescaled from [0, 255] to [-1, 1]; columns
                # 3-7 are z-scored (mean 0, std 1). Columns from index 8 on
                # are dropped by this step.
                # NOTE(review): the column split assumes the first three
                # features are the coordinates - confirm with the extractor.
                non_coord = scipy_stats.zscore(data_train[:, 3:8])
                coord = data_train[:, 0:3] / 255 * 2 - 1
                data_train = np.concatenate((coord, non_coord), axis=1)

            if USE_PREPROCESS_CACHE:
                # NOTE: the cache holds the data AFTER normalization, so it
                # has to be deleted when NORMALIZE_FEATURES is changed.
                print('Writing cache')
                os.makedirs(os.path.dirname(cache_file_prefix), exist_ok=True)
                # np.save writes the .npy format that the np.load calls above
                # can read without allow_pickle=True (ndarray.dump pickles).
                np.save(cache_file_train, data_train)
                np.save(cache_file_labels, labels_train)

        if forest is None:
            # the feature count is only known once the first batch is loaded
            df_params.num_features = data_train.shape[1]
            print(df_params)
            forest = df.DecisionForest(df_params)

        start_time = timeit.default_timer()
        forest.train(data_train, labels_train)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    time_total_train = timeit.default_timer() - start_time_total_train
    start_time_total_test = timeit.default_timer()

    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing manages out of range; no need to worry
        batch_data = dict(data_items[batch_index:batch_index +
                                     TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data,
                                              pre_process_params,
                                              multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            features = img.feature_matrix[0]
            if NORMALIZE_FEATURES:
                # same column treatment as during training, but the z-score
                # statistics are computed per test image
                non_coord = scipy_stats.zscore(features[:, 3:8])
                coord = features[:, 0:3] / 255 * 2 - 1
                features = np.concatenate((coord, non_coord), axis=1)

            probabilities, predictions = forest.predict(features)
            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(
                image_prediction,
                img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(
                images_post_processed[i],
                img.images[structure.BrainImageTypes.GroundTruth],
                img.id_ + '-PP')

            # save results
            sitk.WriteImage(
                images_prediction[i],
                os.path.join(result_dir, img.id_ + '_SEG.mha'), True)
            sitk.WriteImage(
                images_post_processed[i],
                os.path.join(result_dir, img.id_ + '_SEG-PP.mha'), True)

    time_total_test = timeit.default_timer() - start_time_total_test

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Result dir: {}'.format(result_dir))
        print('Result dir: {}'.format(result_dir), file=summary_file)
        print('Training data size: {}'.format(train_data_size),
              file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train),
              file=summary_file)
        print('Total testing time: {:.1f}s'.format(time_total_test),
              file=summary_file)
        print('Voxel Filter Mask: {}'.format(
            putil.FeatureExtractor.VOXEL_MASK_FLT),
              file=summary_file)
        print('Normalize Features: {}'.format(NORMALIZE_FEATURES),
              file=summary_file)
        print('Decision forest', file=summary_file)
        print(df_params, file=summary_file)
        stats = statistics.gather_statistics(
            os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
def main(_):
    """Brain tissue segmentation using an SVM.

    The main routine executes the medical image analysis pipeline:
        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - SVM model building
        - Segmentation using the SVM model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation

    All training images are loaded at once because the SVM is fitted in a
    single call; test images are processed in chunks of ``TEST_BATCH_SIZE``.
    All directories are read from ``FLAGS``; a ``summary.txt`` is written to
    the time-stamped result directory at the end.

    Args:
        _: Unused (presumably the argv list handed over by the flag-parsing
            application runner - TODO confirm against the entry point).
    """
    # SVM cannot deal with the default voxel mask (too much data); use smaller
    # sampling fractions for training instead.
    putil.FeatureExtractor.VOXEL_MASK_FLT = [0.00003, 0.0004, 0.0003, 0.0004]

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # Time-stamped model directory. It is created here but not used by the
    # rest of this routine.
    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())
    train_data_size = len(data_items)

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    start_time_total_train = timeit.default_timer()

    batch_data = dict(data_items)

    # load images for training and pre-process
    images = putil.pre_process_batch(batch_data,
                                     pre_process_params,
                                     multi_process=True)
    print('pre-processing done')

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images])

    if NORMALIZE_FEATURES:
        # normalize data (mean 0, std 1)
        data_train = scipy_stats.zscore(data_train)

    print('Start training SVM')

    # Training
    # SVM does not support online/incremental training. Need to fit all in one go!
    # Note: Very slow with large training set!
    start_time = timeit.default_timer()

    # Set to True to run a grid search over C and gamma before the final fit.
    # The search only reports and plots the scores; the final model below
    # still uses fixed hyper-parameters.
    run_grid_search = False
    if run_grid_search:
        C_range = [
            0.001, 0.01, 0.1, 0.5, 1, 3, 5, 10, 20, 50, 100, 200, 250, 300,
            1000, 2000, 5000, 10000, 20000, 50000, 100000, 120000, 150000
        ]
        gamma_range = [
            0.0000001, 0.000001, 0.00001, 0.00005, 0.0001, 0.0005, 0.001,
            0.005, 0.01, 0.1, 0.2, 0.5, 1, 5, 10
        ]
        params = [{
            'kernel': ['rbf'],
            'C': C_range,
            'gamma': gamma_range,
        }]

        clf = GridSearchCV(SVC(probability=True, cache_size=2000),
                           params,
                           cv=2,
                           n_jobs=8,
                           verbose=3)
        clf.fit(data_train, labels_train[:, 0])
        print('best param: ' + str(clf.best_params_))

        # one row per C value, one column per gamma value
        scores = clf.cv_results_['mean_test_score'].reshape(
            len(C_range), len(gamma_range))

        plt.figure(figsize=(8, 6))
        plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)
        plt.imshow(scores,
                   interpolation='nearest',
                   cmap=plt.cm.hot,
                   norm=MidpointNormalize(vmin=0.2, midpoint=0.92))
        plt.xlabel('gamma')
        plt.ylabel('C')
        plt.colorbar()
        plt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45)
        plt.yticks(np.arange(len(C_range)), C_range)
        plt.title('Validation accuracy')
        plt.savefig('svm_params.png')

        scipy.io.savemat('svm_params.mat',
                         mdict={
                             'C': C_range,
                             'gamma': gamma_range,
                             'score': scores
                         })

    # final model with fixed hyper-parameters; only the first label column is used
    svm = SVC(probability=True,
              kernel='rbf',
              C=500,
              gamma=0.00005,
              cache_size=2000,
              verbose=False)
    svm.fit(data_train, labels_train[:, 0])

    print('\n Time elapsed:', timeit.default_timer() - start_time, 's')
    time_total_train = timeit.default_timer() - start_time_total_train

    start_time_total_test = timeit.default_timer()
    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing manages out of range; no need to worry
        batch_data = dict(data_items[batch_index:batch_index +
                                     TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data,
                                              pre_process_params,
                                              multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            features = img.feature_matrix[0]
            if NORMALIZE_FEATURES:
                # z-score statistics are computed per test image
                features = scipy_stats.zscore(features)

            probabilities = np.array(svm.predict_proba(features))
            print('probabilities: ' + str(probabilities.shape))
            # label = class with the highest predicted probability
            predictions = svm.classes_[probabilities.argmax(axis=1)]
            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(
                image_prediction,
                img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(
                images_post_processed[i],
                img.images[structure.BrainImageTypes.GroundTruth],
                img.id_ + '-PP')

            # save results
            sitk.WriteImage(
                images_prediction[i],
                os.path.join(result_dir, img.id_ + '_SEG.mha'), True)
            sitk.WriteImage(
                images_post_processed[i],
                os.path.join(result_dir, img.id_ + '_SEG-PP.mha'), True)

    time_total_test = timeit.default_timer() - start_time_total_test

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Result dir: {}'.format(result_dir))
        print('Result dir: {}'.format(result_dir), file=summary_file)
        print('SVM', file=summary_file)
        print('SVM params: {}'.format(svm.get_params()), file=summary_file)
        print('pre-process-params: {}'.format(pre_process_params),
              file=summary_file)
        print('Training data size: {}'.format(train_data_size),
              file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train),
              file=summary_file)
        print('Total testing time: {:.1f}s'.format(time_total_test),
              file=summary_file)
        print('Voxel Filter Mask: {}'.format(
            putil.FeatureExtractor.VOXEL_MASK_FLT),
              file=summary_file)
        print('Normalize Features: {}'.format(NORMALIZE_FEATURES),
              file=summary_file)
        stats = statistics.gather_statistics(
            os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Evaluate random forests with different numbers of trees.

    The routine executes the following part of the pipeline:
        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building (once per tree count)
        - Segmentation using the decision forest classifier model on unseen images
        - Evaluation of the segmentation (no post-processing)

    For every tree count the evaluation results are averaged per label and
    metric over all test images.

    Args:
        result_dir: Parent directory; one time-stamped sub-directory is
            created inside it per tree count.
        data_atlas_dir: Directory passed to ``putil.load_atlas_images``.
        data_train_dir: Root directory that is crawled for training images.
        data_test_dir: Root directory that is crawled for test images.

    Returns:
        A pandas DataFrame with the columns 'n_estimators', 'label', 'metric'
        and 'value', one row per (tree count, label, metric) combination.
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params = {'skullstrip_pre': True,
                          'normalization_pre': True,
                          'registration_pre': True,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    warnings.warn('Random forest parameters not properly set.')

    print(np.shape(images[0].feature_matrix[0]))

    aggregated_results = []

    print('-' * 5, 'Testing...')

    # crawl, load and pre-process the test images once; they are re-used for
    # every tree count
    crawler = futil.FileSystemDataCrawler(data_test_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # statistics computed over all test images per (label, metric)
    functions = {'MEAN': np.mean}

    for num_estimator in [10]:
        forest = sk_ensemble.RandomForestClassifier(max_features=images[0].feature_matrix[0].shape[1],
                                                    n_estimators=num_estimator,
                                                    max_depth=10)

        start_time = timeit.default_timer()
        forest.fit(data_train, labels_train)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # Create a time-stamped directory for this run. A separate variable is
        # used so that the directories of several tree counts end up side by
        # side instead of nested inside each other.
        t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        run_dir = os.path.join(result_dir, t)
        os.makedirs(run_dir, exist_ok=True)

        print('-' * 5, 'Testing...')

        # initialize evaluator
        evaluator = putil.init_evaluator()

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            predictions = forest.predict(img.feature_matrix[0])
            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction back to a SimpleITK image
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                            img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        results = evaluator.results
        labels = sorted({result.label for result in results})
        metrics = sorted({result.metric for result in results})

        for label in labels:
            for metric in metrics:
                # collect the values of all test images for this label/metric
                values = [r.value for r in results if r.label == label and r.metric == metric]
                for fn in functions.values():
                    aggregated_results.append([num_estimator, label, metric, float(fn(values))])

        # clear results such that the evaluator is ready for the next evaluation
        evaluator.clear()

    return pd.DataFrame(aggregated_results, columns=['n_estimators', 'label', 'metric', 'value'])
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Plot one-vs-rest ROC curves for a random forest on the brain image features.

    The routine executes the following steps:
        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Pooling of the training and test features and a random 50/50 split
        - One-vs-rest random forest training on the first half
        - ROC curves and AUC per class plus micro- and macro-average, computed
          on the second half and shown with matplotlib

    Per-image segmentation, post-processing and result writing are not part
    of this variant.

    Args:
        result_dir: Parent directory; a time-stamped sub-directory is created
            inside it (nothing is written to it by this routine).
        data_atlas_dir: Directory passed to ``putil.load_atlas_images``.
        data_train_dir: Root directory that is crawled for training images.
        data_test_dir: Root directory that is crawled for test images.
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1]
                                   for img in images]).squeeze()

    warnings.warn('Random forest parameters not properly set.')

    num_features = images[0].feature_matrix[0].shape[1]
    print(np.shape(images[0].feature_matrix[0]))

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # crawl the test image directories
    crawler = futil.FileSystemDataCrawler(data_test_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data,
                                          pre_process_params,
                                          multi_process=False)
    data_test = np.concatenate([img.feature_matrix[0] for img in images_test])
    labels_test = np.concatenate(
        [img.feature_matrix[1] for img in images_test]).squeeze()

    # pool all samples and binarize the labels into one indicator column per class
    X = np.concatenate((data_train, data_test))
    y = np.concatenate((labels_train, labels_test))
    y = label_binarize(y, classes=[0, 1, 2, 3, 4, 5])
    n_classes = y.shape[1]

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=.5,
                                                        random_state=0)

    classifier = OneVsRestClassifier(
        sk_ensemble.RandomForestClassifier(max_features=num_features,
                                           n_estimators=10,
                                           max_depth=5))
    # ROC analysis needs continuous scores: with hard 0/1 predictions every
    # curve collapses to a single operating point, so the class
    # probabilities are used as scores.
    y_score = classifier.fit(X_train, y_train).predict_proba(X_test)

    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # ROC curve of the class with index 2 only
    plt.figure()
    lw = 2
    plt.plot(fpr[2],
             tpr[2],
             color='darkorange',
             lw=lw,
             label='ROC curve (area = %0.2f)' % roc_auc[2])
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()

    # Plot all ROC curves
    plt.figure()
    plt.plot(fpr["micro"],
             tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'.format(
                 roc_auc["micro"]),
             color='deeppink',
             linestyle=':',
             linewidth=4)

    plt.plot(fpr["macro"],
             tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'.format(
                 roc_auc["macro"]),
             color='navy',
             linestyle=':',
             linewidth=4)

    # the three colors repeat when there are more than three classes
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i],
                 tpr[i],
                 color=color,
                 lw=lw,
                 label='ROC curve of class {0} (area = {1:0.2f})'.format(
                     i, roc_auc[i]))

    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(
        'Some extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.show()

    # clear results such that the evaluator is ready for the next evaluation
    evaluator.clear()
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Train a random forest, predict the test images and store the raw outputs.

    This is the first part of the segmentation pipeline; it stops after the
    prediction and saves everything a later post-processing stage needs:
        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation of the unseen images with that model
        - Writing of the label image ('_SEG.mha') and the probability image
          ('_PROB.mha') of every test subject

    Args:
        result_dir: Parent directory; a time-stamped sub-directory is created
            inside it and receives the written images.
        data_atlas_dir: Directory passed to ``putil.load_atlas_images``.
        data_train_dir: Root directory that is crawled for training images.
        data_test_dir: Root directory that is crawled for test images.
    """
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # collect the training subjects
    train_crawler = futil.FileSystemDataCrawler(
        data_train_dir,
        LOADING_KEYS,
        futil.BrainImageFilePathGenerator(),
        futil.DataDirectoryFilter(),
    )

    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
    }

    # pre-process sequentially (multi-processing disabled)
    images = putil.pre_process_batch(train_crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # stack the per-image feature matrices / label vectors
    feature_parts = [train_img.feature_matrix[0] for train_img in images]
    label_parts = [train_img.feature_matrix[1] for train_img in images]
    data_train = np.concatenate(feature_parts)
    labels_train = np.concatenate(label_parts).squeeze()

    warnings.warn('Random forest parameters not properly set.')

    # every feature column is considered at each split
    n_features = images[0].feature_matrix[0].shape[1]
    forest = sk_ensemble.RandomForestClassifier(max_features=n_features,
                                                n_estimators=10,
                                                max_depth=10)

    fit_started = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - fit_started, 's')

    # all outputs of this run go into a time-stamped directory
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, timestamp)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    evaluator = putil.init_evaluator()

    # collect the test subjects
    test_crawler = futil.FileSystemDataCrawler(
        data_test_dir,
        LOADING_KEYS,
        futil.BrainImageFilePathGenerator(),
        futil.DataDirectoryFilter(),
    )

    # same parameters as for training, plus the 'training' flag set to False
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(test_crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    images_prediction = []
    images_probabilities = []
    ground_truth_key = structure.BrainImageTypes.GroundTruth

    for test_img in images_test:
        print('-' * 10, 'Testing', test_img.id_)

        predict_started = timeit.default_timer()
        predictions = forest.predict(test_img.feature_matrix[0])
        probabilities = forest.predict_proba(test_img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - predict_started, 's')

        # back to SimpleITK images using the properties of the test image
        bridge = conversion.NumpySimpleITKImageBridge
        image_prediction = bridge.convert(predictions.astype(np.uint8),
                                          test_img.image_properties)
        image_probabilities = bridge.convert(probabilities,
                                             test_img.image_properties)

        # evaluation of the raw (not post-processed) segmentation
        evaluator.evaluate(image_prediction,
                           test_img.images[ground_truth_key],
                           test_img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # store everything the post-processing stage needs
    outputs = zip(images_test, images_prediction, images_probabilities)
    for test_img, segmentation, probability_map in outputs:
        seg_path = os.path.join(result_dir, test_img.id_ + '_SEG.mha')
        prob_path = os.path.join(result_dir, test_img.id_ + '_PROB.mha')
        sitk.WriteImage(segmentation, seg_path, True)
        sitk.WriteImage(probability_map, prob_path, True)

    # the evaluation results are discarded here
    evaluator.clear()
def main(_):
    """Brain tissue segmentation using a linear classifier trained with SGD.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Classifier model building (``SGDClassifier``)
        - Segmentation using the classifier on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation

    All settings are read from the module-level ``FLAGS`` and constants
    (``TRAIN_BATCH_SIZE``, ``TEST_BATCH_SIZE``, ``USE_PREPROCESS_CACHE``,
    ``NORMALIZE_FEATURES``).

    Args:
        _: Unused.
    """
    # SGD needs the "original" value of 0.04 for ventricles
    putil.FeatureExtractor.VOXEL_MASK_FLT = [0.0003, 0.004, 0.003, 0.04]

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # time-stamped names keep the model and result directories of separate runs apart
    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())
    train_data_size = len(data_items)

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    clf = None
    start_time_total_train = timeit.default_timer()

    for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE):
        cache_file_prefix = os.path.normpath(
            os.path.join(script_dir,
                         './mia-cache/batch-' + str(batch_index) + '-' + str(TRAIN_BATCH_SIZE)))
        cache_file_train = cache_file_prefix + '-data_train.npy'
        cache_file_labels = cache_file_prefix + '-data_labels.npy'

        if USE_PREPROCESS_CACHE and os.path.exists(cache_file_train):
            print('Using cache from ', cache_file_train)
            data_train = np.load(cache_file_train)
            labels_train = np.load(cache_file_labels)
        else:
            # slicing manages out of range; no need to worry
            batch_data = dict(data_items[batch_index:batch_index + TRAIN_BATCH_SIZE])

            # load images for training and pre-process
            images = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)
            print('pre-processing done')

            # generate feature matrix and label vector
            data_train = np.concatenate([img.feature_matrix[0] for img in images])
            labels_train = np.concatenate([img.feature_matrix[1] for img in images])

            if NORMALIZE_FEATURES:
                # normalize data (mean 0, std 1)
                data_train = scipy_stats.zscore(data_train)

            if USE_PREPROCESS_CACHE:
                print('Writing cache')
                os.makedirs(os.path.dirname(cache_file_prefix), exist_ok=True)
                # np.save writes the .npy format that the np.load calls above can
                # read back without unpickling (ndarray.dump writes a pickle)
                np.save(cache_file_train, data_train)
                np.save(cache_file_labels, labels_train)

        if clf is None:
            # Hyper-parameters were selected by a cross-validated grid search over
            # eta0/alpha in [0.5 ... 0.00001], learning_rate in ('optimal', 'constant')
            # and loss in ('log', 'modified_huber'). Best params:
            # {'alpha': 0.01, 'eta0': 0.5, 'learning_rate': 'optimal', 'loss': 'modified_huber'}
            # max_iter has to be an integer number of epochs (at least one).
            n_iter = max(1, 300000 // len(data_items))
            # Note: shuffle=True gives '"RuntimeWarning: overflow encountered in expnp.exp(prob, prob)"'
            clf = SGDClassifier(learning_rate='optimal',
                                eta0=0.5,
                                alpha=0.01,
                                loss='modified_huber',
                                penalty="l2",
                                max_iter=n_iter,
                                n_jobs=8,
                                shuffle=False)

        start_time = timeit.default_timer()
        # NOTE(review): fit() is called once per batch; unless warm_start is enabled this
        # presumably re-trains from scratch, so only the last batch shapes the final
        # model -- confirm whether partial_fit() was intended.
        clf.fit(data_train, labels_train[:, 0])
        print('\n training, Time elapsed:', timeit.default_timer() - start_time, 's')

    time_total_train = timeit.default_timer() - start_time_total_train

    start_time_total_test = timeit.default_timer()
    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    # per-image probability arrays; combined once at the end instead of
    # re-concatenating the growing array for every image
    all_probabilities = []

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing manages out of range; no need to worry
        batch_data = dict(data_items[batch_index:batch_index + TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            features = img.feature_matrix[0]
            if NORMALIZE_FEATURES:
                features = scipy_stats.zscore(features)
            probabilities = np.array(clf.predict_proba(features))
            print('probabilities: ' + str(probabilities.shape))
            # label with the highest probability for every sample
            predictions = clf.classes_[probabilities.argmax(axis=1)]
            all_probabilities.append(probabilities)
            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(image_prediction,
                               img.images[structure.BrainImageTypes.GroundTruth],
                               img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(images_post_processed[i],
                               img.images[structure.BrainImageTypes.GroundTruth],
                               img.id_ + '-PP')

            # save results
            sitk.WriteImage(images_prediction[i],
                            os.path.join(result_dir, img.id_ + '_SEG.mha'), True)
            sitk.WriteImage(images_post_processed[i],
                            os.path.join(result_dir, img.id_ + '_SEG-PP.mha'), True)

    time_total_test = timeit.default_timer() - start_time_total_test

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Result dir: {}'.format(result_dir))
        print('Result dir: {}'.format(result_dir), file=summary_file)
        print('Training data size: {}'.format(train_data_size), file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train), file=summary_file)
        print('Total testing time: {:.1f}s'.format(time_total_test), file=summary_file)
        print('Voxel Filter Mask: {}'.format(putil.FeatureExtractor.VOXEL_MASK_FLT),
              file=summary_file)
        print('Normalize Features: {}'.format(NORMALIZE_FEATURES), file=summary_file)
        print('SGD', file=summary_file)

        stats = statistics.gather_statistics(os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)

    if all_probabilities:
        # one leading axis per test image, as before; the output format (pickle via
        # ndarray.dump) is kept unchanged for existing consumers of this file
        stacked = np.concatenate([p[np.newaxis] for p in all_probabilities], axis=0)
        stacked.astype(np.float16).dump(os.path.join(result_dir, 'all_probabilities.npy'))
def main(_):
    """Show a scatter-plot matrix of the training features.

    Crawls the training directory from ``FLAGS.data_train_dir``, obtains the
    feature matrix of every batch (from the pre-processing cache when
    ``USE_PREPROCESS_CACHE`` is set and a cache file exists, otherwise by
    pre-processing the images) and plots it.

    Args:
        _: Unused.
    """
    # create a time-stamped model directory
    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE):
        cache_file_prefix = os.path.normpath(
            os.path.join(script_dir,
                         './mia-cache/batch-' + str(batch_index) + '-' + str(TRAIN_BATCH_SIZE)))
        cache_file_train = cache_file_prefix + '-data_train.npy'

        if USE_PREPROCESS_CACHE and os.path.exists(cache_file_train):
            print('Using cache from ', cache_file_train)
            data_train = np.load(cache_file_train)
        else:
            # slicing manages out of range; no need to worry
            batch_data = dict(data_items[batch_index:batch_index + TRAIN_BATCH_SIZE])

            # load images for training and pre-process
            images = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)

            # generate feature matrix
            data_train = np.concatenate([img.feature_matrix[0] for img in images])

        # NOTE(review): plotted once per batch; with a single batch this is the same as
        # plotting after the loop -- confirm which one is intended.
        _plot_feature_scatter_matrix(data_train)


def _plot_feature_scatter_matrix(data_train):
    """Plot a scatter matrix of ``data_train`` with the correlation written in the upper triangle."""
    # one column name per feature ('Feat. 1', 'Feat. 2', ...)
    columns = ['Feat. {}'.format(k + 1) for k in range(data_train.shape[1])]
    data = pd.DataFrame(data_train, columns=columns)

    # pd.scatter_matrix and DataFrame.as_matrix() no longer exist in current pandas
    axes = pd.plotting.scatter_matrix(data, alpha=0.2, diagonal='hist')
    corr = data.corr().values

    # annotate every plot above the diagonal with its correlation coefficient
    for i, j in zip(*np.triu_indices_from(axes, k=1)):
        axes[i, j].annotate("%.2f" % corr[i, j], (0.99, 0.98),
                            size=23,
                            xycoords='axes fraction',
                            ha='right',
                            va='top')

    for ax in axes.ravel():
        # x axis label: not rotated, placed below the plot
        ax.xaxis.label.set_rotation(0)
        ax.xaxis.label.set_size(17)
        ax.xaxis.set_label_coords(0.5, -0.3)
        # y axis label: horizontal, placed left of the plot
        ax.yaxis.label.set_rotation(0)
        ax.yaxis.label.set_size(17)
        ax.yaxis.set_label_coords(-0.3, 0.5)
        # to make sure y axis names are outside the plot area
        ax.yaxis.labelpad = 50

    plt.show()
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Grid search over the parameters of the probabilistic keyhole filling (PKF) post-processing.

    Section of the original main routine. According to the original notes the
    following steps must be done separately in advance, although this function
    currently performs them itself:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images

    Carried out on top of that:

        - Grid search of the PKF parameters of the segmentation
        - Evaluation of the segmentation

    Results are written to ``<result_dir>/<timestamp>/no_PP`` for the raw
    segmentation and to one ``PP-V-<variance>-BG-<preserve_background>``
    sub-directory per parameter combination.

    Args:
        result_dir: Parent directory; a time-stamped sub-directory is created inside it.
        data_atlas_dir: Directory handed to ``putil.load_atlas_images``.
        data_train_dir: Directory crawled for the training images.
        data_test_dir: Directory crawled for the test images.
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    forest = sk_ensemble.RandomForestClassifier(
        max_features=images[0].feature_matrix[0].shape[1],
        n_estimators=20,
        max_depth=85)

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # crawl the test image directories
    crawler = futil.FileSystemDataCrawler(data_test_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint8), img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
            probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction,
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # mean and standard deviation among all subjects are reported as well
    functions = {'MEAN': np.mean, 'STD': np.std}

    # save results without post-processing
    name = 'no_PP'
    sub_dir = os.path.join(result_dir, name)
    os.makedirs(sub_dir, exist_ok=True)

    for i, img in enumerate(images_test):
        sitk.WriteImage(images_prediction[i],
                        os.path.join(sub_dir, img.id_ + '_SEG.mha'), True)

    result_file = os.path.join(sub_dir, 'results.csv')
    writer.CSVWriter(result_file).write(evaluator.results)

    result_summary_file = os.path.join(sub_dir, 'results_summary.csv')
    writer.CSVStatisticsWriter(result_summary_file, functions=functions).write(evaluator.results)

    # clear results such that the evaluator is ready for the next evaluation
    evaluator.clear()

    # define parameters for grid search (currently a single combination; a wider
    # search used variance = np.arange(0.5, 4.0, 0.5) and preserve_background in
    # (False, True))
    variance = np.arange(1, 2)
    preserve_background = np.asarray([False])

    # plain float/bool values so that the parameters can be serialised with json
    post_process_param_list = [
        {'simple_post': True, 'variance': float(var), 'preserve_background': bool(bg)}
        for bg in preserve_background
        for var in variance
    ]

    # execute post processing with the defined parameters
    for post_process_params in post_process_param_list:
        # create sub-directory for results
        name = 'PP-V-' + str(post_process_params.get('variance')).replace('.', '_') + \
               '-BG-' + str(post_process_params.get('preserve_background'))
        sub_dir = os.path.join(result_dir, name)
        os.makedirs(sub_dir, exist_ok=True)

        # write the used parameters into a text file and store it in the result folder;
        # the context manager closes the file even if serialisation fails
        parameter_file = os.path.join(sub_dir, "parameter.txt")
        with open(parameter_file, "w+") as file1:
            json.dump(post_process_params, file1)

        # post-process segmentation and evaluate with post-processing
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=False)

        for i, img in enumerate(images_test):
            evaluator.evaluate(images_post_processed[i],
                               img.images[structure.BrainImageTypes.GroundTruth],
                               img.id_ + '-PP')

            # save results
            sitk.WriteImage(images_post_processed[i],
                            os.path.join(sub_dir, img.id_ + '_SEG-PP.mha'), True)

        # save all results in csv file
        result_file = os.path.join(sub_dir, 'results.csv')
        writer.CSVWriter(result_file).write(evaluator.results)

        print('\nSubject-wise results...')
        writer.ConsoleWriter().write(evaluator.results)

        # report also mean and standard deviation among all subjects
        result_summary_file = os.path.join(sub_dir, 'results_summary.csv')
        writer.CSVStatisticsWriter(result_summary_file, functions=functions).write(evaluator.results)
        print('\nAggregated statistic results...')
        writer.ConsoleStatisticsWriter(functions=functions).write(evaluator.results)

        # clear results such that the evaluator is ready for the next evaluation
        evaluator.clear()
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Run the complete brain tissue segmentation pipeline with a random forest.

    Steps executed, in order:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation

    Args:
        result_dir: Parent directory; a time-stamped sub-directory is created inside it.
        data_atlas_dir: Directory handed to ``putil.load_atlas_images``.
        data_train_dir: Directory crawled for the training images.
        data_test_dir: Directory crawled for the test images.
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # collect the training subjects
    train_crawler = futil.FileSystemDataCrawler(data_train_dir,
                                                LOADING_KEYS,
                                                futil.BrainImageFilePathGenerator(),
                                                futil.DataDirectoryFilter())
    pre_process_params = dict(skullstrip_pre=True,
                              normalization_pre=True,
                              registration_pre=True,
                              coordinates_feature=True,
                              intensity_feature=True,
                              gradient_intensity_feature=True)

    # load and pre-process the training images
    images = putil.pre_process_batch(train_crawler.data, pre_process_params, multi_process=False)

    # stack the per-image feature matrices and label vectors
    feature_blocks = [image.feature_matrix[0] for image in images]
    label_blocks = [image.feature_matrix[1] for image in images]
    data_train = np.concatenate(feature_blocks)
    labels_train = np.concatenate(label_blocks).squeeze()

    # every split may look at all features
    n_features = images[0].feature_matrix[0].shape[1]
    forest = sk_ensemble.RandomForestClassifier(max_features=n_features,
                                                n_estimators=10,
                                                max_depth=10)

    fit_start = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - fit_start, 's')

    # results go into a sub-directory named after the current time
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, timestamp)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # collect the test subjects
    test_crawler = futil.FileSystemDataCrawler(data_test_dir,
                                               LOADING_KEYS,
                                               futil.BrainImageFilePathGenerator(),
                                               futil.DataDirectoryFilter())

    # load and pre-process the test images with the 'training' flag switched off
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(test_crawler.data, pre_process_params, multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        predict_start = timeit.default_timer()
        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - predict_start, 's')

        # convert prediction and probabilities back to SimpleITK images
        to_image = conversion.NumpySimpleITKImageBridge.convert
        image_prediction = to_image(predictions.astype(np.uint8), img.image_properties)
        image_probabilities = to_image(probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        ground_truth = img.images[structure.BrainImageTypes.GroundTruth]
        evaluator.evaluate(image_prediction, ground_truth, img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'simple_post': True}
    images_post_processed = putil.post_process_batch(images_test,
                                                     images_prediction,
                                                     images_probabilities,
                                                     post_process_params,
                                                     multi_process=False)

    for index, img in enumerate(images_test):
        ground_truth = img.images[structure.BrainImageTypes.GroundTruth]
        evaluator.evaluate(images_post_processed[index], ground_truth, img.id_ + '-PP')

        # save results
        raw_path = os.path.join(result_dir, img.id_ + '_SEG.mha')
        sitk.WriteImage(images_prediction[index], raw_path, True)
        post_processed_path = os.path.join(result_dir, img.id_ + '_SEG-PP.mha')
        sitk.WriteImage(images_post_processed[index], post_processed_path, True)

    # use two writers to report the results
    # generate result directory, if it does not exist
    os.makedirs(result_dir, exist_ok=True)
    csv_writer = writer.CSVWriter(os.path.join(result_dir, 'results.csv'))
    csv_writer.write(evaluator.results)

    print('\nSubject-wise results...')
    writer.ConsoleWriter().write(evaluator.results)

    # report also mean and standard deviation among all subjects
    result_summary_file = os.path.join(result_dir, 'results_summary.csv')
    functions = {'MEAN': np.mean, 'STD': np.std}
    summary_writer = writer.CSVStatisticsWriter(result_summary_file, functions=functions)
    summary_writer.write(evaluator.results)
    print('\nAggregated statistic results...')
    writer.ConsoleStatisticsWriter(functions=functions).write(evaluator.results)

    # clear results such that the evaluator is ready for the next evaluation
    evaluator.clear()
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Pre-process the training images and store the resulting training data.

    The routine executes the first part of the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction

    In addition, the ground truth of one label (amygdala) is summed over all
    training images and written to ``result_dir``. The feature matrix and the
    label vector are pickled into the current working directory.

    Args:
        result_dir: Directory the summed ground-truth image is written to
            (created if it does not exist).
        data_atlas_dir: Directory handed to ``putil.load_atlas_images``.
        data_train_dir: Directory crawled for the training images.
        data_test_dir: Unused.
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    pre_process_params = {
        'zscore_pre': True,
        'registration_pre': False,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
        'second_oder_coordinate_feature': False,
        # previous values: [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]
        'label_percentages': [0.0005, 0.005, 0.005, 0.05, 0.09, 0.022]
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    # compute sum over all ground truth images
    label = putil.LabelImageTypes.AMYGDALA
    img_summed = util.compute_label_dist(images, label)

    # save results; make sure the target directory exists before writing into it
    os.makedirs(result_dir, exist_ok=True)
    sitk.WriteImage(img_summed,
                    os.path.join(result_dir, 'groundtruth_sum_' + label.name + '.mha'),
                    True)

    # store preprocessed data to file; the context managers close the files
    # even if pickling fails
    with open('data_train_reduced2.pckl', 'wb') as file_id:
        pickle.dump(data_train, file_id)
    with open('labels_train_reduced.pckl', 'wb') as file_id:
        pickle.dump(labels_train, file_id)
    print('-' * 5, 'Preprocessed images stored')

    # print how many labels of each group were taken by the mask
    util.print_class_count(labels_train)
def main(FLAGS,trees,nodes):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation

    Args:
        FLAGS: Settings object; ``data_atlas_dir``, ``model_dir``,
            ``data_train_dir``, ``data_test_dir`` and ``result_dir`` are read from it.
        trees: Number of trees of the decision forest.
        nodes: Maximum number of nodes per tree.
    """
    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # The model and result directories are named after the forest parameters.
    # NOTE(review): the name is not unique per run, so a second run with the same
    # parameters reuses the directory; according to the original comment TensorFlow
    # continues training an existing model if the directory is not empty.
    t = 'DF_trees_' + str(trees) + '_nodes_' + str(nodes)
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())
    # remember the size now: data_items is re-bound to the test data further down
    train_data_size = len(data_items)

    pre_process_params = {'zscore_pre': True,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True}

    # initialize decision forest parameters
    df_params = df.DecisionForestParameters()
    df_params.num_classes = 4
    df_params.num_trees = trees
    df_params.max_nodes = nodes
    df_params.model_dir = model_dir
    forest = None
    start_time_total_train = timeit.default_timer()

    for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE):
        # slicing manages out of range; no need to worry
        batch_data = dict(data_items[batch_index: batch_index + TRAIN_BATCH_SIZE])

        # load images for training and pre-process
        images = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)
        print('pre-processing done')

        # generate feature matrix and label vector
        data_train = np.concatenate([img.feature_matrix[0] for img in images])
        labels_train = np.concatenate([img.feature_matrix[1] for img in images])

        if forest is None:
            # the number of features is only known once the first batch is loaded
            df_params.num_features = data_train.shape[1]
            print(df_params)
            forest = df.DecisionForest(df_params)

        start_time = timeit.default_timer()
        forest.train(data_train, labels_train)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    time_total_train = timeit.default_timer() - start_time_total_train

    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing manages out of range; no need to worry
        batch_data = dict(data_items[batch_index: batch_index + TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            probabilities, predictions = forest.predict(img.feature_matrix[0])
            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(image_prediction,
                               img.images[structure.BrainImageTypes.GroundTruth],
                               img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(images_post_processed[i],
                               img.images[structure.BrainImageTypes.GroundTruth],
                               img.id_ + '-PP')

            # save results
            sitk.WriteImage(images_prediction[i],
                            os.path.join(result_dir, img.id_ + '_SEG.mha'), True)
            sitk.WriteImage(images_post_processed[i],
                            os.path.join(result_dir, img.id_ + '_SEG-PP.mha'), True)

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Training data size: {}'.format(train_data_size), file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train), file=summary_file)
        print('Decision forest', file=summary_file)
        print(df_params, file=summary_file)
        stats = statistics.gather_statistics(os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str, parameters_file: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation

    Args:
        result_dir: Base output directory. Results are written to the
            sub-directory ``<result_dir>/<experiment_name>``.
        data_atlas_dir: Directory handed to ``putil.load_atlas_images``.
        data_train_dir: Directory crawled for the training images.
        data_test_dir: Directory crawled for the testing images.
        parameters_file: Path to a JSON file. When it parses to a non-empty
            value, that value REPLACES the built-in default parameters
            wholesale (it is not merged), so it must define at least
            ``'n_estimators'``, ``'max_depth'`` and ``'experiment_name'``.

    Side effects:
        Besides the result files, the debugging dumps ``data_train.npz`` and
        ``data_nan.npy`` are written to the current working directory.
    """
    start_main = timeit.default_timer()

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    fof_parameters = {
        '10Percentile': True,
        '90Percentile': True,
        'Energy': True,
        'Entropy': True,
        'InterquartileRange': True,
        'Kurtosis': True,
        'Maximum': True,
        'MeanAbsoluteDeviation': True,
        'Mean': True,
        'Median': True,
        'Minimum': True,
        'Range': True,
        'RobustMeanAbsoluteDeviation': True,
        'RootMeanSquared': True,
        'Skewness': True,
        'TotalEnergy': True,
        'Uniformity': True,
        'Variance': True,
    }

    glcm_parameters = {
        'Autocorrelation': True,
        'ClusterProminence': True,
        'ClusterShade': True,
        'ClusterTendency': True,
        'Contrast': True,
        'Correlation': True,
        'DifferenceAverage': True,
        'DifferenceEntropy': True,
        'DifferenceVariance': True,
        'Id': True,
        'Idm': True,
        'Idmn': True,
        'Idn': True,
        'Imc1': True,
        'Imc2': True,
        'InverseVariance': True,
        'JointAverage': True,
        'JointEnergy': True,
        'JointEntropy': True,
        'MCC': True,
        'MaximumProbability': True,
        'SumAverage': True,
        'SumEntropy': True,
        'SumSquares': True,
    }

    # defaults, used only when the parameters file is empty
    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'save_features': False,
        'coordinates_feature': True,
        'intensity_feature': False,
        'gradient_intensity_feature': False,
        'first_order_feature': False,
        'first_order_feature_parameters': fof_parameters,
        'HOG_feature': False,
        'GLCM_features': False,
        'GLCM_features_parameters': glcm_parameters,
        'n_estimators': 50,
        'max_depth': 60,
        'experiment_name': 'default',
    }

    # read the experiment parameters; the context manager closes the file
    # handle deterministically instead of leaking it
    with open(parameters_file, 'r') as parameters_fp:
        parameters = json.load(parameters_fp)
    if parameters:
        # a non-empty parameters file replaces the defaults entirely
        pre_process_params = parameters

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    # Debugging: the NaN positions must be recorded BEFORE they are replaced
    # in place below, otherwise the dumped index array is always empty
    nan_data_idx = np.argwhere(np.isnan(data_train))
    np.nan_to_num(data_train, copy=False)
    np.savez('data_train.npz', data_train)
    np.save('data_nan.npy', nan_data_idx)

    # every feature column is considered at each split
    forest = sk_ensemble.RandomForestClassifier(max_features=data_train.shape[1],
                                                n_estimators=pre_process_params['n_estimators'],
                                                max_depth=pre_process_params['max_depth'])

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory named after the experiment
    result_dir = os.path.join(result_dir, pre_process_params['experiment_name'])
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # crawl the testing image directories
    crawler = futil.FileSystemDataCrawler(data_test_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    # load images for testing and pre-process
    # NOTE: this mutates the parameter dict that is also handed to the
    # reporter at the end, so the report contains 'training': False
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        # sanitize the feature matrix once (in place) and reuse it for both calls
        features = np.nan_to_num(img.feature_matrix[0], copy=False)
        predictions = forest.predict(features)
        probabilities = forest.predict_proba(features)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'simple_post': True}
    images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                     post_process_params, multi_process=False)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i], os.path.join(result_dir, img.id_ + '_SEG.mha'), True)
        sitk.WriteImage(images_post_processed[i], os.path.join(result_dir, img.id_ + '_SEG-PP.mha'), True)

    # use two writers to report the results (result_dir was created above)
    result_file = os.path.join(result_dir, 'results.csv')
    writer.CSVWriter(result_file).write(evaluator.results)

    print('\nSubject-wise results...')
    writer.ConsoleWriter().write(evaluator.results)

    # report also mean and standard deviation among all subjects
    result_summary_file = os.path.join(result_dir, 'results_summary.csv')
    functions = {'MEAN': np.mean, 'STD': np.std}
    writer.CSVStatisticsWriter(result_summary_file, functions=functions).write(evaluator.results)
    print('\nAggregated statistic results...')
    writer.ConsoleStatisticsWriter(functions=functions).write(evaluator.results)

    # clear results such that the evaluator is ready for the next evaluation
    evaluator.clear()

    main_time = timeit.default_timer() - start_main

    # write the parameters and the total run time to a text report
    reporter.feature_writer(result_dir, pre_process_params, main_time, 'feature_report')