def main(_): """Trains a decision forest classifier on a two-dimensional point cloud.""" # generate model directory (use datetime to ensure that the directory is empty) t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S') model_dir = os.path.join(FLAGS.model_dir, t) os.makedirs(model_dir, exist_ok=True) # generate result directory os.makedirs(FLAGS.result_dir, exist_ok=True) # read file with training data data = Reader.load(FLAGS.input_file) # generate testing data test_data = Generator.get_test_data(1000) # generate decision forest parameters params = df.DecisionForestParameters() params.num_classes = data.label_count() params.num_features = data.dimension params.num_trees = 10 params.max_nodes = 100 # or params.set_max_nodes(...) params.use_training_loss = False params.report_feature_importances = True params.model_dir = model_dir print(params) # train the forest forest = df.DecisionForest(params) print('Decision forest training...') forest.train(data.data, data.labels) # or use load_estimator to load a model (create a DecisionForestParameters object and set the model_dir) # forest.load_estimator() # apply the forest to test data print('Decision forest testing...') probabilities, predictions = forest.predict(test_data) # or directly evaluate when labels are known # this can be used to see the feature importance # eval_data, eval_labels = Generator.get_test_data_with_label(50) # results = forest.evaluate(eval_data, eval_labels) # for key in sorted(results): # print('%s: %s' % (key, results[key])) # plot the result print('Plotting...') plotter = Plotter() plotter.plot_pixels_proba(test_data, np.array(probabilities)) plotter.plot_points(data.data, data.labels) plotter.save(os.path.join(FLAGS.result_dir, 'result_{}.png'.format(t)))
def main(FLAGS, trees, nodes):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # name the model directory after the forest configuration
    # note: TensorFlow continues training an existing model if the directory is not empty,
    # so rerunning with the same configuration resumes from the stored model
    t = 'DF_trees_' + str(trees) + '_nodes_' + str(nodes)
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())
    train_data_size = len(data_items)  # data_items is reused for the test set below

    pre_process_params = {'zscore_pre': True,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True}

    # initialize decision forest parameters
    df_params = df.DecisionForestParameters()
    df_params.num_classes = 4
    df_params.num_trees = trees
    df_params.max_nodes = nodes
    df_params.model_dir = model_dir

    forest = None
    start_time_total_train = timeit.default_timer()

    for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE):
        # slicing past the end of the list is safe, so the last (partial) batch needs no special handling
        batch_data = dict(data_items[batch_index:batch_index + TRAIN_BATCH_SIZE])

        # load images for training and pre-process
        images = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)
        print('pre-processing done')

        # generate feature matrix and label vector
        data_train = np.concatenate([img.feature_matrix[0] for img in images])
        labels_train = np.concatenate([img.feature_matrix[1] for img in images])

        if forest is None:
            df_params.num_features = data_train.shape[1]
            print(df_params)
            forest = df.DecisionForest(df_params)

        start_time = timeit.default_timer()
        forest.train(data_train, labels_train)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    time_total_train = timeit.default_timer() - start_time_total_train

    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing past the end of the list is safe, so the last (partial) batch needs no special handling
        batch_data = dict(data_items[batch_index:batch_index + TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            probabilities, predictions = forest.predict(img.feature_matrix[0])
            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                            img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities,
                                                                               img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                         post_process_params, multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                               img.id_ + '-PP')

            # save results
            sitk.WriteImage(images_prediction[i], os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
            sitk.WriteImage(images_post_processed[i], os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)

    # write a summary of the parameters to the result directory
    # (data_items now holds the test items, hence the training set size captured above)
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Training data size: {}'.format(train_data_size), file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train), file=summary_file)
        print('Decision forest', file=summary_file)
        print(df_params, file=summary_file)
        stats = statistics.gather_statistics(os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
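# ----------------------------------------------------------------------
# Sketch of a possible driver (an assumption, not part of the original
# excerpt): because this variant of main() takes `trees` and `nodes` as
# arguments and names the model directory after them, it lends itself
# to a small grid search. parse_flags() is a hypothetical helper that
# returns the FLAGS namespace; the grid values are illustrative only.
# ----------------------------------------------------------------------
if __name__ == '__main__':
    flags = parse_flags()  # hypothetical: returns an object with model_dir, result_dir, data_*_dir
    for trees in (10, 40, 160):
        for nodes in (500, 1000, 3000):
            print('=' * 10, 'decision forest with {} trees and {} max nodes'.format(trees, nodes))
            main(flags, trees, nodes)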
def main(_): """Brain tissue segmentation using decision forests. The main routine executes the medical image analysis pipeline: - Image loading - Registration - Pre-processing - Feature extraction - Decision forest classifier model building - Segmentation using the decision forest classifier model on unseen images - Post-processing of the segmentation - Evaluation of the segmentation """ # load atlas images putil.load_atlas_images(FLAGS.data_atlas_dir) print('-' * 5, 'Training...') # generate a model directory (use datetime to ensure that the directory is empty) # we need an empty directory because TensorFlow will continue training an existing model if it is not empty t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S') model_dir = os.path.join(FLAGS.model_dir, t) os.makedirs(model_dir, exist_ok=True) # crawl the training image directories crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir, IMAGE_KEYS, futil.BrainImageFilePathGenerator(), futil.DataDirectoryFilter()) data_items = list(crawler.data.items()) train_data_size = len(data_items) pre_process_params = { 'zscore_pre': True, 'coordinates_feature': True, 'intensity_feature': True, 'gradient_intensity_feature': True } # initialize decision forest parameters df_params = df.DecisionForestParameters() df_params.num_classes = 4 df_params.num_trees = 160 df_params.max_nodes = 3000 df_params.model_dir = model_dir forest = None start_time_total_train = timeit.default_timer() for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE): cache_file_prefix = os.path.normpath( os.path.join( script_dir, './mia-cache/batch-' + str(batch_index) + '-' + str(TRAIN_BATCH_SIZE))) cache_file_train = cache_file_prefix + '-data_train.npy' cache_file_labels = cache_file_prefix + '-data_labels.npy' if (USE_PREPROCESS_CACHE & os.path.exists(cache_file_train)): print('Using cache from ', cache_file_train) data_train = np.load(cache_file_train) labels_train = np.load(cache_file_labels) else: # slicing manages out of range; no need to worry batch_data = dict(data_items[batch_index:batch_index + TRAIN_BATCH_SIZE]) # load images for training and pre-process images = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True) print('pre-processing done') # generate feature matrix and label vector data_train = np.concatenate( [img.feature_matrix[0] for img in images]) labels_train = np.concatenate( [img.feature_matrix[1] for img in images]) if NORMALIZE_FEATURES: # normalize data (mean 0, std 1) # data_train = scipy_stats.zscore(data_train) non_coord = scipy_stats.zscore(data_train[:, 3:8]) coord = data_train[:, 0:3] / 255 * 2 - 1 data_train = np.concatenate((coord, non_coord), axis=1) if (USE_PREPROCESS_CACHE): print('Writing cache') if (not os.path.exists(os.path.dirname(cache_file_prefix))): os.mkdir(os.path.dirname(cache_file_prefix)) data_train.dump(cache_file_train) labels_train.dump(cache_file_labels) if forest is None: df_params.num_features = data_train.shape[1] print(df_params) forest = df.DecisionForest(df_params) start_time = timeit.default_timer() forest.train(data_train, labels_train) print(' Time elapsed:', timeit.default_timer() - start_time, 's') time_total_train = timeit.default_timer() - start_time_total_train start_time_total_test = timeit.default_timer() print('-' * 5, 'Testing...') result_dir = os.path.join(FLAGS.result_dir, t) os.makedirs(result_dir, exist_ok=True) # initialize evaluator evaluator = putil.init_evaluator(result_dir) # crawl the training image directories crawler = 
load.FileSystemDataCrawler(FLAGS.data_test_dir, IMAGE_KEYS, futil.BrainImageFilePathGenerator(), futil.DataDirectoryFilter()) data_items = list(crawler.data.items()) all_probabilities = None for batch_index in range(0, len(data_items), TEST_BATCH_SIZE): # slicing manages out of range; no need to worry batch_data = dict(data_items[batch_index:batch_index + TEST_BATCH_SIZE]) # load images for testing and pre-process pre_process_params['training'] = False images_test = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True) images_prediction = [] images_probabilities = [] for img in images_test: print('-' * 10, 'Testing', img.id_) start_time = timeit.default_timer() features = img.feature_matrix[0] if NORMALIZE_FEATURES: # features = scipy_stats.zscore(features) non_coord = scipy_stats.zscore(features[:, 3:8]) coord = features[:, 0:3] / 255 * 2 - 1 features = np.concatenate((coord, non_coord), axis=1) probabilities, predictions = forest.predict(features) if all_probabilities is None: all_probabilities = np.array([probabilities]) else: all_probabilities = np.concatenate( (all_probabilities, [probabilities]), axis=0) print(' Time elapsed:', timeit.default_timer() - start_time, 's') # convert prediction and probabilities back to SimpleITK images image_prediction = conversion.NumpySimpleITKImageBridge.convert( predictions.astype(np.uint8), img.image_properties) image_probabilities = conversion.NumpySimpleITKImageBridge.convert( probabilities, img.image_properties) # evaluate segmentation without post-processing evaluator.evaluate( image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_) images_prediction.append(image_prediction) images_probabilities.append(image_probabilities) # post-process segmentation and evaluate with post-processing post_process_params = {'crf_post': True} images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities, post_process_params, multi_process=True) for i, img in enumerate(images_test): evaluator.evaluate( images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth], img.id_ + '-PP') # save results sitk.WriteImage( images_prediction[i], os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True) sitk.WriteImage( images_post_processed[i], os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True) time_total_test = timeit.default_timer() - start_time_total_test # write summary of parameters to results dir with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file: print('Result dir: {}'.format(result_dir)) print('Result dir: {}'.format(result_dir), file=summary_file) print('Training data size: {}'.format(train_data_size), file=summary_file) print('Total training time: {:.1f}s'.format(time_total_train), file=summary_file) print('Total testing time: {:.1f}s'.format(time_total_test), file=summary_file) print('Voxel Filter Mask: {}'.format( putil.FeatureExtractor.VOXEL_MASK_FLT), file=summary_file) print('Normalize Features: {}'.format(NORMALIZE_FEATURES), file=summary_file) print('Decision forest', file=summary_file) print(df_params, file=summary_file) stats = statistics.gather_statistics( os.path.join(result_dir, 'results.csv')) print('Result statistics:', file=summary_file) print(stats, file=summary_file)
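# ----------------------------------------------------------------------
# Sketch (an assumption, not part of the original file): the feature
# scaling above is duplicated between training and testing and could be
# factored into a helper. It hard-codes the same layout assumption as
# the code it replaces: columns 0-2 are voxel coordinates in [0, 255],
# columns 3-7 are the intensity/gradient features.
# ----------------------------------------------------------------------
def normalize_features(features: np.ndarray) -> np.ndarray:
    """Scales the coordinates to [-1, 1] and z-scores the remaining features."""
    coord = features[:, 0:3] / 255 * 2 - 1             # coordinates to [-1, 1]
    non_coord = scipy_stats.zscore(features[:, 3:8])   # zero mean, unit variance per column
    return np.concatenate((coord, non_coord), axis=1)

# usage (both call sites above would then be one-liners):
#     data_train = normalize_features(data_train)
#     features = normalize_features(features)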