def test_explore_cohort():
    """Results.explore_cohort should write the requested figure file to the results dir."""
    with TemporaryDirectory() as tmp_dir:
        out_name = 'cohort_table.png'
        results = Results(tmp_dir)
        cohort = Cohort(ShaipWorkspace())
        results.explore_cohort(cohort, out_name)
        # NOTE(review): plain concatenation (no path separator) mirrors how
        # Results appears to build its output path — confirm the directory
        # string convention includes a trailing '/'.
        assert os.path.exists(tmp_dir + out_name)
def test_show_images():
    """Results.show_images should render a gallery image file for the cohort."""
    with TemporaryDirectory() as tmp_dir:
        out_name = 'image_gallery.png'
        results = Results(tmp_dir)
        cohort = Cohort(ShaipWorkspace())
        # Dummy all-zero predictions, one per dataset.
        dummy_predictions = [0] * cohort.size
        results.show_images(cohort, dummy_predictions, out_name)
        # NOTE(review): plain concatenation (no path separator) mirrors how
        # Results appears to build its output path — confirm the directory
        # string convention includes a trailing '/'.
        assert os.path.exists(tmp_dir + out_name)
def test__preprocess_one_dicom():
    """_preprocess_one_dicom should produce an image of the algorithm's fixed shape.

    Bug fix: the original ended with plt.show(), which opens an interactive
    window and blocks an automated test run until it is closed.  The figure
    is still drawn (useful when debugging locally with an interactive
    backend) but the test now closes it instead of blocking on it.
    """
    algorithm = Algorithm()
    cohort = Cohort(ShaipWorkspace())
    dcm1 = cohort.dicoms[0]
    image = algorithm._preprocess_one_dicom(dcm1)
    assert image.shape == Algorithm.imshape
    # Draw for visual inspection, then release the figure so repeated
    # test runs don't accumulate open figures.
    plt.imshow(image)
    plt.colorbar()
    plt.close()
def test_split_cohort_train_test():
    """split_cohort_train_test should partition the cohort by the given proportion.

    Bug fix: the original computed BOTH expected sizes with int() truncation
    and then asserted n_train + n_test == n — an assertion about the test's
    own arithmetic which fails for e.g. n=10, test_prop=0.25 (7 + 2 != 10).
    The expected test size is now derived as the remainder, and the
    partition property is asserted on the actual cohorts returned.
    """
    cohort = Cohort(ShaipWorkspace())
    test_prop = 0.25
    train_cohort, test_cohort = cohort.split_cohort_train_test(test_prop)
    n = cohort.size
    n_train = int(n * (1.0 - test_prop))
    n_test = n - n_train  # remainder, so expected sizes always sum to n
    # The split must be a true partition of the cohort under test.
    assert train_cohort.size + test_cohort.size == n
    assert train_cohort.size == n_train
    assert test_cohort.size == n_test
    # Both halves (and the original) must still honour the accessor contract.
    cohort_accessors_test_helper(train_cohort)
    cohort_accessors_test_helper(test_cohort)
    cohort_accessors_test_helper(cohort)
def test_preprocessed_cohort_with_cache():
    """A second preprocessed_images() call should hit the cache: identical
    images returned, and much faster than the first (cold) pass.

    Bug fix: the original called np.array_equal(ppimages1, ppimages2) and
    discarded the boolean result, so cache consistency was never actually
    verified.  It is now asserted.
    """
    with TemporaryDirectory() as cache_dir:
        algorithm = Algorithm(cache_dir)
        cohort = Cohort(ShaipWorkspace())

        # First (cold) pass — populates the cache.
        start1 = time.time()
        ppimages1 = algorithm.preprocessed_images(cohort)
        elapsed1 = time.time() - start1
        print("\nTime for first pass = %6.4f" % elapsed1)
        assert len(ppimages1) == cohort.size

        # And again, this time we should use the cache
        start2 = time.time()
        ppimages2 = algorithm.preprocessed_images(cohort)
        elapsed2 = time.time() - start2
        print("Time for second pass = %6.4f\n" % elapsed2)
        assert len(ppimages2) == cohort.size

        # Cached results must be identical to the freshly computed ones.
        assert np.array_equal(ppimages1, ppimages2)

        # We expect the second pass to be much faster
        assert elapsed2 < elapsed1 / 5
def __init__(self, shaip_root_dir):
    """Wire up the experiment's collaborators from a SHAIP workspace root.

    :param shaip_root_dir: root directory of the SHAIP workspace layout.
    """
    self.shaip = ShaipWorkspace(shaip_root_dir)
    # Validate the workspace before constructing anything that depends on
    # its directories.
    self.shaip.check()
    self.algorithm = Algorithm(self.shaip.cache_dir)
    self.results = Results(self.shaip.results_dir)
    # Parsed command-line arguments; populated later (presumably by a
    # command_line() step) — None until then.
    self.args = None
class Experiment(object):
    """ This is the top-level class, orchestrating train/test split of the cohort,
    training and evaluation.  However the details are all elsewhere."""

    def __init__(self, shaip_root_dir):
        """Wire up the experiment's collaborators from a SHAIP workspace root.

        :param shaip_root_dir: root directory of the SHAIP workspace layout.
        """
        self.shaip = ShaipWorkspace(shaip_root_dir)
        # Validate the workspace before constructing anything that depends
        # on its directories.
        self.shaip.check()
        self.algorithm = Algorithm(self.shaip.cache_dir)
        self.results = Results(self.shaip.results_dir)
        # Parsed command-line arguments; set by command_line().
        self.args = None

    def command_line(self, argv):
        """Parse command-line flags selecting which phases to run.

        :param argv: full argument vector (argv[0] is the program name).
        Exits with the help text if no phase flag was given.
        """
        parser = argparse.ArgumentParser(
            prog='experiment.py',
            description='CT/CTA discrimination to run in SHAIP',
            epilog='If no phases are specified, program does nothing - exits')
        parser.add_argument('-t', '--train', help='perform model training',
                            action='store_true', default=False)
        parser.add_argument('-p', '--predict', help='perform prediction over the test set',
                            action='store_true', default=False)
        parser.add_argument('-e', '--evaluate', help='generate results',
                            action='store_true', default=False)
        args = parser.parse_args(argv[1:])
        if not any([args.train, args.predict, args.evaluate]):
            # Nothing to do — show usage and stop.
            parser.print_help()
            sys.exit(0)
        self.args = args

    def setup_logging(self):
        """Configure root-logger output: INFO+ to console, DEBUG+ to a log file.

        see https://docs.python.org/2.4/lib/multiple-destinations.html
        """
        logger = logging.getLogger('')
        logger.setLevel(logging.DEBUG)
        if len(logger.handlers) <= 1:
            # avoid double setup which can happen in unit tests

            # Define a Handler which writes INFO messages or higher to the sys.stderr
            console_handler = logging.StreamHandler()
            console_handler.setLevel(logging.INFO)
            simple_formatter = logging.Formatter('%(levelname)-8s %(message)s')
            console_handler.setFormatter(simple_formatter)

            # Set up logging to file for DEBUG messages or higher
            logfile_path = os.path.join(self.shaip.results_dir, 'kaggle-ctmi.log')
            logfile_handler = logging.FileHandler(filename=logfile_path)
            logfile_handler.setLevel(logging.DEBUG)
            verbose_formatter = logging.Formatter(
                '%(asctime)s - %(levelname)s - %(message)s',
                datefmt='%d/%m/%y %H:%M')
            logfile_handler.setFormatter(verbose_formatter)

            # add the handlers to the logger
            logger.addHandler(console_handler)
            logger.addHandler(logfile_handler)

            # Silence matplotlib debug messages
            mpl_logger = logging.getLogger('matplotlib.font_manager')
            mpl_logger.setLevel(logging.WARNING)

    def main(self, argv):
        """ Main Experiment entry point.  argv is the full argument list, so
        argv[0] is the program name.  In production call as main(sys.argv)"""
        # Fixed seed so the train/test split (and anything else random)
        # is reproducible across runs.
        np.random.seed(42)
        self.setup_logging()
        self.command_line(argv)
        start_time = time.time()
        logging.info("Starting Kaggle-CTMI Experiment\n")

        logging.info("Finding data and groundtruth...")
        cohort = Cohort(self.shaip)
        train_cohort, test_cohort = cohort.split_cohort_train_test(0.3)
        logging.info("Found %d datasets", cohort.size)

        # Phase 1: train (or reload a previously saved model).
        if self.args.train:
            logging.info("Training on %d datasets...", train_cohort.size)
            model = self.algorithm.train(train_cohort)
            Algorithm.save_model(model, self.shaip.models_dir + 'model')
        else:
            logging.info("Skipping training, model saved from earlier run")
            model = self.algorithm.load_model(self.shaip.models_dir + 'model')

        # Phase 2: predict over the held-out test set.
        if self.args.predict:
            logging.info("Prediction on %d datasets...", test_cohort.size)
            test_predictions = self.algorithm.predict(model, test_cohort)
        else:
            logging.info("Skipping prediction, using predictions from earlier run")
            # TODO: need to sort out caching of predictions
            test_predictions = None

        # Phase 3: report results.
        if self.args.evaluate:
            logging.info("Generating results to ShaipWorkspace/outputs/results/index.html...")
            self.results.show_results(train_cohort, test_cohort,
                                      self.algorithm.history, test_predictions)

        logging.info("Kaggle-CTMI Experiment done in %4.1f seconds.\n",
                     (time.time() - start_time))
def test_dicom_path_from_id():
    """dicom_path_from_id should return a path embedding the requested ID.

    Bug fix: the original only printed the path and asserted nothing, so it
    could never fail.  The exact filename layout is not pinned here (unlike
    test_gt_path_from_id) because the DICOM filename component is not known
    from this file alone.
    """
    shaip = ShaipWorkspace()
    path = shaip.dicom_path_from_id('ID_0012')
    assert path is not None
    assert 'ID_0012' in path
def test_cohort_accessors():
    """A freshly-loaded cohort must satisfy the shared accessor contract."""
    cohort_accessors_test_helper(Cohort(ShaipWorkspace()))
def test_init():
    """A newly constructed Cohort should expose 16 well-formed dataset IDs."""
    cohort = Cohort(ShaipWorkspace())
    assert len(cohort.ids) == 16
    first_id = cohort.ids[0]
    # IDs look like 'ID_nnnn': seven characters with a fixed prefix.
    assert len(first_id) == 7
    assert first_id.startswith('ID_')
def test_read_contrast_gt():
    """_read_contrast_gt should parse the contrast label from a groundtruth file."""
    cohort = Cohort(ShaipWorkspace())
    path = 'ShaipUnittestWorkspace/inputs/groundtruth/ID_0001/ID_0001.txt'
    assert cohort._read_contrast_gt(path) == 1
def test_gt_path_from_id():
    """gt_path_from_id should map a dataset ID to its groundtruth file path."""
    expected = 'ShaipUnittestWorkspace/inputs/groundtruth/ID_0012/ID_0012.txt'
    assert ShaipWorkspace().gt_path_from_id('ID_0012') == expected
def test_train():
    """Algorithm.train should produce a non-None model from a cohort."""
    trained = Algorithm().train(Cohort(ShaipWorkspace()))
    assert trained is not None
def test_preprocessed_cohort():
    """preprocessed_images should yield one image per dataset in the cohort."""
    cohort = Cohort(ShaipWorkspace())
    images = Algorithm().preprocessed_images(cohort)
    assert len(images) == cohort.size