Ejemplo n.º 1
0
def test_explore_cohort():
    """Check that Results.explore_cohort leaves a file at the expected path."""
    output_name = 'cohort_table.png'
    with TemporaryDirectory() as tmp_dir:
        reporter = Results(tmp_dir)
        workspace_cohort = Cohort(ShaipWorkspace())
        reporter.explore_cohort(workspace_cohort, output_name)
        # NOTE(review): the path is built by plain concatenation (no path
        # separator is inserted) -- confirm this matches how Results builds
        # its output path.
        expected_path = tmp_dir + output_name
        assert os.path.exists(expected_path)
Ejemplo n.º 2
0
def test_show_images():
    """Check that Results.show_images leaves a file at the expected path.

    Every dataset in the cohort is given a dummy prediction of 0.
    """
    output_name = 'image_gallery.png'
    with TemporaryDirectory() as tmp_dir:
        reporter = Results(tmp_dir)
        workspace_cohort = Cohort(ShaipWorkspace())
        dummy_predictions = [0] * workspace_cohort.size
        reporter.show_images(workspace_cohort, dummy_predictions, output_name)
        # NOTE(review): the path is built by plain concatenation (no path
        # separator is inserted) -- confirm this matches how Results builds
        # its output path.
        expected_path = tmp_dir + output_name
        assert os.path.exists(expected_path)
Ejemplo n.º 3
0
def test__preprocess_one_dicom():
    """Preprocess the first DICOM of the cohort and check the image shape.

    The resulting image is also displayed with a colour bar via matplotlib.
    """
    preprocessor = Algorithm()
    first_dicom = Cohort(ShaipWorkspace()).dicoms[0]
    processed = preprocessor._preprocess_one_dicom(first_dicom)
    assert processed.shape == Algorithm.imshape

    # Visual check of the preprocessed image.
    plt.imshow(processed)
    plt.colorbar()
    plt.show()
Ejemplo n.º 4
0
def test_split_cohort_train_test():
    """Split the cohort 75/25 and check the sizes of both parts.

    The accessor helper is then run over the train part, the test part and
    the original cohort.
    """
    full_cohort = Cohort(ShaipWorkspace())
    test_fraction = 0.25
    train_part, test_part = full_cohort.split_cohort_train_test(test_fraction)

    total = full_cohort.size
    expected_train = int(total * (1.0 - test_fraction))
    expected_test = int(total * test_fraction)

    # The two truncated sizes must account for every dataset.
    assert expected_train + expected_test == total
    assert train_part.size == expected_train
    assert test_part.size == expected_test

    for subset in (train_part, test_part, full_cohort):
        cohort_accessors_test_helper(subset)
Ejemplo n.º 5
0
def test_preprocessed_cohort_with_cache():
    """Preprocess the cohort twice using a cache directory.

    Checks that both passes return one image per dataset, that the two
    passes return equal images, and that the second pass is more than
    five times faster than the first.
    """
    with TemporaryDirectory() as cache_dir:
        algorithm = Algorithm(cache_dir)
        cohort = Cohort(ShaipWorkspace())

        # First pass.
        start1 = time.time()
        ppimages1 = algorithm.preprocessed_images(cohort)
        elapsed1 = time.time() - start1
        print("\nTime for first pass = %6.4f" % elapsed1)
        assert len(ppimages1) == cohort.size

        # And again, this time we should use the cache
        start2 = time.time()
        ppimages2 = algorithm.preprocessed_images(cohort)
        elapsed2 = time.time() - start2
        print("Time for second pass = %6.4f\n" % elapsed2)
        assert len(ppimages2) == cohort.size

        # The comparison result was previously discarded; assert it so a
        # mismatch between the two passes actually fails the test.
        assert np.array_equal(ppimages1, ppimages2)

        # We expect the second pass to be much faster
        assert elapsed2 < elapsed1 / 5
Ejemplo n.º 6
0
 def __init__(self, shaip_root_dir):
     """Create the workspace, check it, then build the algorithm and results.

     shaip_root_dir is passed straight to ShaipWorkspace.  Parsed
     command-line arguments start out as None.
     """
     workspace = ShaipWorkspace(shaip_root_dir)
     self.shaip = workspace
     workspace.check()
     self.algorithm = Algorithm(workspace.cache_dir)
     self.results = Results(workspace.results_dir)
     self.args = None
Ejemplo n.º 7
0
class Experiment(object):
    """ This is the top-level class, orchestrating train/test split of the cohort,
    training and evaluation.  However the details are all elsewhere"""
    def __init__(self, shaip_root_dir):
        """Create and check the workspace, then build the algorithm and results.

        shaip_root_dir is passed straight to ShaipWorkspace.  self.args stays
        None until command_line() has been called.
        """
        self.shaip = ShaipWorkspace(shaip_root_dir)
        self.shaip.check()
        self.algorithm = Algorithm(self.shaip.cache_dir)
        self.results = Results(self.shaip.results_dir)
        self.args = None

    def _model_path(self):
        """Return the path of the model file inside the workspace models dir.

        os.path.join is used so the path is correct whether or not
        models_dir ends with a path separator, and so that saving and
        loading always agree on the location.
        """
        return os.path.join(self.shaip.models_dir, 'model')

    def command_line(self, argv):
        """Parse argv (argv[0] is the program name) and store it in self.args.

        If none of the train/predict/evaluate phases is requested, the help
        text is printed and the process exits with status 0.
        """
        parser = argparse.ArgumentParser(
            prog='experiment.py',
            description='CT/CTA discrimination to run in SHAIP',
            epilog='If no phases are specified, program does nothing - exits')
        parser.add_argument('-t',
                            '--train',
                            help='perform model training',
                            action='store_true',
                            default=False)
        parser.add_argument('-p',
                            '--predict',
                            help='perform prediction over the test set',
                            action='store_true',
                            default=False)
        parser.add_argument('-e',
                            '--evaluate',
                            help='generate results',
                            action='store_true',
                            default=False)

        args = parser.parse_args(argv[1:])
        if not (args.train or args.predict or args.evaluate):
            parser.print_help()
            sys.exit(0)
        self.args = args

    def setup_logging(self):
        """Configure the root logger with a console and a log-file handler.

        The console handler emits INFO and above; the file handler writes
        DEBUG and above to 'kaggle-ctmi.log' in the workspace results dir.
        """
        # see https://docs.python.org/2.4/lib/multiple-destinations.html

        logger = logging.getLogger('')
        logger.setLevel(logging.DEBUG)

        if len(logger.handlers) > 1:
            # avoid double setup which can happen in unit tests
            return

        # Define a Handler which writes INFO messages or higher to the sys.stderr
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        simple_formatter = logging.Formatter('%(levelname)-8s %(message)s')
        console_handler.setFormatter(simple_formatter)

        # Set up logging to file for DEBUG messages or higher
        logfile_path = os.path.join(self.shaip.results_dir,
                                    'kaggle-ctmi.log')
        logfile_handler = logging.FileHandler(filename=logfile_path)
        logfile_handler.setLevel(logging.DEBUG)
        verbose_formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%d/%m/%y %H:%M')
        logfile_handler.setFormatter(verbose_formatter)

        # add the handlers to the logger
        logger.addHandler(console_handler)
        logger.addHandler(logfile_handler)

        # Silence matplotlib debug messages
        mpl_logger = logging.getLogger('matplotlib.font_manager')
        mpl_logger.setLevel(logging.WARNING)

    def main(self, argv):
        """ Main Experiment entry point.
        argv is the full argument list, so argv[0] is the program name.  In production
        call as main(sys.argv)

        The cohort is split 70/30 into train and test parts.  Depending on
        the parsed flags the model is trained and saved (otherwise loaded),
        predictions are computed over the test part (otherwise left as
        None), and results are generated."""

        np.random.seed(42)
        self.setup_logging()
        self.command_line(argv)
        start_time = time.time()

        logging.info("Starting Kaggle-CTMI Experiment\n")

        logging.info("Finding data and groundtruth...")
        cohort = Cohort(self.shaip)
        train_cohort, test_cohort = cohort.split_cohort_train_test(0.3)
        logging.info("Found %d datasets", cohort.size)

        model_path = self._model_path()
        if self.args.train:
            logging.info("Training on %d datasets...", train_cohort.size)
            model = self.algorithm.train(train_cohort)
            Algorithm.save_model(model, model_path)
        else:
            logging.info("Skipping training, model saved from earlier run")
            model = self.algorithm.load_model(model_path)

        if self.args.predict:
            logging.info("Prediction on %d datasets...", test_cohort.size)
            test_predictions = self.algorithm.predict(model, test_cohort)
        else:
            logging.info(
                "Skipping prediction, using predictions from earlier run")
            # TODO: need to sort out caching of predictions
            test_predictions = None

        if self.args.evaluate:
            logging.info(
                "Generating results to ShaipWorkspace/outputs/results/index.html..."
            )
            self.results.show_results(train_cohort, test_cohort,
                                      self.algorithm.history, test_predictions)

        logging.info("Kaggle-CTMI Experiment done in %4.1f seconds.\n",
                     (time.time() - start_time))
Ejemplo n.º 8
0
def test_dicom_path_from_id():
    """Look up the DICOM path for 'ID_0012' and print it (no assertion)."""
    workspace = ShaipWorkspace()
    dicom_path = workspace.dicom_path_from_id('ID_0012')
    print(dicom_path)
Ejemplo n.º 9
0
def test_cohort_accessors():
    """Run the shared accessor helper over a cohort from the default workspace."""
    cohort_accessors_test_helper(Cohort(ShaipWorkspace()))
Ejemplo n.º 10
0
def test_init():
    """Check the number of ids in the cohort and the format of the first id."""
    workspace_cohort = Cohort(ShaipWorkspace())
    assert len(workspace_cohort.ids) == 16

    # The first id must be seven characters long and begin with 'ID_'.
    first_id = workspace_cohort.ids[0]
    assert len(first_id) == 7 and first_id[:3] == 'ID_'
Ejemplo n.º 11
0
def test_read_contrast_gt():
    """Read the ground-truth file for ID_0001 and check its value is 1."""
    ground_truth_file = (
        'ShaipUnittestWorkspace/inputs/groundtruth/ID_0001/ID_0001.txt')
    workspace_cohort = Cohort(ShaipWorkspace())
    assert workspace_cohort._read_contrast_gt(ground_truth_file) == 1
Ejemplo n.º 12
0
def test_gt_path_from_id():
    """Check the ground-truth path the workspace derives for 'ID_0012'."""
    expected = 'ShaipUnittestWorkspace/inputs/groundtruth/ID_0012/ID_0012.txt'
    workspace = ShaipWorkspace()
    assert workspace.gt_path_from_id('ID_0012') == expected
Ejemplo n.º 13
0
def test_train():
    """Train on the whole cohort and check that a model object is returned."""
    trainer = Algorithm()
    workspace_cohort = Cohort(ShaipWorkspace())
    trained_model = trainer.train(workspace_cohort)
    assert trained_model is not None
Ejemplo n.º 14
0
def test_preprocessed_cohort():
    """Check that preprocessing yields one image per dataset in the cohort."""
    preprocessor = Algorithm()
    workspace_cohort = Cohort(ShaipWorkspace())
    images = preprocessor.preprocessed_images(workspace_cohort)
    assert len(images) == workspace_cohort.size