Example #1
def summary(self):
    # torchsummary only supports "cuda" or "cpu" as device names, not "cuda:0"
    device = str(self.device).split(":")[0]
    torch_summary(
        self,
        input_size=(self._channels, self._height, self._width),
        device=device,
    )
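
The split(":")[0] normalization is needed because torch.device("cuda:0") stringifies with its ordinal attached. A quick check, assuming only that torch is installed:

import torch

device = torch.device("cuda:0")
# torchsummary accepts only "cuda" or "cpu", so the ordinal suffix is stripped.
assert str(device).split(":")[0] == "cuda"
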
def run_experiment(
        experiment: Experiment,
        debug_pipeline: bool = False,
        develop_mode: bool = False,
        data_loader_workers: int = 1,
        cross_validation_iterations: int = 3,
        device: str = "cpu",
        develop_mode_samples: int = 10
) -> List[Result]:
    LOGGER.info("Beginning experiment: %s, %s", experiment.id(), experiment.description())

    pipeline = Pipeline(experiment.pipeline_stages(), debug=debug_pipeline)
    augmentations = AugmentedCollate(experiment.augmentation_stages())

    dfs = experiment.train_test_data_frames()
    directories = experiment.train_test_directories()

    # File-system cache with a one-to-one mapping to an experiment. It caches data
    # for multiple workers and can safely be reused in each cross-validation run.
    cache = joblib.Memory(f'./cachedir/{experiment.id()}', verbose=0)
    LOGGER.info("Initialised cache: %s", cache)

    LOGGER.info("Creating APTOSDataset for the following directories: %s", directories)
    dataset = TorchConcatDataset(
        [APTOSDataset(df, directory, pipeline, cache) for df, directory in zip(dfs, directories)]
    )
    # Develop mode runs on a small subset of the data, which keeps end-to-end
    # test runs feasible during development.
    if develop_mode:
        LOGGER.warning("Running in develop mode, using a fraction of the whole dataset")
        dataset, _ = torch_random_split(dataset, [develop_mode_samples, len(dataset) - develop_mode_samples])

    results = []
    for cv_iteration in range(1, cross_validation_iterations + 1):
        LOGGER.info("Cross validation iteration: %s", cv_iteration)

        with APTOSMonitor(experiment, cv_iteration) as monitor:
            LOGGER.info(f'tensorboard --logdir "{monitor._summary_writer.log_dir}"')

            test_size = experiment.test_size()
            train_ds, test_ds = torch_random_split(
                dataset,
                [round((1 - test_size) * len(dataset)), round(test_size * len(dataset))]
            )

            sampler, sampler_kwargs = experiment.sampler()
            sampler = sampler(train_ds, **sampler_kwargs)

            train_loader = TorchDataLoader(
                train_ds,
                batch_size=experiment.batch_size(),
                num_workers=data_loader_workers,
                # Potentially an unconventional use of collate_fn, but it does make the
                # train data loader responsible for augmentations which is nice.
                collate_fn=augmentations,
                sampler=sampler
            )

            test_loader = TorchDataLoader(
                test_ds,
                batch_size=experiment.batch_size(),
                num_workers=data_loader_workers,
            )

            model = experiment.model(input_shape=train_ds[0][0].shape)
            model = model.to(device)
            # torchsummary only accepts "cuda" or "cpu" as the device name
            torch_summary(model, train_ds[0][0].shape, device=str(device).split(":")[0])

            optimizer_class, optim_kwargs = experiment.optimizer()
            optimizer = optimizer_class(model.parameters(), **optim_kwargs)

            lr_scheduler, scheduler_kwargs = experiment.lr_scheduler()
            lr_scheduler = lr_scheduler(optimizer, **scheduler_kwargs)

            monitor.on_cv_start(train_ds, augmentations)

            for epoch in range(1, experiment.max_epochs() + 1):

                LOGGER.info("Epoch: %s", epoch)

                train(model, train_loader, optimizer, device, monitor)
                lr_scheduler.step()

                predictions_proba, predictions, targets, ids, losses = test(model, test_loader, device, monitor)

                if epoch % 2 == 0:
                    checkpoint = {
                        'model': model,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'experiment': experiment.state_dict()
                    }

                    checkpoint_directory = f'results/{experiment.id()}'
                    os.makedirs(checkpoint_directory, exist_ok=True)

                    torch.save(checkpoint, os.path.join(checkpoint_directory, f'{cv_iteration}-{epoch}-checkpoint.pth'))

            monitor.on_cv_end()

        predictions = predictions.tolist()
        targets = targets.tolist()

        results_df = pd.DataFrame({
            "experiment_id": [experiment.id() for _ in range(len(targets))],
            "cross_validation_iteration": [cv_iteration for _ in range(len(targets))],
            "targets": targets,
            "predictions": predictions,
            "id_code": ids
        })

        results.append(Result(experiment, results_df))

    # Deletes content on disk... (until experiments have a unique hash, this makes sense)
    cache.clear()

    return results
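
A hypothetical invocation of this version; MyExperiment stands in for a concrete Experiment subclass and is an assumption, not part of the listing:

import torch

# MyExperiment is a hypothetical Experiment subclass, used for illustration only.
experiment = MyExperiment()
results = run_experiment(
    experiment,
    develop_mode=True,  # run on a small subset for a fast end-to-end check
    data_loader_workers=4,
    cross_validation_iterations=3,
    device="cuda:0" if torch.cuda.is_available() else "cpu",
)

Example #2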
def run_experiment(experiment: Experiment,
                   debug_pipeline: bool = False,
                   develop_mode: bool = False,
                   data_loader_workers: int = 1,
                   cross_validation_iterations: int = 3,
                   device: str = "cpu",
                   develop_mode_samples: int = 10) -> List[Result]:
    LOGGER.info("Beginning experiment: %s, %s", experiment.id(),
                experiment.description())

    # Preprocessing pipeline
    pipeline = Pipeline(experiment.pipeline_stages(), debug=debug_pipeline)
    # Augmentations for the train and test data loaders
    augmentations = AugmentedCollate(experiment.augmentation_stages())
    test_augmentations = AugmentedCollate(
        experiment.test_augmentation_stages())

    dfs = experiment.train_test_data_frames()
    directories = experiment.train_test_directories()

    # File-system cache with a one-to-one mapping to an experiment. It caches data
    # for multiple workers and can safely be reused in each cross-validation run.
    cache = joblib.Memory(f'./cachedir/{experiment.id()}', verbose=0)
    LOGGER.info("Initialised cache: %s", cache)

    LOGGER.info("Creating APTOSDataset for the following directories: %s",
                directories)

    dataset = TorchConcatDataset([
        APTOSDataset(df, directory, pipeline, cache)
        for df, directory in zip(dfs, directories)
    ])

    # Develop mode runs on a small subset of the data, which keeps end-to-end
    # test runs feasible during development.
    if develop_mode:
        LOGGER.warning(
            "Running in develop mode, using a fraction of the whole dataset")
        dataset, _ = torch_random_split(
            dataset, [develop_mode_samples,
                      len(dataset) - develop_mode_samples])

    results = []

    # Stratified shuffle-split cross-validator: provides train/test indices that
    # split the data into train/test sets while preserving class proportions.
    sss = StratifiedShuffleSplit(n_splits=cross_validation_iterations,
                                 test_size=experiment.test_size(),
                                 train_size=1 - experiment.test_size(),
                                 random_state=0)
    # TODO: will probably need debugging when more than one dataset is added
    labels = np.asarray(dfs[0]["diagnosis"])
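    # sss.split only uses the labels for stratification; the features argument
    # is just a placeholder of matching length, hence np.zeros.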
    split_generator = sss.split(np.zeros(labels.shape), labels)

    for cv_iteration, (train_index, test_index) in zip(
            range(1, cross_validation_iterations + 1), split_generator):
        LOGGER.info("Cross validation iteration: %s", cv_iteration)

        with APTOSMonitor(experiment, cv_iteration) as monitor:
            LOGGER.info(
                f'tensorboard --logdir "{monitor._summary_writer.log_dir}"')

            test_ds = Subset(dataset, test_index)
            train_ds = Subset(dataset, train_index)

            LOGGER.info("train data size: {}".format(train_ds.__len__()))
            LOGGER.info("Histogram of classses {}".format(
                np.histogram(labels[train_index], 5)))
            class_data = np.histogram(labels[train_index], 5)[0]
            class_weights = class_data.sum() / (class_data.shape[0] *
                                                class_data)

            LOGGER.info("test data size: {}".format(test_ds.__len__()))
            LOGGER.info("Histogram of classses {}".format(
                np.histogram(labels[test_index], 5)))

            sampler, sampler_kwargs = experiment.sampler()
            sampler = sampler(train_ds, **sampler_kwargs)

            train_loader = TorchDataLoader(
                train_ds,
                batch_size=experiment.batch_size(),
                num_workers=data_loader_workers,
                # Potentially an unconventional use of collate_fn, but it does make the
                # train data loader responsible for augmentations which is nice.
                collate_fn=augmentations,
                sampler=sampler)

            test_loader = TorchDataLoader(test_ds,
                                          batch_size=experiment.batch_size(),
                                          num_workers=data_loader_workers,
                                          collate_fn=test_augmentations)

            model = experiment.model(input_shape=train_ds[0][0].shape)
            model = model.to(device)
            # torchsummary only accepts "cuda" or "cpu" as the device name
            torch_summary(model, train_ds[0][0].shape, device=str(device).split(":")[0])

            optimizer_class, optim_kwargs = experiment.optimizer()
            optimizer = optimizer_class(model.parameters(), **optim_kwargs)

            lr_scheduler, scheduler_kwargs = experiment.lr_scheduler()
            lr_scheduler = lr_scheduler(optimizer, **scheduler_kwargs)

            monitor.on_cv_start(train_ds, augmentations)

            # Weight the focal loss with the inverse-frequency class weights (alpha)
            criterion = FocalLoss(num_class=5, gamma=2, alpha=class_weights)

            for epoch in range(1, experiment.max_epochs() + 1):

                LOGGER.info("Epoch: %s", epoch)

                train(model, train_loader, optimizer, device, criterion,
                      monitor)
                lr_scheduler.step()

                predictions_proba, predictions, targets, ids, losses = test(
                    model, test_loader, device, criterion, monitor)

                if epoch % 2 == 0:
                    checkpoint = {
                        'model': model,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'experiment': experiment.state_dict()
                    }

                    checkpoint_directory = f'results/{experiment.id()}'
                    os.makedirs(checkpoint_directory, exist_ok=True)

                    torch.save(
                        checkpoint,
                        os.path.join(checkpoint_directory,
                                     f'{cv_iteration}-{epoch}-checkpoint.pth'))

            monitor.on_cv_end()

        predictions = predictions.tolist()
        targets = targets.tolist()

        results_df = pd.DataFrame({
            "experiment_id": [experiment.id() for _ in range(len(targets))],
            "cross_validation_iteration": [cv_iteration for _ in range(len(targets))],
            "targets": targets,
            "predictions": predictions,
            "id_code": ids
        })

        results.append(Result(experiment, results_df))

    # Deletes content on disk... (until experiments have a unique hash, this makes sense)
    cache.clear()

    return results
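
FocalLoss is not defined anywhere in this listing. A minimal sketch consistent with the call FocalLoss(num_class=5, gamma=2, alpha=class_weights) above, assuming the standard multi-class focal loss FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t):

import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    # Hypothetical implementation matching the call signature used above.
    def __init__(self, num_class, gamma=2.0, alpha=None):
        super().__init__()
        self.num_class = num_class
        self.gamma = gamma
        # Optional per-class weights, shape (num_class,)
        self.alpha = None if alpha is None else torch.as_tensor(alpha, dtype=torch.float32)

    def forward(self, logits, targets):
        assert logits.size(1) == self.num_class
        # log p_t for each sample's target class
        log_pt = F.log_softmax(logits, dim=1).gather(1, targets.unsqueeze(1)).squeeze(1)
        pt = log_pt.exp()
        # FL(p_t) = -(1 - p_t)^gamma * log(p_t), optionally weighted by alpha_t
        loss = -(1 - pt) ** self.gamma * log_pt
        if self.alpha is not None:
            loss = loss * self.alpha.to(logits.device)[targets]
        return loss.mean()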