Example 1
    def write_struct_to_config(self, params):
        """Write the given hyper-parameter selection into the json config file."""
        self.params = params
        with open(self.config_path) as file:
            config = json.load(file)
        config['model']['criterion'] = params['criterion']
        config['model']['model_type'] = params['model_type']
        config['data']['with_blacklist'] = params['with_blacklist']
        config['data']['with_arranged_mask'] = params['with_arranged_mask']
        config['data_split']['seed'] = random.randint(1, 1000)
        config['model']['uid'] = self.uid = self.create_uid(params)
        config['training']['lr_policy'] = params['lr_scheduler']
        config['model']['optimizer'] = params['optimizer']
        config['augmentation']['mlebe']['normalization'] = params[
            'normalization']
        config['augmentation']['mlebe']["scale_range"] = params['scale_range']
        config['augmentation']['mlebe']["bias_field_prob"] = params[
            'bias_field_prob']
        config['augmentation']['mlebe']['scale_size'] = params['scale_size']
        # 'with_FLASH' toggles inclusion of the irsabi_dargcc study.
        if ('with_FLASH' in params and params['with_FLASH']
                and 'irsabi_dargcc' not in config['data']['studies']):
            config['data']['studies'].append('irsabi_dargcc')
        if 'with_FLASH' in params and not params['with_FLASH']:
            config['data']['studies'] = [
                elem for elem in config['data']['studies']
                if elem != 'irsabi_dargcc'
            ]

        if config['model']['experiment_name'] != 'test':
            config['model']['experiment_name'] = self.create_experiment_name()
        with open(self.config_path, 'w') as outfile:
            json.dump(config, outfile, indent=4)

        self.json_config = json_file_to_pyobj(self.config_path)
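
A minimal usage sketch of the method above (all values are hypothetical; the key set mirrors what write_struct_to_config reads, and experiment_config is assumed to be an instance of the class from Example 2):

params = {
    'criterion': 'dice_loss',            # hypothetical
    'model_type': 'unet',                # hypothetical
    'with_blacklist': False,
    'with_arranged_mask': True,
    'lr_scheduler': 'step',              # hypothetical
    'optimizer': 'adam',                 # hypothetical
    'normalization': 'normalize_medic',  # hypothetical
    'scale_range': [0.9, 1.1],           # hypothetical
    'bias_field_prob': 0.5,              # hypothetical
    'scale_size': [64, 64, 96, 1],       # hypothetical
    'with_FLASH': True,                  # optional: toggles the irsabi_dargcc study
}
experiment_config.write_struct_to_config(params)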
Example 2
    def __init__(self, config_path, pretrained_model=False):
        """
Experiment_config class for logging of parameters. This is useful to keep track of
old experimentations of parameters.
        Parameters
        ----------
        config_path : str
            path to the json configuration file
        """
        self.json_config = json_file_to_pyobj(config_path)
        self.pretrained_model = pretrained_model
        self.config_path = config_path
        self.start_time = timer()
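
A short usage sketch (class name taken from the docstring; the config path is illustrative):

experiment_config = Experiment_config('configs/mlebe_config.json')
elapsed = timer() - experiment_config.start_time  # seconds since construction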
Example 3
def get_model_config(masking_opts, return_path=False):
    """
    Finds the model config (.json) and the model weights (.pth) in the model
    folder, writes the model path into the config and returns the config
    (or its path, if return_path is True).
    """
    model_folder_path = os.path.expanduser(masking_opts['model_folder_path'])
    model_config_path = model_path = None
    for file in os.listdir(model_folder_path):
        if file.endswith('.json') and not file.startswith('._'):
            model_config_path = os.path.join(model_folder_path, file)
        if file.endswith('.pth') and not file.startswith('._'):
            model_path = os.path.join(model_folder_path, file)
    assert model_config_path, f'Model config path was not found under "{model_folder_path}"'
    assert model_path, f'Model path was not found under "{model_folder_path}"'

    log.info(
        f'Writing model_config_path "{model_config_path}" and model_path "{model_path}" to masking_config.'
    )

    write_to_jsonfile(model_config_path,
                      [('model.path_pre_trained_model', model_path)])
    if return_path:
        return model_config_path
    return json_file_to_pyobj(model_config_path)
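
A short usage sketch (only 'model_folder_path' is read from masking_opts above; the folder path is illustrative):

masking_opts = {'model_folder_path': '~/mlebe_models/anat'}  # hypothetical folder
model_config = get_model_config(masking_opts)
# or, to get the path instead of the parsed config:
# model_config_path = get_model_config(masking_opts, return_path=True)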
Example 4
import pandas as pd
from numpy.random import choice
from sklearn.metrics import mean_squared_error
from math import sqrt
import os
import statsmodels.formula.api as smf
from make_config import CONFIG_PATH as config_path, SCRATCH_DIR as scratch_dir
from mlebe.training.utils.utils import json_file_to_pyobj

workflow_config = json_file_to_pyobj(config_path)


def bootstrap(df, factor, scratch_dir, nbr_samples=10000, test=False):
    if factor == 'Volume Conservation Factor':
        metric = 'VCF'
    elif factor == 'Smoothness Conservation Factor':
        metric = 'SCF'
    else:
        raise ValueError(f'Unknown factor "{factor}"')
    scratch_dir = os.path.expanduser(scratch_dir)
    bootstrapped_dir = os.path.join(scratch_dir, 'data', 'bootstrapped')
    if not os.path.isdir(bootstrapped_dir):
        os.makedirs(bootstrapped_dir)

    generic_df = df.loc[df['Processing'] == 'Generic']
    generic_masked_df = df.loc[df['Processing'] == 'Masked']
    if workflow_config.workflow_config.with_FLASH:
        generic_CBV_df = generic_df.loc[generic_df['Contrast'] == 'T2w+CBV']
        generic_BOLD_df = generic_df.loc[generic_df['Contrast'] == 'T2w+BOLD']
        generic_masked_CBV_df = generic_masked_df.loc[
            generic_masked_df['Contrast'] == 'T2w+CBV']
        generic_masked_BOLD_df = generic_masked_df.loc[
            generic_masked_df['Contrast'] == 'T2w+BOLD']
    else:
Example 5
def evaluate(config_path):
    """Evaluate the trained model on the test split and write a pdf report of
    the lowest- and highest-scoring slices."""
    json_opts = json_file_to_pyobj(config_path)
    template_dir = json_opts.data.template_dir
    model = get_model(json_opts.model)
    save_path = os.path.join(model.save_dir, 'irsabi_test')
    mkdir(save_path)
    data_type = json_opts.data.data_type
    print(save_path)
    # shape of the images on which the classifier was trained:
    training_shape = json_opts.augmentation.mlebe.scale_size[:3]
    ds_class = get_dataset('mlebe_dataset')
    # define preprocessing transformations for model
    ds_transform = get_dataset_transformation('mlebe', opts=json_opts.augmentation,
                                              max_output_channels=json_opts.model.output_nc)

    test_dataset = ds_class(template_dir, json_opts.data.data_dir, json_opts.data, split='test',
                            transform=ds_transform['valid'],
                            train_size=None, training_shape=training_shape)
    data_selection = test_dataset.data_selection
    transformer = ds_transform['valid']()

    temp = load_mask(template_dir)
    mask_data = [copy.deepcopy(temp) for _ in range(len(data_selection))]
    dice_scores_df = pd.DataFrame(columns=['volume_name', 'slice', 'dice_score', 'idx'])
    # keep per-volume images, targets and predictions so the plotting loop
    # below can index any volume selected in min_df
    predictions, imgs, targets = [], [], []
    for volume in tqdm(range(len(data_selection))):  # volume is an index
        # get volume
        volume_name = data_selection.iloc[volume]['uid']
        img = nib.load(data_selection.iloc[volume]['path']).get_fdata()
        target = mask_data[volume].get_fdata()

        if json_opts.data.with_arranged_mask:
            # set the mask to zero where the image is zero
            target = arrange_mask(img, target)

        # img = preprocess(img, training_shape[:2], 'coronal')
        # target = preprocess(target, training_shape[:2], 'coronal')
        #
        # # set image shape to x,y,z
        # img = np.moveaxis(img, 0, 2)
        # target = np.moveaxis(target, 0, 2)

        # preprocess data for compatibility with model
        network_input = transformer(np.expand_dims(img, -1))
        target = np.squeeze(transformer(np.expand_dims(target, -1)).cpu().byte().numpy()).astype(np.int16)
        # add dimension for batches
        network_input = network_input.unsqueeze(0)
        model.set_input(network_input)
        model.test()
        # predict
        mask_pred = np.squeeze(model.pred_seg.cpu().numpy())
        img = np.squeeze(network_input.numpy())
        # set image shape to z,x,y
        mask_pred = np.moveaxis(mask_pred, 2, 0)
        img = np.moveaxis(img, 2, 0)
        target = np.moveaxis(target, 2, 0)

        for slice in range(img.shape[0]):
            dice_score = dice(target[slice], mask_pred[slice])
            # see if this is a black slice (want to skip those for visualisation)
            black_slice = np.max(img[slice]) <= 0
            dice_scores_df = dice_scores_df.append(
                {'volume_name': volume_name, 'slice': slice, 'dice_score': dice_score, 'idx': volume,
                 'black_slice': black_slice},
                ignore_index=True)
        predictions.append(mask_pred)
        imgs.append(img)
        targets.append(target)
    # pick the worst and best non-black slices for visualisation
    non_black = dice_scores_df.loc[~dice_scores_df['black_slice']].sort_values(by=['dice_score'])
    min_df = pd.concat([non_black.head(sum(IMG_NBRs) // 2),
                        non_black.tail(sum(IMG_NBRs) - sum(IMG_NBRs) // 2)],
                       ignore_index=True)
    with PdfPages(os.path.join(save_path, 'irsabi_test_{}.pdf'.format(data_type))) as pdf:
        df_idx = 0

        for IMG_NBR in IMG_NBRs:
            plt.figure(figsize=(40, IMG_NBR * 10))
            plt.figtext(.5, .9, 'Mean dice score of {}'.format(np.round(dice_scores_df['dice_score'].mean(), 4)),
                        fontsize=100, ha='center')
            i = 1
            while i <= IMG_NBR * 2 and df_idx < len(min_df):
                # look up the selected volume instead of reusing the loop
                # variables left over from the last iteration above
                volume = int(min_df.iloc[df_idx]['idx'])
                volume_name = min_df.iloc[df_idx]['volume_name']
                slice = int(min_df.iloc[df_idx]['slice'])
                dice_score = min_df.iloc[df_idx]['dice_score']
                plt.subplot(IMG_NBR, 2, i)
                plt.imshow(imgs[volume][slice], cmap='gray')
                plt.imshow(targets[volume][slice], cmap='Blues', alpha=0.6)
                plt.axis('off')
                i += 1
                plt.subplot(IMG_NBR, 2, i)
                plt.imshow(imgs[volume][slice], cmap='gray')
                plt.imshow(predictions[volume][slice], cmap='Blues', alpha=0.6)
                plt.title('Volume: {}, slice {}, dice {}'.format(volume_name, slice, dice_score))
                plt.axis('off')
                i += 1
                df_idx += 1
                df_idx += 1
            pdf.savefig()
            plt.close()

    # save a one-page summary figure with the mean dice score
    plt.title('Dice score = {}'.format(dice_scores_df['dice_score'].mean()))
    plt.savefig('{}.pdf'.format(save_path), format='pdf')

    return dice_scores_df['dice_score'].mean(), dice_scores_df['dice_score'].std()
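
A minimal invocation sketch (the config path is illustrative): evaluate returns the mean and standard deviation of the per-slice dice scores computed above.

dice_mean, dice_std = evaluate('configs/mlebe_config.json')
print('irsabi test dice: {:.4f} +/- {:.4f}'.format(dice_mean, dice_std))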
Example 6
                    irregularity_list = '; '.join(irregularity_list)
                    sessions.loc[sessions['acq_time'] == mydate,
                                 'irregularities'] = irregularity_list
                sessions.to_csv(sessions_file, sep='\t', index=False)

    subjects_info = subjects_info.drop(columns='birth_date')
    subjects_info.to_csv('{}/participants.tsv'.format(bids_dir),
                         sep='\t',
                         index=False)
    subjects_info.to_csv('../data/participants.tsv', sep='\t', index=False)


if __name__ == '__main__':
    config = json_file_to_pyobj(config_path)

    if not os.path.exists(os.path.expanduser(os.path.join(scratch_dir,
                                                          'bids'))):
        if os.path.exists('/usr/share/irsabi_bidsdata'):
            os.mkdir(os.path.expanduser(os.path.join(scratch_dir, 'bids')))
            if config.workflow_config.subjects:
                subjects = config.workflow_config.subjects
                for subject in subjects:
                    command = f'ln -s /usr/share/irsabi_bidsdata/sub-{subject} ~/.scratch/mlebe/bids/'
                    os.system(command)
                command = 'cp /usr/share/irsabi_bidsdata/dataset_description.json ~/.scratch/mlebe/bids/'
                os.system(command)
            else:
                command = 'ln -s /usr/share/irsabi_bidsdata/* ~/.scratch/mlebe/bids/'
                os.system(command)
Example 7
import os
from itertools import product
from os import path
from pathlib import Path

import nibabel as nib
import numpy as np
import pandas as pd
from bids.layout import BIDSLayout
from mlebe.training.utils.utils import json_file_to_pyobj
from samri.report.snr import df_threshold_volume

from make_config import CONFIG_PATH as config_path, SCRATCH_DIR as scratch_dir
from utils.bootstrapping import bootstrap, bootstrap_analysis

workflow_config = json_file_to_pyobj(config_path)


def bids_autograb(bids_dir):
    bids_dir = path.abspath(path.expanduser(bids_dir))
    layout = BIDSLayout(bids_dir, validate=False)
    df = layout.to_df()

    # The current BIDS specification is ambiguous here: we refer to BOLD/CBV
    # as modalities and to func/anat as types.
    df = df.rename(columns={'datatype': 'type', 'suffix': 'modality'})
    return df


base_df = bids_autograb('{}/bids_collapsed'.format(scratch_dir))
base_df = base_df.loc[~base_df['path'].str.endswith('.json')]
base_df = base_df.loc[base_df['modality'].isin(['bold', 'cbv'])]
Example 8
def train(json_filename, network_debug=False, experiment_config=None):
    """
Main training function for the model.
    Parameters
    ----------
    json_filename : str
        Path to the json configuration file
    network_debug : bool (optional)
    experiment_config : class used for logging (optional)
    """

    # Load options
    json_opts = json_file_to_pyobj(json_filename)
    bigprint(f'New try with parameters: {json_opts}')
    train_opts = json_opts.training

    # Setup Dataset and Augmentation
    ds_class = get_dataset('mlebe_dataset')
    ds_path = json_opts.data.data_dir
    template_path = json_opts.data.template_dir
    ds_transform = get_dataset_transformation(
        'mlebe',
        opts=json_opts.augmentation,
        max_output_channels=json_opts.model.output_nc)

    # Setup channels
    channels = json_opts.data_opts.channels
    if (len(channels) != json_opts.model.input_nc
            or len(channels) != json_opts.augmentation.mlebe.scale_size[-1]):
        raise ValueError(
            'Number of data channels must match number of model channels, and patch and scale size dimensions'
        )

    # Setup the NN Model
    model = get_model(json_opts.model)
    if json_filename == 'configs/test_config.json':
        print('removing dir ', model.save_dir)
        shutil.rmtree(model.save_dir)
        os.mkdir(model.save_dir)

    if network_debug:
        print('# of pars: ', model.get_number_parameters())
        print('fp time: {0:.3f} sec\tbp time: {1:.3f} sec per sample'.format(
            *model.get_fp_bp_time()))
        exit()

    # Setup Data Loader
    split_opts = json_opts.data_split
    data_opts = json_opts.data
    train_dataset = ds_class(
        template_path,
        ds_path,
        data_opts,
        split='train',
        save_dir=model.save_dir,
        transform=ds_transform['train'],
        train_size=split_opts.train_size,
        test_size=split_opts.test_size,
        valid_size=split_opts.validation_size,
        split_seed=split_opts.seed,
        training_shape=json_opts.augmentation.mlebe.scale_size[:3])
    valid_dataset = ds_class(
        template_path,
        ds_path,
        data_opts,
        split='validation',
        save_dir=model.save_dir,
        transform=ds_transform['valid'],
        train_size=split_opts.train_size,
        test_size=split_opts.test_size,
        valid_size=split_opts.validation_size,
        split_seed=split_opts.seed,
        training_shape=json_opts.augmentation.mlebe.scale_size[:3])
    test_dataset = ds_class(
        template_path,
        ds_path,
        data_opts,
        split='test',
        save_dir=model.save_dir,
        transform=ds_transform['valid'],
        train_size=split_opts.train_size,
        test_size=split_opts.test_size,
        valid_size=split_opts.validation_size,
        split_seed=split_opts.seed,
        training_shape=json_opts.augmentation.mlebe.scale_size[:3])
    train_loader = DataLoader(dataset=train_dataset,
                              num_workers=1,
                              batch_size=train_opts.batchSize,
                              shuffle=True)
    valid_loader = DataLoader(dataset=valid_dataset,
                              num_workers=1,
                              batch_size=train_opts.batchSize,
                              shuffle=False)
    test_loader = DataLoader(dataset=test_dataset,
                             num_workers=1,
                             batch_size=train_opts.batchSize,
                             shuffle=False)

    # Visualisation Parameters
    visualizer = Visualiser(json_opts.visualisation, save_dir=model.save_dir)
    error_logger = ErrorLogger()

    # Training Function
    model.set_scheduler(train_opts)
    # Setup Early Stopping
    early_stopper = EarlyStopper(json_opts.training.early_stopping_patience)

    for epoch in range(model.which_epoch, train_opts.n_epochs):
        print('(epoch: %d, total # iters: %d)' % (epoch, len(train_loader)))
        train_volumes = []
        validation_volumes = []

        # Training Iterations
        for epoch_iter, (images, labels,
                         indices) in tqdm(enumerate(train_loader, 1),
                                          total=len(train_loader)):
            # Make a training update
            model.set_input(images, labels)
            model.optimize_parameters()
            # model.optimize_parameters_accumulate_grd(epoch_iter)

            # Error visualisation
            errors = model.get_current_errors()
            error_logger.update(errors, split='train')

            ids = train_dataset.get_ids(indices)
            volumes = model.get_current_volumes()
            visualizer.display_current_volumes(volumes, ids, 'train', epoch)
            train_volumes.append(volumes)

        # Validation and Testing Iterations
        for loader, split, dataset in zip([valid_loader, test_loader],
                                          ['validation', 'test'],
                                          [valid_dataset, test_dataset]):
            for epoch_iter, (images, labels,
                             indices) in tqdm(enumerate(loader, 1),
                                              total=len(loader)):
                ids = dataset.get_ids(indices)

                # Make a forward pass with the model
                model.set_input(images, labels)
                model.validate()

                # Error visualisation
                errors = model.get_current_errors()
                stats = model.get_segmentation_stats()
                error_logger.update({**errors, **stats}, split=split)

                # Visualise predictions (validation only; do not look at testing)
                if split == 'validation':
                    volumes = model.get_current_volumes()
                    volumes = model.get_current_volumes()
                    visualizer.display_current_volumes(volumes, ids, split,
                                                       epoch)
                    validation_volumes.append(volumes)

        current_loss = error_logger.get_errors('validation')['Seg_Loss']
        # Update best validation loss/epoch values
        model.update_validation_state(epoch, current_loss)
        early_stopper.update(model, epoch, current_loss)
        # Update the plots
        for split in ['train', 'validation', 'test']:
            visualizer.plot_current_errors(epoch,
                                           error_logger.get_errors(split),
                                           split_name=split)
            visualizer.print_current_errors(epoch,
                                            error_logger.get_errors(split),
                                            split_name=split)
        visualizer.save_plots(epoch, save_frequency=5)
        error_logger.reset()

        # saving checkpoint
        if model.is_improving:
            print('saving model')
            # replacing old model with new model
            model.save(json_opts.model.model_type, epoch)

        # Update the model learning rate
        model.update_learning_rate(metric=current_loss)

        if early_stopper.should_stop_early:
            print('early stopping')
            # get validation metrics
            # use the configured checkpoints dir, matching the path below
            val_loss_log = pd.read_excel(os.path.join(
                json_opts.model.checkpoints_dir,
                json_opts.model.experiment_name, 'loss_log.xlsx'),
                                         sheet_name='validation').iloc[:, 1:]

            irsabi_dice_mean, irsabi_dice_std = finalize(
                json_opts, json_filename, model, experiment_config)

            val_loss_log['irsabi_dice_mean'] = irsabi_dice_mean
            val_loss_log['irsabi_dice_std'] = irsabi_dice_std
            return val_loss_log.loc[val_loss_log['Seg_Loss'] ==
                                    val_loss_log['Seg_Loss'].min()]

    # get validation metrics
    val_loss_log = pd.read_excel(os.path.join(json_opts.model.checkpoints_dir,
                                              json_opts.model.experiment_name,
                                              'loss_log.xlsx'),
                                 sheet_name='validation').iloc[:, 1:]

    irsabi_dice_mean, irsabi_dice_std = finalize(json_opts, json_filename,
                                                 model, experiment_config)

    val_loss_log['irsabi_dice_mean'] = irsabi_dice_mean
    val_loss_log['irsabi_dice_std'] = irsabi_dice_std
    return val_loss_log.loc[val_loss_log['Seg_Loss'] ==
                            val_loss_log['Seg_Loss'].min()]
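
A minimal invocation sketch (the config path is illustrative): train returns the row of the validation loss log with the lowest Seg_Loss, extended with the irsabi dice statistics.

best = train('configs/mlebe_config.json')
print(best[['Seg_Loss', 'irsabi_dice_mean', 'irsabi_dice_std']])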
Example 9
                                      alpha=0,
                                      beta=1,
                                      norm_type=cv2.NORM_MINMAX,
                                      dtype=cv2.CV_32F))
                    y_train.append(label_arr[..., slice])

    with open(os.path.join(save_directory, 'x_train.npy'), 'wb') as file1:
        np.save(file1, x_train)
    with open(os.path.join(save_directory, 'y_train.npy'), 'wb') as file2:
        np.save(file2, y_train)


save_dir = os.path.expanduser(os.path.join(scratch_dir, 'classifiers', 'T2'))
mkdir(save_dir)

workflow_json_opts = json_file_to_pyobj(config_path)
model_config_path = Path(workflow_json_opts.masking_config.masking_config_anat.
                         model_folder_path) / 'trained_mlebe_config_anat.json'
model_json_opts = json_file_to_pyobj(model_config_path)
data_dir = model_json_opts.data.data_dir
template_dir = '/usr/share/mouse-brain-atlases/'

ds_class = get_dataset('mlebe_dataset')
ds_path = model_json_opts.data.data_dir
channels = model_json_opts.data_opts.channels
split_opts = model_json_opts.data_split
train_opts = model_json_opts.training
ds_transform = get_dataset_transformation(
    'mlebe',
    opts=model_json_opts.augmentation,
    max_output_channels=model_json_opts.model.output_nc)
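
A sketch of how the objects above are typically combined, mirroring the dataset construction in Example 8 (the split choice and the omission of save_dir are assumptions):

train_dataset = ds_class(
    template_dir,
    ds_path,
    model_json_opts.data,
    split='train',
    transform=ds_transform['train'],
    train_size=split_opts.train_size,
    test_size=split_opts.test_size,
    valid_size=split_opts.validation_size,
    split_seed=split_opts.seed,
    training_shape=model_json_opts.augmentation.mlebe.scale_size[:3])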