def write_struct_to_config(self, params):
    self.params = params
    with open(self.config_path) as file:
        config = json.load(file)
    config['model']['criterion'] = params['criterion']
    config['model']['model_type'] = params['model_type']
    config['data']['with_blacklist'] = params['with_blacklist']
    config['data']['with_arranged_mask'] = params['with_arranged_mask']
    config['data_split']['seed'] = random.randint(1, 1000)
    config['model']['uid'] = self.uid = self.create_uid(params)
    config['training']['lr_policy'] = params['lr_scheduler']
    config['model']['optimizer'] = params['optimizer']
    config['augmentation']['mlebe']['normalization'] = params['normalization']
    config['augmentation']['mlebe']['scale_range'] = params['scale_range']
    config['augmentation']['mlebe']['bias_field_prob'] = params['bias_field_prob']
    config['augmentation']['mlebe']['scale_size'] = params['scale_size']
    if ('with_FLASH' in params and params['with_FLASH']
            and 'irsabi_dargcc' not in config['data']['studies']):
        config['data']['studies'].append('irsabi_dargcc')
    if 'with_FLASH' in params and not params['with_FLASH']:
        config['data']['studies'] = [
            elem for elem in config['data']['studies'] if elem != 'irsabi_dargcc'
        ]
    if config['model']['experiment_name'] != 'test':
        config['model']['experiment_name'] = self.create_experiment_name()
    with open(self.config_path, 'w') as outfile:
        json.dump(config, outfile, indent=4)
    self.json_config = json_file_to_pyobj(self.config_path)
def __init__(self, config_path, pretrained_model=False):
    """
    Experiment_config class for logging of parameters.

    This is useful to keep track of the parameters of previous experiments.

    Parameters
    ----------
    config_path : str
        Path to the json configuration file.
    pretrained_model : bool, optional
    """
    self.json_config = json_file_to_pyobj(config_path)
    self.pretrained_model = pretrained_model
    self.config_path = config_path
    self.start_time = timer()
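# Usage sketch (the class name below is an illustrative assumption, not part of this
# file): an instance is created with the path to the json configuration, and
# `write_struct_to_config` is then called with a params dict carrying the keys read
# above ('criterion', 'model_type', 'with_blacklist', 'with_arranged_mask',
# 'lr_scheduler', 'optimizer', 'normalization', 'scale_range', 'bias_field_prob',
# 'scale_size' and optionally 'with_FLASH'), e.g.
#   experiment_config = ExperimentConfig('configs/mlebe_config.json')
#   experiment_config.write_struct_to_config(params)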
def get_model_config(masking_opts, return_path=False):
    """Return the model config (or its path) and write model_path to it."""
    model_folder_path = os.path.expanduser(masking_opts['model_folder_path'])
    model_config_path = None
    model_path = None
    for file in os.listdir(model_folder_path):
        if file.endswith('.json') and not file.startswith('._'):
            model_config_path = os.path.join(model_folder_path, file)
        if file.endswith('.pth') and not file.startswith('._'):
            model_path = os.path.join(model_folder_path, file)
    assert model_config_path, f'Model config path was not found under "{model_folder_path}"'
    assert model_path, f'Model path was not found under "{model_folder_path}"'
    log.info(
        f'Writing model_config_path "{model_config_path}" and model_path "{model_path}" to masking_config.'
    )
    write_to_jsonfile(model_config_path, [('model.path_pre_trained_model', model_path)])
    if return_path:
        return model_config_path
    return json_file_to_pyobj(model_config_path)
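# Usage sketch for `get_model_config` (the folder path below is a hypothetical
# example): `masking_opts` is a mapping with a 'model_folder_path' entry pointing at
# a directory containing one .json model config and one .pth checkpoint.
#
#   masking_opts = {'model_folder_path': '~/mlebe_models/anat'}
#   model_config = get_model_config(masking_opts)                     # pyobj of the config
#   model_config_path = get_model_config(masking_opts, return_path=True)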
import os
from math import sqrt

import pandas as pd
import statsmodels.formula.api as smf
from numpy.random import choice
from sklearn.metrics import mean_squared_error

from make_config import CONFIG_PATH as config_path, SCRATCH_DIR as scratch_dir
from mlebe.training.utils.utils import json_file_to_pyobj

workflow_config = json_file_to_pyobj(config_path)


def bootstrap(df, factor, scratch_dir, nbr_samples=10000, test=False):
    if factor == 'Volume Conservation Factor':
        metric = 'VCF'
    elif factor == 'Smoothness Conservation Factor':
        metric = 'SCF'
    else:
        raise ValueError(f'Unknown factor: {factor}')
    scratch_dir = os.path.expanduser(scratch_dir)
    if not os.path.isdir(scratch_dir + '/data/bootstrapped'):
        os.mkdir(scratch_dir + '/data/bootstrapped')
    generic_df = df.loc[df['Processing'] == 'Generic']
    generic_masked_df = df.loc[df['Processing'] == 'Masked']
    if workflow_config.workflow_config.with_FLASH:
        generic_CBV_df = generic_df.loc[generic_df['Contrast'] == 'T2w+CBV']
        generic_BOLD_df = generic_df.loc[generic_df['Contrast'] == 'T2w+BOLD']
        generic_masked_CBV_df = generic_masked_df.loc[
            generic_masked_df['Contrast'] == 'T2w+CBV']
        generic_masked_BOLD_df = generic_masked_df.loc[
            generic_masked_df['Contrast'] == 'T2w+BOLD']
    else:
def evaluate(config_path):
    json_opts = json_file_to_pyobj(config_path)
    template_dir = json_opts.data.template_dir
    model = get_model(json_opts.model)
    save_path = os.path.join(model.save_dir, 'irsabi_test')
    mkdir(save_path)
    data_type = json_opts.data.data_type
    print(save_path)
    # shape of the images on which the classifier was trained:
    training_shape = json_opts.augmentation.mlebe.scale_size[:3]
    ds_class = get_dataset('mlebe_dataset')
    # define preprocessing transformations for the model
    ds_transform = get_dataset_transformation('mlebe', opts=json_opts.augmentation,
                                              max_output_channels=json_opts.model.output_nc)
    test_dataset = ds_class(template_dir, json_opts.data.data_dir, json_opts.data, split='test',
                            transform=ds_transform['valid'], train_size=None, training_shape=training_shape)
    data_selection = test_dataset.data_selection
    transformer = ds_transform['valid']()
    temp = load_mask(template_dir)
    mask_data = [copy.deepcopy(temp) for _ in range(len(data_selection))]
    dice_scores_df = pd.DataFrame(columns=['volume_name', 'slice', 'dice_score', 'idx'])
    predictions = []
    images = []
    targets = []
    for volume in tqdm(range(len(data_selection))):  # volume is an index
        # get volume
        volume_name = data_selection.iloc[volume]['uid']
        img = nib.load(data_selection.iloc[volume]['path']).get_data()
        target = mask_data[volume].get_data()
        if json_opts.data.with_arranged_mask:
            # set the mask to zero where the image is zero
            target = arrange_mask(img, target)
        # img = preprocess(img, training_shape[:2], 'coronal')
        # target = preprocess(target, training_shape[:2], 'coronal')
        #
        # # set image shape to x,y,z
        # img = np.moveaxis(img, 0, 2)
        # target = np.moveaxis(target, 0, 2)

        # preprocess data for compatibility with the model
        network_input = transformer(np.expand_dims(img, -1))
        target = np.squeeze(transformer(np.expand_dims(target, -1)).cpu().byte().numpy()).astype(np.int16)
        # add dimension for batches
        network_input = network_input.unsqueeze(0)
        model.set_input(network_input)
        model.test()  # predict
        mask_pred = np.squeeze(model.pred_seg.cpu().numpy())
        img = np.squeeze(network_input.numpy())
        # set image shape to z,x,y
        mask_pred = np.moveaxis(mask_pred, 2, 0)
        img = np.moveaxis(img, 2, 0)
        target = np.moveaxis(target, 2, 0)
        for slice in range(img.shape[0]):
            dice_score = dice(target[slice], mask_pred[slice])
            # see if this is a black slice (want to skip those for visualisation)
            black_slice = np.max(img[slice]) <= 0
            dice_scores_df = dice_scores_df.append(
                {'volume_name': volume_name, 'slice': slice, 'dice_score': dice_score, 'idx': volume,
                 'black_slice': black_slice}, ignore_index=True)
        predictions.append(mask_pred)
        images.append(img)
        targets.append(target)

    # select the worst and best scoring non-black slices for visualisation
    non_black = dice_scores_df.loc[dice_scores_df['black_slice'] == False].sort_values(by=['dice_score'])
    min_df = non_black.head(sum(IMG_NBRs) // 2)
    min_df = pd.concat([min_df, non_black.tail(sum(IMG_NBRs) - sum(IMG_NBRs) // 2)], ignore_index=True)
    with PdfPages(os.path.join(save_path, 'irsabi_test_{}.pdf'.format(data_type))) as pdf:
        df_idx = 0
        for IMG_NBR in IMG_NBRs:
            plt.figure(figsize=(40, IMG_NBR * 10))
            plt.figtext(.5, .9, 'Mean dice score of {}'.format(np.round(dice_scores_df['dice_score'].mean(), 4)),
                        fontsize=100, ha='center')
            i = 1
            while i <= IMG_NBR * 2 and df_idx < len(min_df):
                volume = min_df.iloc[df_idx]['idx']
                slice = min_df.iloc[df_idx]['slice']
                dice_score = min_df.iloc[df_idx]['dice_score']
                volume_name = data_selection.iloc[volume]['uid']
                # ground truth overlay
                plt.subplot(IMG_NBR, 2, i)
                plt.imshow(images[volume][slice], cmap='gray')
                plt.imshow(targets[volume][slice], cmap='Blues', alpha=0.6)
                plt.axis('off')
                i += 1
                # prediction overlay
                plt.subplot(IMG_NBR, 2, i)
                plt.imshow(images[volume][slice], cmap='gray')
                plt.imshow(predictions[volume][slice], cmap='Blues', alpha=0.6)
                plt.title('Volume: {}, slice {}, dice {}'.format(volume_name, slice, dice_score))
                plt.axis('off')
                i += 1
                df_idx += 1
            pdf.savefig()
            plt.close()
    plt.title('Dice score = {}'.format(dice_scores_df['dice_score'].mean()))
    plt.savefig('{}.pdf'.format(save_path), format='pdf')
    return dice_scores_df['dice_score'].mean(), dice_scores_df['dice_score'].std()
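# The `dice` helper used above is imported from the training utilities and is not
# shown in this file. For reference, a minimal sketch of the standard Dice
# coefficient on binary masks (an assumption about its semantics, not the
# repository's actual implementation):
def dice_sketch(mask_a, mask_b, eps=1e-8):
    """Dice = 2 * |A & B| / (|A| + |B|), computed on flattened binary masks."""
    import numpy as np  # local import so the sketch is self-contained
    a = np.asarray(mask_a).astype(bool)
    b = np.asarray(mask_b).astype(bool)
    intersection = np.logical_and(a, b).sum()
    return 2.0 * intersection / (a.sum() + b.sum() + eps)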
irregularity_list = '; '.join(irregularity_list)
sessions.loc[sessions['acq_time'] == mydate, 'irregularities'] = irregularity_list
sessions.to_csv(sessions_file, sep='\t', index=False)
subjects_info = subjects_info.drop('birth_date', axis=1)
subjects_info.to_csv('{}/participants.tsv'.format(bids_dir), sep='\t', index=False)
subjects_info.to_csv('../data/participants.tsv', sep='\t', index=False)


if __name__ == '__main__':
    config = json_file_to_pyobj(config_path)
    if not os.path.exists(os.path.expanduser(os.path.join(scratch_dir, 'bids'))):
        if os.path.exists('/usr/share/irsabi_bidsdata'):
            os.mkdir(os.path.expanduser(os.path.join(scratch_dir, 'bids')))
            if config.workflow_config.subjects:
                subjects = config.workflow_config.subjects
                for subject in subjects:
                    command = f'ln -s /usr/share/irsabi_bidsdata/sub-{subject} ~/.scratch/mlebe/bids/'
                    os.system(command)
                command = 'cp /usr/share/irsabi_bidsdata/dataset_description.json ~/.scratch/mlebe/bids/'
                os.system(command)
            else:
                command = 'ln -s /usr/share/irsabi_bidsdata/* ~/.scratch/mlebe/bids/'
                os.system(command)
import os
from itertools import product
from os import path
from pathlib import Path

import nibabel as nib
import numpy as np
import pandas as pd
from bids.layout import BIDSLayout

from make_config import CONFIG_PATH as config_path, SCRATCH_DIR as scratch_dir
from mlebe.training.utils.utils import json_file_to_pyobj
from samri.report.snr import df_threshold_volume
from utils.bootstrapping import bootstrap, bootstrap_analysis

workflow_config = json_file_to_pyobj(config_path)


def bids_autograb(bids_dir):
    bids_dir = path.abspath(path.expanduser(bids_dir))
    layout = BIDSLayout(bids_dir, validate=False)
    df = layout.to_df()
    # Unclear in the current BIDS specification: we refer to BOLD/CBV as modalities and func/anat as types.
    df = df.rename(columns={'datatype': 'type', 'suffix': 'modality'})
    return df


base_df = bids_autograb('{}/bids_collapsed'.format(scratch_dir))
base_df = base_df.loc[~base_df['path'].str.endswith('.json')]
base_df = base_df.loc[base_df['modality'].isin(['bold', 'cbv'])]
def train(json_filename, network_debug=False, experiment_config=None):
    """
    Main training function for the model.

    Parameters
    ----------
    json_filename : str
        Path to the json configuration file.
    network_debug : bool, optional
        If True, only print the network size and forward/backward timing, then exit.
    experiment_config : optional
        Class used for logging.
    """
    # Load options
    json_opts = json_file_to_pyobj(json_filename)
    bigprint(f'New try with parameters: {json_opts}')
    train_opts = json_opts.training

    # Setup Dataset and Augmentation
    ds_class = get_dataset('mlebe_dataset')
    ds_path = json_opts.data.data_dir
    template_path = json_opts.data.template_dir
    ds_transform = get_dataset_transformation(
        'mlebe', opts=json_opts.augmentation,
        max_output_channels=json_opts.model.output_nc)

    # Setup channels
    channels = json_opts.data_opts.channels
    if len(channels) != json_opts.model.input_nc \
            or len(channels) != getattr(json_opts.augmentation, 'mlebe').scale_size[-1]:
        raise Exception(
            'Number of data channels must match number of model channels, and patch and scale size dimensions')

    # Setup the NN Model
    model = get_model(json_opts.model)
    if json_filename == 'configs/test_config.json':
        print('removing dir ', model.save_dir)
        shutil.rmtree(model.save_dir)
        os.mkdir(model.save_dir)

    if network_debug:
        print('# of pars: ', model.get_number_parameters())
        print('fp time: {0:.3f} sec\tbp time: {1:.3f} sec per sample'.format(*model.get_fp_bp_time()))
        exit()

    # Setup Data Loader
    split_opts = json_opts.data_split
    data_opts = json_opts.data
    train_dataset = ds_class(
        template_path, ds_path, data_opts, split='train', save_dir=model.save_dir,
        transform=ds_transform['train'], train_size=split_opts.train_size,
        test_size=split_opts.test_size, valid_size=split_opts.validation_size,
        split_seed=split_opts.seed, training_shape=json_opts.augmentation.mlebe.scale_size[:3])
    valid_dataset = ds_class(
        template_path, ds_path, data_opts, split='validation', save_dir=model.save_dir,
        transform=ds_transform['valid'], train_size=split_opts.train_size,
        test_size=split_opts.test_size, valid_size=split_opts.validation_size,
        split_seed=split_opts.seed, training_shape=json_opts.augmentation.mlebe.scale_size[:3])
    test_dataset = ds_class(
        template_path, ds_path, data_opts, split='test', save_dir=model.save_dir,
        transform=ds_transform['valid'], train_size=split_opts.train_size,
        test_size=split_opts.test_size, valid_size=split_opts.validation_size,
        split_seed=split_opts.seed, training_shape=json_opts.augmentation.mlebe.scale_size[:3])
    train_loader = DataLoader(dataset=train_dataset, num_workers=1, batch_size=train_opts.batchSize, shuffle=True)
    valid_loader = DataLoader(dataset=valid_dataset, num_workers=1, batch_size=train_opts.batchSize, shuffle=False)
    test_loader = DataLoader(dataset=test_dataset, num_workers=1, batch_size=train_opts.batchSize, shuffle=False)

    # Visualisation Parameters
    visualizer = Visualiser(json_opts.visualisation, save_dir=model.save_dir)
    error_logger = ErrorLogger()

    # Training Function
    model.set_scheduler(train_opts)
    # Setup Early Stopping
    early_stopper = EarlyStopper(json_opts.training.early_stopping_patience)

    for epoch in range(model.which_epoch, train_opts.n_epochs):
        print('(epoch: %d, total # iters: %d)' % (epoch, len(train_loader)))
        train_volumes = []
        validation_volumes = []

        # Training Iterations
        for epoch_iter, (images, labels, indices) in tqdm(enumerate(train_loader, 1), total=len(train_loader)):
            # Make a training update
            model.set_input(images, labels)
            model.optimize_parameters()
            # model.optimize_parameters_accumulate_grd(epoch_iter)

            # Error visualisation
            errors = model.get_current_errors()
            error_logger.update(errors, split='train')
            ids = train_dataset.get_ids(indices)
            volumes = model.get_current_volumes()
            visualizer.display_current_volumes(volumes, ids, 'train', epoch)
            train_volumes.append(volumes)

        # Validation and Testing Iterations
        for loader, split, dataset in zip([valid_loader, test_loader], ['validation', 'test'],
                                          [valid_dataset, test_dataset]):
            for epoch_iter, (images, labels, indices) in tqdm(enumerate(loader, 1), total=len(loader)):
                ids = dataset.get_ids(indices)

                # Make a forward pass with the model
                model.set_input(images, labels)
                model.validate()

                # Error visualisation
                errors = model.get_current_errors()
                stats = model.get_segmentation_stats()
                error_logger.update({**errors, **stats}, split=split)

                if split == 'validation':  # do not look at testing
                    # Visualise predictions
                    volumes = model.get_current_volumes()
                    visualizer.display_current_volumes(volumes, ids, split, epoch)
                    validation_volumes.append(volumes)

        current_loss = error_logger.get_errors('validation')['Seg_Loss']
        # Update best validation loss/epoch values
        model.update_validation_state(epoch, current_loss)
        early_stopper.update(model, epoch, current_loss)

        # Update the plots
        for split in ['train', 'validation', 'test']:
            visualizer.plot_current_errors(epoch, error_logger.get_errors(split), split_name=split)
            visualizer.print_current_errors(epoch, error_logger.get_errors(split), split_name=split)
        visualizer.save_plots(epoch, save_frequency=5)
        error_logger.reset()

        # saving checkpoint
        if model.is_improving:
            print('saving model')
            # replacing old model with new model
            model.save(json_opts.model.model_type, epoch)

        # Update the model learning rate
        model.update_learning_rate(metric=current_loss)

        if early_stopper.should_stop_early:
            print('early stopping')
            # get validation metrics
            val_loss_log = pd.read_excel(
                os.path.join('checkpoints', json_opts.model.experiment_name, 'loss_log.xlsx'),
                sheet_name='validation').iloc[:, 1:]
            irsabi_dice_mean, irsabi_dice_std = finalize(json_opts, json_filename, model, experiment_config)
            val_loss_log['irsabi_dice_mean'] = irsabi_dice_mean
            val_loss_log['irsabi_dice_std'] = irsabi_dice_std
            return val_loss_log.loc[val_loss_log['Seg_Loss'] == val_loss_log['Seg_Loss'].min()]

    # get validation metrics
    val_loss_log = pd.read_excel(
        os.path.join(json_opts.model.checkpoints_dir, json_opts.model.experiment_name, 'loss_log.xlsx'),
        sheet_name='validation').iloc[:, 1:]
    irsabi_dice_mean, irsabi_dice_std = finalize(json_opts, json_filename, model, experiment_config)
    val_loss_log['irsabi_dice_mean'] = irsabi_dice_mean
    val_loss_log['irsabi_dice_std'] = irsabi_dice_std
    return val_loss_log.loc[val_loss_log['Seg_Loss'] == val_loss_log['Seg_Loss'].min()]
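# `EarlyStopper` is imported from the training utilities; its implementation is not
# shown here. A minimal sketch matching the interface used in `train` (constructor
# taking a patience value, `update(model, epoch, current_loss)`, and a
# `should_stop_early` flag), assuming "stop after `patience` epochs without
# improvement" semantics, could look like this:
class EarlyStopperSketch:
    def __init__(self, patience):
        self.patience = patience
        self.best_loss = float('inf')
        self.best_epoch = 0
        self.should_stop_early = False

    def update(self, model, epoch, current_loss):
        # Track the best validation loss seen so far; flag early stopping once it has
        # not improved for `patience` consecutive epochs. `model` is accepted only to
        # mirror the call signature used above and is not needed in this sketch.
        if current_loss < self.best_loss:
            self.best_loss = current_loss
            self.best_epoch = epoch
        elif epoch - self.best_epoch >= self.patience:
            self.should_stop_early = True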
            alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F))
        y_train.append(label_arr[..., slice])

    with open(os.path.join(save_directory, 'x_train.npy'), 'wb') as file1:
        np.save(file1, x_train)
    with open(os.path.join(save_directory, 'y_train.npy'), 'wb') as file2:
        np.save(file2, y_train)


save_dir = os.path.expanduser(os.path.join(scratch_dir, 'classifiers', 'T2'))
mkdir(save_dir)
workflow_json_opts = json_file_to_pyobj(config_path)
model_config_path = Path(workflow_json_opts.masking_config.masking_config_anat.model_folder_path) \
                    / 'trained_mlebe_config_anat.json'
model_json_opts = json_file_to_pyobj(model_config_path)
data_dir = model_json_opts.data.data_dir
template_dir = '/usr/share/mouse-brain-atlases/'
ds_class = get_dataset('mlebe_dataset')
ds_path = model_json_opts.data.data_dir
channels = model_json_opts.data_opts.channels
split_opts = model_json_opts.data_split
train_opts = model_json_opts.training
ds_transform = get_dataset_transformation(
    'mlebe', opts=model_json_opts.augmentation,
    max_output_channels=model_json_opts.model.output_nc)