Ejemplo n.º 1
0
def fit_composite_model(model, input_data, output_folder, method, tmp_results_dir,
                        recalculate=False, cascade_names=None, optimizer_options=None):
    """Fit a composite model and return the optimization results.

    If results for this model are already present in the output folder and ``recalculate`` is False,
    the stored maps are loaded from disk and returned instead of running the optimization again.

    Args:
        model (:class:`~mdt.models.composite.DMRICompositeModel`): An implementation of a composite model
            that contains the model we want to optimize.
        input_data (:class:`~mdt.utils.MRIInputData`): The input data object for the model.
        output_folder (string): The path to the folder where to place the output.
            The resulting maps are placed in a subdirectory (named after the model name) in this output folder.
        method (str): The optimization routine to use.
        tmp_results_dir (str): the main directory to use for the temporary results
        recalculate (boolean): If we want to recalculate the results if they are already present.
            When set, existing ``*.nii*`` files and the ``covariances`` subdirectory of the model's
            output directory are removed before fitting.
        cascade_names (list): the list of cascade names, only used for logging
        optimizer_options (dict): the additional optimization options

    Returns:
        When existing results are reused: the result of ``create_roi`` applied to the stored maps and the
        mask of the input data. Otherwise: whatever the ``'optimization'`` processing strategy returns
        for the fitting worker.

    Raises:
        InsufficientProtocolError: if the model reports that the input data is insufficient.
    """
    logger = logging.getLogger(__name__)
    output_path = os.path.join(output_folder, model.name)

    if not model.is_input_data_sufficient(input_data):
        raise InsufficientProtocolError(
            'The given protocol is insufficient for this model. '
            'The reported errors where: {}'.format(model.get_input_data_problems(input_data)))

    if not recalculate and model_output_exists(model, output_folder):
        maps = get_all_nifti_data(output_path)
        logger.info('Not recalculating {} model'.format(model.name))
        return create_roi(maps, input_data.mask)

    with per_model_logging_context(output_path):
        logger.info('Using MDT version {}'.format(__version__))
        logger.info('Preparing for model {0}'.format(model.name))
        logger.info('Current cascade: {0}'.format(cascade_names))

        model.set_input_data(input_data)

        if recalculate and os.path.exists(output_path):
            # Remove stale results so old maps can not be mistaken for new ones.
            for nifti_file in glob.glob(os.path.join(output_path, '*.nii*')):
                os.remove(nifti_file)

            # The covariances live in a subdirectory of the model output directory; the path has to be
            # joined (not string-concatenated) to actually point inside ``output_path``.
            covariances_dir = os.path.join(output_path, 'covariances')
            if os.path.exists(covariances_dir):
                shutil.rmtree(covariances_dir)

        if not os.path.exists(output_path):
            os.makedirs(output_path)

        with _model_fit_logging(logger, model.name, model.get_free_param_names()):
            tmp_dir = get_full_tmp_results_path(output_path, tmp_results_dir)
            logger.info('Saving temporary results in {}.'.format(tmp_dir))

            worker = FittingProcessor(method, model, input_data.mask,
                                      input_data.nifti_header, output_path,
                                      tmp_dir, recalculate, optimizer_options=optimizer_options)

            processing_strategy = get_processing_strategy('optimization')
            return processing_strategy.process(worker)
Ejemplo n.º 2
0
    def __init__(self, model, input_data, output_folder,
                 method=None, optimizer_options=None,
                 recalculate=False, only_recalculate_last=False,
                 cl_device_ind=None, double_precision=False,
                 tmp_results_dir=True, initialization_data=None,
                 post_processing=None):
        """Prepare a model fit for the given model and input data.

        This only stores the configuration and validates the input data; call run() to
        actually fit the model.

        Args:
            model (str or :class:`~mdt.models.composite.DMRICompositeModel` or :class:`~mdt.models.cascade.DMRICascadeModelInterface`):
                the model to optimize. A string is resolved with ``get_model`` and instantiated.
            input_data (:class:`~mdt.utils.MRIInputData`): the input data object containing
                all the info needed for the model fitting.
            output_folder (string): The full path to the folder where to place the output
            method (str): The optimization method to use, one of:
                - 'Levenberg-Marquardt'
                - 'Nelder-Mead'
                - 'Powell'
                - 'Subplex'

                If not given, defaults to 'Powell'. This constructor stores the value unchanged.
            optimizer_options (dict): extra options passed to the optimization routines.
            recalculate (boolean): If we want to recalculate the results if they are already present.
            only_recalculate_last (boolean): only of importance when dealing with cascade models.
                If True (and ``recalculate`` is True) only the last element in the chain is recalculated.
                If False everything is recalculated. This only holds for the first level of the cascade.
            cl_device_ind (int or list): the index of the CL device to use. The index is from the list from the
                function get_cl_devices(). This can also be a list of device indices.
            double_precision (boolean): if we would like to do the calculations in double precision
            tmp_results_dir (str, True or None): The temporary dir for the calculations. Set to a string to use
                that path directly, set to True to use the config value, set to None to disable.
            initialization_data (dict or :class:`~mdt.utils.InitializationData`): extra initialization data to
                use during model fitting. A value that is not an ``InitializationData`` instance is expanded
                as keyword arguments into ``SimpleInitializationData``. If we are optimizing a cascade model
                this data only applies to the last model in the cascade.
            post_processing (dict): a dictionary with flags for post-processing options to enable or disable.
                For valid elements, please see the configuration file settings for ``optimization``
                under ``post_processing``. Valid input for this parameter is for example: {'covariance': False}
                to disable automatic calculation of the covariance from the Hessian.

        Raises:
            InsufficientProtocolError: if the model reports that the input data is insufficient.
        """
        # A model given by name is looked up and instantiated first.
        if isinstance(model, str):
            model_class = get_model(model)
            model = model_class()

        # Post-processing flags are pushed onto the model before it is stored.
        if post_processing:
            model.update_active_post_processing('optimization', post_processing)

        self._logger = logging.getLogger(__name__)

        self._model = model
        self._input_data = input_data
        self._output_folder = output_folder

        self._method = method
        self._optimizer_options = optimizer_options

        self._recalculate = recalculate
        self._only_recalculate_last = only_recalculate_last

        self._model_names_list = []
        self._tmp_results_dir = get_temporary_results_dir(tmp_results_dir)

        # Anything that is not None and not yet an InitializationData gets wrapped.
        needs_wrapping = (initialization_data is not None
                          and not isinstance(initialization_data, InitializationData))
        if needs_wrapping:
            initialization_data = SimpleInitializationData(**initialization_data)
        self._initialization_data = initialization_data

        # Only pass explicit CL environments when a device selection was given.
        runtime_kwargs = {'double_precision': double_precision}
        if cl_device_ind is not None:
            runtime_kwargs['cl_environments'] = get_cl_devices(cl_device_ind)
        self._cl_runtime_info = CLRuntimeInfo(**runtime_kwargs)

        if model.is_input_data_sufficient(self._input_data):
            return

        problems = self._model.get_input_data_problems(self._input_data)
        raise InsufficientProtocolError(
            'The provided protocol is insufficient for this model. '
            'The reported errors where: {}'.format(problems))
Ejemplo n.º 3
0
def sample_composite_model(model, input_data, output_folder, nmr_samples, thinning, burnin, tmp_dir,
                           method=None, recalculate=False, store_samples=True, sample_items_to_save=None,
                           initialization_data=None, post_sampling_cb=None, sampler_options=None):
    """Run MCMC sampling on a composite model.

    If ``recalculate`` is False and a ``UsedMask.nii.gz`` or ``UsedMask.nii`` file is already present in
    the output folder, the stored samples are loaded and returned instead of sampling again.

    Args:
        model (:class:`~mdt.models.composite.DMRICompositeModel`): the composite model to sample
        input_data (:class:`~mdt.utils.MRIInputData`): the input data object that is set on the model
            before sampling
        output_folder (string): The full path to the folder where to place the output
        nmr_samples (int): the number of samples we would like to return.
        thinning (int): how many samples we wait before storing a new one. This will draw extra samples such
            that the total number of samples generated is ``nmr_samples * (thinning)`` and the number of
            samples stored is ``nmr_samples``. If set to one or lower we store every sample after the burn in.
        burnin (int): the number of samples to burn-in, that is, to discard before returning the desired
            number of samples
        tmp_dir (str): the preferred temporary storage dir
        method (str): The sampling method to use, one of:
            - 'AMWG', for the Adaptive Metropolis-Within-Gibbs method
            - 'SCAM', for the Single Component Adaptive Metropolis
            - 'FSL', for the sampling method used in the FSL toolbox
            - 'MWG', for the Metropolis-Within-Gibbs (simple random walk metropolis without updates)

            If not given (or otherwise falsy), 'AMWG' is used.
        recalculate (boolean): If we want to recalculate the results if they are already present.
        store_samples (boolean): only the truthiness of this value is inspected here. If falsy, none of the
            samples are stored (``SaveNoSamples``). If truthy, all samples are stored (``SaveAllSamples``),
            unless ``sample_items_to_save`` is given.
        sample_items_to_save (list): list of output names we want to store the samples of. If given (and
            ``store_samples`` is truthy), only the items in this list are stored (``SaveSpecificMaps``).
            Valid items are the free parameter names of the model and the items 'LogLikelihood'
            and 'LogPrior'.
        initialization_data (:class:`~mdt.utils.InitializationData`): (extra) initialization data; when given
            it is applied to the model through its ``apply_to_model`` method before sampling.
        post_sampling_cb (Callable[
            [mot.sample.base.SamplingOutput, mdt.models.composite.DMRICompositeModel], Optional[Dict]]):
                additional post-processing called after sampling. This function can optionally return a
                (nested) dictionary with as keys dir-/file-names and as values maps to be stored in the
                results directory.
        sampler_options (dict): specific options for the MCMC routine. These are handed to the sampling
            processor as ``sampler_options``.

    Returns:
        The result of ``load_samples(output_folder)`` when existing results are reused, otherwise whatever
        the ``'sampling'`` processing strategy returns for the sampling worker.

    Raises:
        InsufficientProtocolError: if the model reports that the input data is insufficient.
    """
    # Pick how (and if) the drawn samples are persisted.
    if not store_samples:
        storage_strategy = SaveNoSamples()
    elif sample_items_to_save:
        storage_strategy = SaveSpecificMaps(included=sample_items_to_save)
    else:
        storage_strategy = SaveAllSamples()

    if not model.is_input_data_sufficient(input_data):
        problems = model.get_input_data_problems(input_data)
        raise InsufficientProtocolError(
            'The provided protocol is insufficient for this model. '
            'The reported errors where: {}'.format(problems))

    logger = logging.getLogger(__name__)

    # The presence of a used-mask file marks an earlier, finished run.
    mask_file_names = ('UsedMask.nii.gz', 'UsedMask.nii')
    if not recalculate and any(os.path.exists(os.path.join(output_folder, file_name))
                               for file_name in mask_file_names):
        logger.info('Not recalculating {} model'.format(model.name))
        return load_samples(output_folder)

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    model.set_input_data(input_data)

    with per_model_logging_context(output_folder, overwrite=recalculate):
        if initialization_data:
            logger.info('Preparing the model with the user provided initialization data.')
            initialization_data.apply_to_model(model, input_data)

        with _log_info(logger, model.name):
            sampling_method = method or 'AMWG'
            tmp_results_path = get_full_tmp_results_path(output_folder, tmp_dir)

            worker = SamplingProcessor(
                nmr_samples, thinning, burnin, sampling_method,
                model, input_data.mask, input_data.nifti_header, output_folder,
                tmp_results_path, recalculate,
                samples_storage_strategy=storage_strategy,
                post_sampling_cb=post_sampling_cb,
                sampler_options=sampler_options)

            return get_processing_strategy('sampling').process(worker)
Ejemplo n.º 4
0
def fit_model(model,
              input_data,
              output_folder,
              method=None,
              recalculate=False,
              cl_device_ind=None,
              double_precision=False,
              tmp_results_dir=True,
              initialization_data=None,
              use_cascaded_inits=True,
              post_processing=None,
              optimizer_options=None):
    """Run the optimizer on the given model.

    Args:
        model (str or :class:`~mdt.models.base.EstimableModel`):
            The name of a composite model or an implementation of a composite model.
        input_data (:class:`~mdt.utils.MRIInputData`): the input data object containing all
            the info needed for the model fitting.
        output_folder (string): The path to the folder where to place the output, we will make a subdir with the
            model name in it.
        method (str): The optimization method to use, one of:
            - 'Levenberg-Marquardt'
            - 'Nelder-Mead'
            - 'Powell'
            - 'Subplex'

            If not given, both the method and the optimizer options are taken from
            ``get_optimizer_for_model`` for this model.

        recalculate (boolean): If we want to recalculate the results if they are already present.
        cl_device_ind (int or list): the index of the CL device to use. The index is from the list from the function
            utils.get_cl_devices(). This can also be a list of device indices.
        double_precision (boolean): if we would like to do the calculations in double precision
        tmp_results_dir (str, True or None): The temporary dir for the calculations. Set to a string to use
            that path directly, set to True to use the config value, set to None to disable.
        initialization_data (dict): provides (extra) initialization data to
            use during model fitting. The dictionary is not modified by this function. It can contain the
            following elements:

            * ``inits``: dictionary with per parameter an initialization point
            * ``fixes``: dictionary with per parameter a fixed point, this will remove that parameter from the fitting
            * ``lower_bounds``: dictionary with per parameter a lower bound
            * ``upper_bounds``: dictionary with per parameter a upper bound
            * ``unfix``: a list of parameters to unfix

            For example::

                initialization_data = {
                    'fixes': {'Stick0.theta': np.array(...), ...},
                    'inits': {...}
                }

        use_cascaded_inits (boolean): if set, we initialize the model parameters using :func:`get_optimization_inits`.
            Entries in ``initialization_data['inits']`` overrule these default initializations. If not set,
            only the given ``initialization_data`` (if any) is applied to the model.
        post_processing (dict): a dictionary with flags for post-processing options to enable or disable.
            For valid elements, please see the configuration file settings for ``optimization``
            under ``post_processing``. Valid input for this parameter is for example: {'covariance': False}
            to disable automatic calculation of the covariance from the Hessian.
        optimizer_options (dict): extra options passed to the optimization routines.

    Returns:
        dict: The result maps for the given composite model or the last model in the cascade.
            This returns the results as 3d/4d volumes for every output map.

    Raises:
        InsufficientProtocolError: if the model reports that the input data is insufficient.
    """
    # ``collections.Iterable`` was removed in Python 3.10, the ABC lives in ``collections.abc``.
    from collections.abc import Iterable

    logger = logging.getLogger(__name__)

    if not check_user_components():
        init_user_settings(pass_if_exists=True)

    if cl_device_ind is not None:
        # Normalize a single device index to a list of indices.
        if not isinstance(cl_device_ind, Iterable):
            cl_device_ind = [cl_device_ind]
        cl_runtime_info = CLRuntimeInfo(
            cl_environments=get_cl_devices(cl_device_ind),
            double_precision=double_precision)
    else:
        cl_runtime_info = CLRuntimeInfo(double_precision=double_precision)

    if isinstance(model, str):
        model_name = model
        model_instance = get_model(model)()
    else:
        model_name = model.name
        model_instance = model

    if not model_instance.is_input_data_sufficient(input_data):
        raise InsufficientProtocolError(
            'The provided protocol is insufficient for this model. '
            'The reported errors where: {}'.format(
                model_instance.get_input_data_problems(input_data)))

    if post_processing:
        model_instance.update_active_post_processing('optimization',
                                                     post_processing)

    # Work on a shallow copy such that the dictionary of the caller is never modified.
    init_kwargs = dict(initialization_data) if initialization_data is not None else {}

    if use_cascaded_inits:
        # Start from the cascaded defaults and let the user provided inits overrule them.
        cascaded_inits = get_optimization_inits(model_name,
                                                input_data,
                                                output_folder,
                                                cl_device_ind=cl_device_ind)
        cascaded_inits.update(init_kwargs.get('inits') or {})
        init_kwargs['inits'] = cascaded_inits

    # Apply the initialization data also when the cascaded inits are disabled, otherwise user provided
    # fixes/bounds/inits would be silently dropped.
    if init_kwargs:
        SimpleInitializationData(**init_kwargs).apply_to_model(model_instance, input_data)

    if use_cascaded_inits:
        logger.info('Preparing {0} with the cascaded initializations.'.format(
            model_name))

    if method is None:
        # NOTE(review): this also replaces any user provided ``optimizer_options`` -- confirm this is intended.
        method, optimizer_options = get_optimizer_for_model(model_name)

    with mot.configuration.config_context(CLRuntimeAction(cl_runtime_info)):
        fit_composite_model(model_instance,
                            input_data,
                            output_folder,
                            method,
                            get_temporary_results_dir(tmp_results_dir),
                            recalculate=recalculate,
                            optimizer_options=optimizer_options)

    return get_all_nifti_data(os.path.join(output_folder, model_name))
Ejemplo n.º 5
0
def compute_bootstrap(model,
                      input_data,
                      optimization_results,
                      output_folder,
                      bootstrap_method,
                      optimization_method,
                      nmr_samples,
                      tmp_dir,
                      recalculate=False,
                      keep_samples=True,
                      optimizer_options=None,
                      bootstrap_options=None):
    """Bootstrap a composite model using residual or wild bootstrapping.

    The results are written to ``<output_folder>/<model name>/<bootstrap_method>_bootstrap``. If
    ``recalculate`` is False and that directory already holds a ``UsedMask.nii.gz`` or ``UsedMask.nii``
    file, the stored samples are loaded and returned instead of bootstrapping again.

    Args:
        model (:class:`~mdt.models.base.EstimableModel`): the composite model to bootstrap
        input_data (:class:`~mdt.lib.input_data.MRIInputData`): The input data object with which the model
            is initialized before running
        optimization_results (dict or str): the optimization results, either a dictionary with results or the
            path to a folder.
        output_folder (string): The relative output path.
            The resulting maps are placed in a subdirectory (named after the model name) in this output folder.
        bootstrap_method (str): the bootstrap method to use, one of 'residual' or 'wild'. The value
            'residual' selects the residual bootstrapping processor, every other value selects the wild
            bootstrapping processor.
        optimization_method (str): The optimization routine to use.
        nmr_samples (int): the number of samples we would like to return.
        tmp_dir (str): the preferred temporary storage dir
        recalculate (boolean): If we want to recalculate the results if they are already present. When set,
            the complete bootstrap output directory is removed before running.
        keep_samples (boolean): determines if we keep any of the chains. If set to False, the chains will
            be discarded after generating the mean and standard deviations.
        optimizer_options (dict): the additional optimization options
        bootstrap_options (dict): the bootstrap options, passed as extra keyword arguments to the
            bootstrapping processor.

    Returns:
        The result of ``load_samples`` on the bootstrap output directory when existing results are reused,
        otherwise whatever the ``'sampling'`` processing strategy returns for the bootstrapping worker.

    Raises:
        InsufficientProtocolError: if the model reports that the input data is insufficient.
    """
    from mdt.__version__ import __version__

    def _ensure_directory(path):
        # Create the directory (and its parents) unless it is already there.
        if not os.path.isdir(path):
            os.makedirs(path)

    logger = logging.getLogger(__name__)
    logger.info('Using MDT version {}'.format(__version__))
    logger.info('Preparing {} bootstrap for model {}'.format(bootstrap_method, model.name))

    bootstrap_dir_name = '{}_bootstrap'.format(bootstrap_method)
    output_folder = os.path.join(output_folder, model.name, bootstrap_dir_name)

    if not model.is_input_data_sufficient(input_data):
        problems = model.get_input_data_problems(input_data)
        raise InsufficientProtocolError(
            'The provided protocol is insufficient for this model. '
            'The reported errors where: {}'.format(problems))

    _ensure_directory(output_folder)

    if recalculate:
        # Start from a clean slate, the directory is created again below.
        shutil.rmtree(output_folder)
    elif any(os.path.exists(os.path.join(output_folder, file_name))
             for file_name in ('UsedMask.nii.gz', 'UsedMask.nii')):
        logger.info('Not recalculating {} model'.format(model.name))
        return load_samples(output_folder)

    _ensure_directory(output_folder)

    extra_worker_options = bootstrap_options or {}

    with per_model_logging_context(output_folder, overwrite=recalculate), \
            _log_info(logger, model.name):
        worker_class = (ResidualBootstrappingProcessor if bootstrap_method == 'residual'
                        else WildBootstrappingProcessor)

        intermediate_results_path = get_intermediate_results_path(output_folder, tmp_dir)

        worker = worker_class(optimization_method, input_data, optimization_results, nmr_samples,
                              model, input_data.mask, input_data.nifti_header, output_folder,
                              intermediate_results_path, recalculate,
                              keep_samples=keep_samples,
                              optimizer_options=optimizer_options,
                              **extra_worker_options)

        return get_processing_strategy('sampling').process(worker)