Example #1
0
def mbm_pipeline(options : MBMConf):
    """
    Top-level driver: build all stages for an MBM run described by `options`.

    Each name in `options.application.files` is wrapped in a `MincAtom` living under
    "<output_directory>/<pipeline_name>_processed"; the resulting paths are handed to
    `check_MINC_input_files` and the images to `mbm`.  Two CSV summaries ("transforms.csv" and
    "determinants.csv") are written to the current working directory, and, if
    `options.mbm.segmentation.run_maget` is set, a MAGeT run over the LSQ6-resampled images is
    deferred as well.

    options - the full pipeline configuration (application, mbm, ... sections)

    Returns a `Result` whose `stages` hold everything deferred here and whose `output`
    is the output of `mbm`.
    """
    s = Stages()

    processed_dir = os.path.join(options.application.output_directory,
                                 options.application.pipeline_name + "_processed")
    imgs = [MincAtom(name, pipeline_sub_dir=processed_dir)
            for name in options.application.files]

    check_MINC_input_files([img.path for img in imgs])

    prefix = options.application.pipeline_name

    mbm_result = s.defer(mbm(imgs=imgs, options=options,
                             prefix=prefix,
                             output_dir=options.application.output_directory))

    # create useful CSVs (note the files listed therein won't yet exist ...)
    for filename, dataframe in (("transforms.csv", mbm_result.xfms),
                                ("determinants.csv", mbm_result.determinants)):
        if dataframe is None:
            # `mbm` reports `determinants=None` when no stats kernels were requested;
            # there is nothing to tabulate in that case, so don't crash on it.
            continue
        with open(filename, 'w') as f:
            f.write(dataframe.applymap(maybe_deref_path).to_csv(index=False))

    # TODO moved here from inside `mbm` for now ... does this make most sense?
    if options.mbm.segmentation.run_maget:
        import copy
        # work on a copy so that redirecting the output directory and dropping the `mbm`
        # section below doesn't affect the options object used for the rest of the pipeline
        maget_options = copy.deepcopy(options)
        maget_options.application.output_directory = os.path.join(options.application.output_directory,
                                                                  "segmentation")
        # NOTE(review): this is the caller's `options.mbm.maget`, not the deep-copied one, so
        # anything `fixup_maget_options` changes on it is visible through `options` -- confirm intended
        maget_options.maget = options.mbm.maget

        fixup_maget_options(maget_options=maget_options.maget,
                            nlin_options=maget_options.mbm.nlin,
                            lsq12_options=maget_options.mbm.lsq12)
        del maget_options.mbm

        # `Series.items()` rather than `iteritems()`: the latter no longer exists in pandas >= 2.0
        lsq6_resampled = [xfm.resampled for _ix, xfm in mbm_result.xfms.rigid_xfm.items()]

        s.defer(maget(lsq6_resampled,
                      options=maget_options,
                      prefix="%s_MAGeT" % prefix,
                      output_dir=os.path.join(options.application.output_directory, prefix + "_processed")))

    return Result(stages=s, output=mbm_result)
Example #2
0
def mbm(imgs: List[MincAtom],
        options: MBMConf,
        prefix: str,
        output_dir: str = "",
        with_maget: bool = True):
    """
    Build the stages of a model-building (MBM) run over `imgs`.

    In order, this defers: registration-target selection; the LSQ6 step (or, when
    `options.mbm.lsq6.run_lsq6` is off, a resampling of each image onto the registration
    standard through an identity transform); optional MAGeT-based masking of the LSQ6 outputs;
    the combined LSQ12 + nonlinear model build; inversion and concatenation of the resulting
    transforms; optional determinant computation; and optional MAGeT segmentation of both the
    LSQ6-resampled images and the nonlinear average.

    imgs       - the images to register; must be non-empty
    options    - pipeline configuration.  NOTE: `options.registration` is replaced on this very
                 object with a copy that has `resolution` filled in, so the caller sees the change.
    prefix     - name used for the "<prefix>_lsq6" / "_lsq12" / "_nlin" directories and for the
                 MAGeT prefixes
    output_dir - directory under which the per-step directories are placed
    with_maget - when False, no MAGeT masking or segmentation is attempted (and no warning
                 about missing masks is issued), whatever `options` says

    Returns a `Result` whose output is a `Namespace` with `avg_img`, `xfms` (a DataFrame with
    columns "rigid_xfm", "lsq12_nlin_xfm" and "overall_xfm"), `determinants` (None when
    `options.mbm.stats.stats_kernels` is empty) and, if MAGeT segmentation ran, `maget_result`.

    Raises ValueError if `imgs` is empty, or if masking is requested while MAGeT itself is
    switched off and no atlas library has been given.
    """
    # Imported unconditionally: a function-scope import makes `copy` a local name, so importing
    # it inside a single branch would leave every later use dependent on that branch having run.
    import copy

    # `len(...) == 0` rather than `not imgs` on purpose: `two_level` passes a pandas Series here,
    # whose truth value is ambiguous.
    if len(imgs) == 0:
        raise ValueError("Please, some files!")

    # TODO could also allow pluggable pipeline parts e.g. LSQ6 could be substituted out for the modified LSQ6
    # for the kidney tips, etc...

    # TODO this is tedious and annoyingly similar to the registration chain ...
    lsq6_dir = os.path.join(output_dir, prefix + "_lsq6")
    lsq12_dir = os.path.join(output_dir, prefix + "_lsq12")
    nlin_dir = os.path.join(output_dir, prefix + "_nlin")

    s = Stages()

    # FIXME: why do we have to call registration_targets *outside* of lsq6_nuc_inorm? is it just because of the extra
    # options required?  Also, shouldn't options.registration be a required input (as it contains `input_space`) ...?
    targets = s.defer(
        registration_targets(lsq6_conf=options.mbm.lsq6,
                             app_conf=options.application,
                             reg_conf=options.registration,
                             first_input_file=imgs[0].path))

    # TODO this is quite tedious and duplicates stuff in the registration chain ...
    resolution = (options.registration.resolution
                  or get_resolution_from_file(targets.registration_standard.path))
    options.registration = options.registration.replace(resolution=resolution)

    # FIXME: this needs to go outside of the `mbm` function to avoid being run from within other pipelines (or
    # those other pipelines need to turn off this option)
    if with_maget and (options.mbm.segmentation.run_maget or options.mbm.maget.maget.mask):

        # temporary fix...?
        # Masking was requested but --no-run-maget was specified; without an atlas library
        # there is no way to produce the masks, so fail early with an explanation.
        if (options.mbm.maget.maget.mask
                and not options.mbm.segmentation.run_maget
                and options.mbm.maget.maget.atlas_lib is None):
            err_msg_maget = "\nYou specified not to run MAGeT using the " \
                            "--no-run-maget flag. However, the code also " \
                            "wants to use MAGeT to generate masks for your " \
                            "input files after the 6 parameter alignment (lsq6). " \
                            "Because you did not specify a MAGeT atlas library " \
                            "this can not be done. \nTo run the pipeline without " \
                            "using MAGeT to mask your input files, please also " \
                            "specify: \n--maget-no-mask\n"
            raise ValueError(err_msg_maget)

        # Options object shaped for the MAGeT code: a copy of `options` with a top-level
        # `maget` section and no `mbm` section.
        maget_options = copy.deepcopy(options)
        # NOTE(review): this is the caller's `options.mbm.maget`, not the deep-copied one, so
        # anything `fixup_maget_options` changes on it is visible through `options` -- confirm intended
        maget_options.maget = options.mbm.maget

        fixup_maget_options(maget_options=maget_options.maget,
                            nlin_options=maget_options.mbm.nlin,
                            lsq12_options=maget_options.mbm.lsq12)
        del maget_options.mbm

    # FIXME it probably makes most sense if the lsq6 module itself (even within lsq6_nuc_inorm) handles the run_lsq6
    # setting (via use of the identity transform) since then this doesn't have to be implemented for every pipeline
    if options.mbm.lsq6.run_lsq6:
        lsq6_result = s.defer(
            lsq6_nuc_inorm(imgs=imgs,
                           resolution=resolution,
                           registration_targets=targets,
                           lsq6_dir=lsq6_dir,
                           lsq6_options=options.mbm.lsq6))
    else:
        # FIXME the code shouldn't branch here based on run_lsq6 (which should probably
        # be part of the lsq6 options rather than the MBM ones; see comments on #287.
        # TODO don't actually do this resampling if not required (i.e., if the imgs already have the same grids)??
        # however, for now need to add the masks:
        identity_xfm = s.defer(
            param2xfm(
                out_xfm=FileAtom(name=os.path.join(lsq6_dir, 'tmp', "id.xfm"),
                                 pipeline_sub_dir=lsq6_dir,
                                 output_sub_dir='tmp')))
        lsq6_result = [
            XfmHandler(source=img,
                       target=img,
                       xfm=identity_xfm,
                       resampled=s.defer(
                           mincresample_new(img=img,
                                            like=targets.registration_standard,
                                            xfm=identity_xfm)))
            for img in imgs
        ]
    # what about running nuc/inorm without a linear registration step??

    if with_maget and options.mbm.maget.maget.mask:
        # mask copies of the resampled images so the originals stay untouched until the
        # explicit replacement below
        masking_imgs = copy.deepcopy([xfm.resampled for xfm in lsq6_result])
        masked_img = s.defer(
            maget_mask(imgs=masking_imgs,
                       resolution=resolution,
                       maget_options=maget_options.maget,
                       pipeline_sub_dir=os.path.join(options.application.output_directory,
                                                     "%s_atlases" % prefix)))

        # index the masked images by path so each handler can look up its own image
        masked_img.index = masked_img.apply(lambda x: x.path)

        # replace any masks of the resampled images with the newly created masks:
        for xfm in lsq6_result:
            xfm.resampled = masked_img.loc[xfm.resampled.path]
    elif with_maget:
        warnings.warn(
            "Not masking your images from atlas masks after LSQ6 alignment ... probably not what you want "
            "(this can have negative effects on your registration and statistics)"
        )

    # TODO now the user has to call get_nonlinear_component followed by parse_<...>; previously various things
    # like lsq12_nlin_pairwise all branched on the reg_method so one didn't have to call get_nonlinear_component;
    # they could still do this if it can be done safety (i.e., not breaking assumptions of various nonlinear units)
    nlin_module = get_nonlinear_component(reg_method=options.mbm.nlin.reg_method)

    # TODO don't use name 'x_module' for something that's technically not a module ... perhaps unit/component?
    nlin_build_model_component = get_model_building_procedure(options.mbm.nlin.reg_strategy,
                                                              reg_module=nlin_module)

    # TODO tedious: why can't parse_build_model_protocol handle the null protocol case? is this something we want?
    if options.mbm.nlin.nlin_protocol is not None:
        nlin_conf = nlin_build_model_component.parse_build_model_protocol(
            options.mbm.nlin.nlin_protocol, resolution=resolution)
    else:
        nlin_conf = nlin_build_model_component.get_default_build_model_conf(
            resolution=resolution)

    lsq12_nlin_result = s.defer(
        lsq12_nlin_build_model(
            nlin_module=nlin_build_model_component,
            imgs=[xfm.resampled for xfm in lsq6_result],
            lsq12_dir=lsq12_dir,
            nlin_dir=nlin_dir,
            nlin_prefix=prefix,
            use_robust_averaging=options.mbm.nlin.use_robust_averaging,
            resolution=resolution,
            lsq12_conf=options.mbm.lsq12,
            nlin_conf=nlin_conf))

    inverted_xfms = [s.defer(invert_xfmhandler(xfm)) for xfm in lsq12_nlin_result.output]

    if options.mbm.stats.stats_kernels:
        determinants = s.defer(
            determinants_at_fwhms(xfms=inverted_xfms,
                                  inv_xfms=lsq12_nlin_result.output,
                                  blur_fwhms=options.mbm.stats.stats_kernels))
    else:
        determinants = None

    overall_xfms = [
        s.defer(concat_xfmhandlers([rigid_xfm, lsq12_nlin_xfm]))
        for rigid_xfm, lsq12_nlin_xfm in zip(lsq6_result, lsq12_nlin_result.output)
    ]

    # we could `merge` the determinants with this table, but preserving information would cause lots of duplication
    # of the transforms (or storing determinants in more columns, but iterating over dynamically known columns
    # seems a bit odd ...)
    output_xfms = pd.DataFrame({
        "rigid_xfm": lsq6_result,  # maybe don't return this if LSQ6 not run??
        "lsq12_nlin_xfm": lsq12_nlin_result.output,
        "overall_xfm": overall_xfms,
    })

    # TODO add more of lsq12_nlin_result?
    output = Namespace(avg_img=lsq12_nlin_result.avg_img,
                       xfms=output_xfms,
                       determinants=determinants)

    # TODO return some more MAGeT stuff from MBM function ??
    # FIXME shouldn't use and destructively modify lsq6_result!!!
    if with_maget and options.mbm.segmentation.run_maget:
        maget_options = copy.deepcopy(maget_options)
        maget_options.maget.maget.mask = maget_options.maget.maget.mask_only = False  # already done above
        # use the original masks here otherwise the masking step will be re-run due to the previous masking run's
        # masks having been applied to the input images:
        maget_result = s.defer(
            maget([xfm.resampled for xfm in lsq6_result],
                  options=maget_options,
                  prefix="%s_MAGeT" % prefix,
                  output_dir=os.path.join(output_dir, prefix + "_processed")))
        # FIXME add pipeline dir to path and write a segmentations CSV from `maget_result`
        output.maget_result = maget_result

        # segment the nonlinear average as well and attach the resulting labels to it
        nlin_maget = s.defer(
            maget([lsq12_nlin_result.avg_img],
                  options=maget_options,
                  prefix="%s_nlin_MAGeT" % prefix,
                  output_dir=os.path.join(output_dir, prefix + "_processed"))).iloc[0]
        # setting output.avg_img.mask = nlin_maget.mask too would make more sense,
        # but might have weird effects elsewhere
        output.avg_img.labels = nlin_maget.labels

    return Result(stages=s, output=output)
Example #3
0
def two_level(grouped_files_df, options : TwoLevelConf):
    """
    Two-level model building: one `mbm` run per group, then one `mbm` run (without LSQ6)
    over the resulting group averages.

    grouped_files_df - a DataFrame (not a GroupBy object, despite the name) with a 'group' column
                       of any comparable, sortable type and a 'file' column of MincAtoms;
                       NaN entries are rejected
    options          - the pipeline configuration; the same `mbm` settings are currently used
                       for both levels

    Returns a Result whose output Namespace carries `first_level_results`,
    `resampled_determinants` and `overall_determinants`.
    """
    s = Stages()

    has_missing_values = grouped_files_df.isnull().values.any()
    if has_missing_values:
        raise ValueError("NaN values in input dataframe; can't go")

    lsq6_target_type = options.mbm.lsq6.target_type
    if lsq6_target_type == TargetType.bootstrap:
        # The second level would need the resolution of *its* "first input file", which doesn't
        # exist yet at this point.  A resolution could be passed to `mbm` instead; for now the
        # combination is simply refused.
        raise ValueError("Bootstrap model building currently doesn't work with this pipeline; "
                         "just specify an initial target instead")
    if lsq6_target_type == TargetType.pride_of_models:
        pride_of_models_mapping = get_pride_of_models_mapping(
            pride_csv=options.mbm.lsq6.target_file,
            output_dir=options.application.output_directory,
            pipeline_name=options.application.pipeline_name)

    # FIXME essentially duplicated in the 'tamarack' code, modulo argument/variable names
    def group_options(base_options, group):
        # private copy of the options for one group, with target and resolution pinned down
        per_group = copy.deepcopy(base_options)

        if per_group.mbm.lsq6.target_type == TargetType.pride_of_models:
            targets = get_closest_model_from_pride_of_models(
                pride_of_models_dict=pride_of_models_mapping,
                time_point=group)
            per_group.mbm.lsq6 = per_group.mbm.lsq6.replace(
                target_type=TargetType.initial_model,
                target_file=targets.registration_standard.path)
        else:
            # always derived from the very first file, so every group ends up with the
            # same resolution -- is that necessary?
            targets = registration_targets(
                lsq6_conf=per_group.mbm.lsq6,
                app_conf=per_group.application,
                first_input_file=grouped_files_df.file.iloc[0])

        resolution = per_group.registration.resolution
        if not resolution:
            resolution = get_resolution_from_file(targets.registration_standard.path)
        per_group.registration = per_group.registration.replace(resolution=resolution)
        return per_group

    def build_group_model(row):
        # first-level model for a single (group, files) row
        return s.defer(mbm(imgs=row.files,
                           options=group_options(options, row.group),
                           prefix="%s" % row.group,
                           output_dir=os.path.join(
                               options.application.output_directory,
                               options.application.pipeline_name + "_first_level",
                               "%s_processed" % row.group)))

    # groupby + aggregate-to-list is the usual (annoying) way to aggregate while keeping
    # access to the group keys ... TODO: fix
    files_per_group = (
        grouped_files_df
        .groupby('group', as_index=False, sort=False)
        .aggregate({ 'file' : lambda files: list(files) })
        .rename(columns={ 'file' : "files" }))
    # TODO a plain apply producing a Series directly might be nicer than assign(apply(...))
    first_level_results = files_per_group.assign(
        build_model=lambda df: df.apply(axis=1, func=build_group_model))

    # FIXME both levels share one options set at the moment -- use options.first/second_level
    second_level_options = copy.deepcopy(options)
    second_level_options.mbm.lsq6 = second_level_options.mbm.lsq6.replace(run_lsq6=False)

    # FIXME probably a hack -- a --second-level-init-model option naming the timepoint to use as the
    # second-level initial model would be cleaner (irrelevant for now since no lsq6 is run here)
    if second_level_options.mbm.lsq6.target_type == TargetType.pride_of_models:
        first_pride_model = list(pride_of_models_mapping.values())[0]
        second_level_options.mbm.lsq6 = second_level_options.mbm.lsq6.replace(
            target_type=TargetType.target,  # only consulted for the resolution, as no lsq6 is run
            target_file=first_pride_model.registration_standard.path)

    # NOTE: lsq6_nuc_inorm can't generally be run at this level (rotational minctracc might cope):
    # it would use the native-space initial model although the images already live in standard
    # space after the first-level lsq6 resampling.  It might still become useful (minus nuc/inorm!)
    # later on, e.g. with a 'pride' of models, one per group.
    group_averages = first_level_results.build_model.map(lambda m: m.avg_img)
    second_level_prefix = os.path.join(options.application.output_directory,
                                       options.application.pipeline_name + "_second_level")
    second_level_results = s.defer(mbm(imgs=group_averages,
                                       options=second_level_options,
                                       prefix=second_level_prefix))

    # FIXME `mbm` doesn't hand back a pd.Series of xfms, hence the manual pairing:
    # every first-level transform is followed by its group's second-level transform
    first_level_xfms_by_group = [r.xfms.lsq12_nlin_xfm for r in first_level_results.build_model]
    overall_xfms = []
    for xfms_1, xfm_2 in zip(first_level_xfms_by_group, second_level_results.xfms.overall_xfm):
        for xfm_1 in xfms_1:
            overall_xfms.append(s.defer(concat_xfmhandlers([xfm_1, xfm_2])))

    resample  = np.vectorize(mincresample_new, excluded={"extra_flags"})
    defer     = np.vectorize(s.defer)

    # TODO tagging rows with the avg_img is clunky -- propagating group indices might be better;
    # only needed because `mbm` returns namespaces rather than DataFrames ...
    determinants_by_group = list(first_level_results.build_model.apply(
        lambda x: x.determinants.assign(first_level_avg=x.avg_img)))
    first_level_determinants = pd.concat(determinants_by_group, ignore_index=True)

    def to_second_level_space(imgs, group_xfms):
        # defer resampling of `imgs` through the matching group transform onto the 2nd-level average
        return defer(resample(img=imgs,
                              xfm=group_xfms.apply(lambda x: x.xfm),
                              like=second_level_results.avg_img))

    group_xfm_table = (pd.DataFrame({'group_xfm' : second_level_results.xfms.overall_xfm})
                       .assign(source=lambda df: df.group_xfm.apply(lambda r: r.source)))
    determinants_with_group_xfm = pd.merge(left=first_level_determinants,
                                           right=group_xfm_table,
                                           left_on="first_level_avg",
                                           right_on="source")
    # TODO only the log determinants get resampled, still a bit ugly ... abstract somehow?
    # TODO the name is misleading: this is basically the whole first-level table plus two columns
    resampled_determinants = determinants_with_group_xfm.assign(
        resampled_log_full_det=lambda df: to_second_level_space(df.log_full_det, df.group_xfm),
        resampled_log_nlin_det=lambda df: to_second_level_space(df.log_nlin_det, df.group_xfm))

    inverted_overall_xfms = []
    for xfm in overall_xfms:
        inverted_overall_xfms.append(s.defer(invert_xfmhandler(xfm)))

    raw_overall_determinants = s.defer(determinants_at_fwhms(
        xfms=inverted_overall_xfms,
        inv_xfms=overall_xfms,
        blur_fwhms=options.mbm.stats.stats_kernels))
    # same values, but under 'overall_'-prefixed column names
    overall_determinants = (raw_overall_determinants
                            .assign(overall_log_full_det=lambda df: df.log_full_det,
                                    overall_log_nlin_det=lambda df: df.log_nlin_det)
                            .drop(['log_full_det', 'log_nlin_det'], axis=1))

    # TODO return some MAGeT stuff from two_level function ??
    # FIXME running MAGeT in here suffers from the same issue as running it inside `mbm`: it also
    # runs when this function is used from another pipeline (e.g., n-level), which is redundant and
    # causes filename clashes -- it belongs in `two_level_pipeline`.
    if options.mbm.segmentation.run_maget:
        maget_options = copy.deepcopy(options)
        maget_options.maget = options.mbm.maget
        fixup_maget_options(maget_options=maget_options.maget,
                            lsq12_options=maget_options.mbm.lsq12,
                            nlin_options=maget_options.mbm.nlin)
        del maget_options.mbm

        # collect the LSQ6-resampled image of every file of every group
        lsq6_resampled = []
        for _ix, m in first_level_results.iterrows():
            for xfm in m.build_model.xfms.rigid_xfm:
                lsq6_resampled.append(xfm.resampled)

        s.defer(maget(lsq6_resampled,
                      options=maget_options,
                      prefix="%s_MAGeT" % options.application.pipeline_name,
                      output_dir=os.path.join(options.application.output_directory,
                                              options.application.pipeline_name + "_processed")))

    # TODO resampling to database model ...

    # TODO ideally: one table with all determinants (first level, overall, resampled first level)
    # per file, and a second one with groupwise information (averages, transforms to the common average)
    output = Namespace(first_level_results=first_level_results,
                       resampled_determinants=resampled_determinants,
                       overall_determinants=overall_determinants)
    return Result(stages=s, output=output)