Example #1
    def _scale_to_p(self, stat_values, scale_values):
        """Compute p- and z-values.

        Parameters
        ----------
        stat_values : (V) array
            ALE values.
        scale_values : (I x V) array
            Permutation ALE values.

        Returns
        -------
        p_values : (V) array
        z_values : (V) array

        Notes
        -----
        This method also uses the "histogram_bins" element in the null_distributions_ attribute.
        """
        n_voxels = stat_values.shape[0]

        # I know that joblib probably preserves order of outputs, but I'm paranoid, so we track
        # the iteration as well and sort the resulting p-value array based on that.
        with tqdm_joblib(tqdm(total=n_voxels)):
            p_values, voxel_idx = zip(*Parallel(n_jobs=self.n_cores)(
                delayed(self._scale_to_p_voxel)(i_voxel, stat_values[i_voxel],
                                                scale_values[:, i_voxel])
                for i_voxel in range(n_voxels)))
        # Convert to an array and restore voxel order using the tracked indices.
        p_values = np.array(p_values)[np.argsort(voxel_idx)]

        z_values = p_to_z(p_values, tail="one")
        return p_values, z_values
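
The `_scale_to_p_voxel` helper is not shown above. As a rough guide to what it plausibly computes, here is a minimal, self-contained sketch of a one-sided empirical p-value for a single voxel against its permutation null; the `+1` smoothing (so p is never exactly zero) is an assumption of this sketch, not necessarily NiMARE's exact implementation:

import numpy as np

def scale_to_p_voxel(i_voxel, stat_value, voxel_null):
    """Return (p_value, voxel_index) for one voxel's observed ALE value."""
    # One-sided empirical p-value: the proportion of permutation values at
    # least as large as the observed statistic, smoothed to avoid p == 0.
    p_value = (np.sum(voxel_null >= stat_value) + 1) / (voxel_null.size + 1)
    return p_value, i_voxel

rng = np.random.default_rng(0)
voxel_null = rng.random(1000)  # stand-in permutation ALE values for one voxel
print(scale_to_p_voxel(0, 0.95, voxel_null))  # small p for a large statistic

Returning the voxel index alongside the p-value is what lets the caller restore voxel order after the parallel map.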
Example #2
    def _fit(self, dataset):
        """Perform specific coactivation likelihood estimation meta-analysis on dataset.

        Parameters
        ----------
        dataset : :obj:`~nimare.dataset.Dataset`
            Dataset to analyze.
        """
        self.dataset = dataset
        self.masker = self.masker or dataset.masker
        self.null_distributions_ = {}

        ma_values = self._collect_ma_maps(
            coords_key="coordinates",
            maps_key="ma_maps",
        )

        # Determine bins for null distribution histogram
        max_ma_values = np.max(ma_values, axis=1)
        max_poss_ale = self._compute_summarystat_est(max_ma_values)
        self.null_distributions_["histogram_bins"] = np.round(
            np.arange(0, max_poss_ale + 0.001, 0.0001), 4
        )

        stat_values = self._compute_summarystat_est(ma_values)

        iter_df = self.inputs_["coordinates"].copy()
        rand_idx = np.random.choice(self.xyz.shape[0], size=(iter_df.shape[0], self.n_iters))
        rand_xyz = self.xyz[rand_idx, :]
        iter_xyzs = np.split(rand_xyz, rand_xyz.shape[1], axis=1)

        # Memory-map the permutation results so that parallel workers write to
        # shared, disk-backed storage instead of returning large arrays.
        perm_scale_values = np.memmap(
            self.memmap_filenames[1],
            dtype=stat_values.dtype,
            mode="w+",
            shape=(self.n_iters, stat_values.shape[0]),
        )
        with tqdm_joblib(tqdm(total=self.n_iters)):
            Parallel(n_jobs=self.n_cores)(
                delayed(self._run_permutation)(
                    i_iter, iter_xyzs[i_iter], iter_df, perm_scale_values
                )
                for i_iter in range(self.n_iters)
            )

        p_values, z_values = self._scale_to_p(stat_values, perm_scale_values)

        if isinstance(perm_scale_values, np.memmap):
            LGR.debug(f"Closing memmap at {perm_scale_values.filename}")
            perm_scale_values._mmap.close()

        del perm_scale_values

        logp_values = -np.log10(p_values)
        logp_values[np.isinf(logp_values)] = -np.log10(np.finfo(float).eps)

        # Write out unthresholded value images
        images = {"stat": stat_values, "logp": logp_values, "z": z_values}
        return images
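
The body of `_compute_summarystat_est` is not shown here, but Example #5 computes ALE in closed form as `1 - prod(1 - MA)`. Assuming that formula, the sketch below illustrates why passing the per-study maxima through the same function yields a valid upper bound for the histogram bins:

import numpy as np

def compute_ale(ma_values):
    # ma_values: (studies x voxels) modeled-activation probabilities in [0, 1]
    return 1.0 - np.prod(1.0 - ma_values, axis=0)

rng = np.random.default_rng(0)
ma = rng.random((5, 3)) * 0.1  # 5 studies, 3 voxels

# Stacking each study's maximum MA value at a single hypothetical voxel gives
# the largest ALE any real voxel could attain.
max_poss_ale = compute_ale(ma.max(axis=1, keepdims=True))[0]
assert compute_ale(ma).max() <= max_poss_ale + 1e-12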
Example #3
    def _compute_null_montecarlo(self, n_iters, n_cores):
        """Compute uncorrected null distribution using Monte Carlo method.

        Parameters
        ----------
        n_iters : int
            Number of permutations.
        n_cores : int
            Number of cores to use.

        Notes
        -----
        This method adds two entries to the null_distributions_ dict attribute:
        "histweights_corr-none_method-montecarlo" and
        "histweights_level-voxel_corr-fwe_method-montecarlo".
        """
        null_ijk = np.vstack(np.where(self.masker.mask_img.get_fdata())).T

        n_cores = _check_ncores(n_cores)

        rand_idx = np.random.choice(
            null_ijk.shape[0],
            size=(self.inputs_["coordinates"].shape[0], n_iters),
        )
        rand_ijk = null_ijk[rand_idx, :]
        rand_xyz = vox2mm(rand_ijk, self.masker.mask_img.affine)
        iter_xyzs = np.split(rand_xyz, rand_xyz.shape[1], axis=1)
        iter_df = self.inputs_["coordinates"].copy()

        with tqdm_joblib(tqdm(total=n_iters)):
            perm_histograms = Parallel(n_jobs=n_cores)(
                delayed(self._compute_null_montecarlo_permutation)(iter_xyzs[i_iter], iter_df=iter_df)
                for i_iter in range(n_iters)
            )

        perm_histograms = np.vstack(perm_histograms)
        self.null_distributions_["histweights_corr-none_method-montecarlo"] = np.sum(
            perm_histograms, axis=0
        )

        fwe_voxel_max = np.apply_along_axis(_get_last_bin, 1, perm_histograms)
        histweights = np.zeros(perm_histograms.shape[1], dtype=perm_histograms.dtype)
        for perm in fwe_voxel_max:
            histweights[perm] += 1

        self.null_distributions_["histweights_level-voxel_corr-fwe_method-montecarlo"] = histweights
Example #4
    def transform(self, result):
        """Apply the analysis to a MetaResult.

        Parameters
        ----------
        result : :obj:`~nimare.results.MetaResult`
            A MetaResult produced by a coordinate- or image-based meta-analysis.

        Returns
        -------
        contribution_table : :obj:`pandas.DataFrame`
            A DataFrame with information about relative contributions of each experiment to each
            cluster in the thresholded map.
            There is one row for each experiment, as well as one more row at the top of the table
            (below the header), which has the center of mass of each cluster.
            The centers of mass are not guaranteed to fall within the actual clusters, but can
            serve as a useful heuristic for identifying them.
            There is one column for each cluster, with column names being integers indicating the
            cluster's associated value in the ``labeled_cluster_img`` output.
        labeled_cluster_img : :obj:`nibabel.nifti1.Nifti1Image`
            The labeled, thresholded map that is used to identify clusters characterized by this
            analysis.
            Each cluster in the map has a single value, which corresponds to the cluster's column
            name in ``contribution_table``.
        """
        if not hasattr(result.estimator, "dataset"):
            raise AttributeError(
                "MetaResult was not generated by an Estimator with a `dataset` attribute. "
                "This may be because the Estimator was a pairwise Estimator. "
                "The Jackknife method does not currently work with pairwise Estimators."
            )
        dset = result.estimator.dataset
        # We need to copy the estimator because it will otherwise overwrite the original version
        # with one missing a study in its inputs.
        estimator = copy.deepcopy(result.estimator)
        original_masker = estimator.masker

        # Collect the thresholded cluster map
        if self.target_image in result.maps:
            target_img = result.get_map(self.target_image, return_type="image")
        else:
            available_maps = [f"'{m}'" for m in result.maps.keys()]
            raise ValueError(
                f"Target image ('{self.target_image}') not present in result. "
                f"Available maps in result are: {', '.join(available_maps)}."
            )

        if self.voxel_thresh:
            thresh_img = image.threshold_img(target_img, self.voxel_thresh)
        else:
            thresh_img = target_img

        thresh_arr = thresh_img.get_fdata()

        # CBMAs have "stat" maps, while most IBMAs have "est" maps.
        # Fisher's and Stouffer's only have "z" maps though.
        if "est" in result.maps:
            target_value_map = "est"
        elif "stat" in result.maps:
            target_value_map = "stat"
        else:
            target_value_map = "z"

        stat_values = result.get_map(target_value_map, return_type="array")

        # Use study IDs in inputs_ instead of dataset, because we don't want to try fitting the
        # estimator to a study that might have been filtered out by the estimator's criteria.
        meta_ids = estimator.inputs_["id"]
        rows = ["Center of Mass"] + list(meta_ids)

        # Let's label the clusters in the thresholded map so we can use it as a NiftiLabelsMasker
        # This won't work when the Estimator's masker isn't a NiftiMasker... :(
        conn = ndimage.generate_binary_structure(3, 2)
        labeled_cluster_arr, n_clusters = ndimage.measurements.label(thresh_arr, conn)
        labeled_cluster_img = nib.Nifti1Image(
            labeled_cluster_arr,
            affine=target_img.affine,
            header=target_img.header,
        )

        if n_clusters == 0:
            LGR.warning("No clusters found")
            contribution_table = pd.DataFrame(index=rows)
            return contribution_table, labeled_cluster_img

        # Identify center of mass for each cluster
        # This COM may fall outside the cluster, but it is a useful heuristic for identifying them
        cluster_ids = list(range(1, n_clusters + 1))
        cluster_coms = ndimage.center_of_mass(
            labeled_cluster_arr,
            labeled_cluster_arr,
            cluster_ids,
        )
        cluster_coms = np.array(cluster_coms)
        cluster_coms = vox2mm(cluster_coms, target_img.affine)

        cluster_com_strs = []
        for i_peak in range(len(cluster_ids)):
            x, y, z = cluster_coms[i_peak, :].astype(int)
            xyz_str = f"({x}, {y}, {z})"
            cluster_com_strs.append(xyz_str)

        # Mask using a labels masker, so that we can easily get the mean value for each cluster
        cluster_masker = input_data.NiftiLabelsMasker(labeled_cluster_img)
        cluster_masker.fit(labeled_cluster_img)

        # Create empty contribution table
        contribution_table = pd.DataFrame(index=rows, columns=cluster_ids)
        contribution_table.index.name = "Cluster ID"
        contribution_table.loc["Center of Mass"] = cluster_com_strs

        with tqdm_joblib(tqdm(total=len(meta_ids))):
            jackknife_results = Parallel(n_jobs=self.n_cores)(
                delayed(self._transform)(
                    study_id,
                    all_ids=meta_ids,
                    dset=dset,
                    estimator=estimator,
                    target_value_map=target_value_map,
                    stat_values=stat_values,
                    original_masker=original_masker,
                    cluster_masker=cluster_masker,
                ) for study_id in meta_ids)

        # Add the results to the table
        for expid, stat_prop_values in jackknife_results:
            contribution_table.loc[expid] = stat_prop_values

        return contribution_table, labeled_cluster_img
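
To make the labeling and center-of-mass steps concrete, here is a small self-contained demonstration on a toy 3D array (using `ndimage.label`, the modern alias of `ndimage.measurements.label`). The values are illustrative only, and note that the centers of mass come back in voxel coordinates, which the method above converts to mm with `vox2mm`:

import numpy as np
from scipy import ndimage

thresh_arr = np.zeros((5, 5, 5))
thresh_arr[1:3, 1:3, 1:3] = 1.0  # an 8-voxel cluster
thresh_arr[4, 4, 4] = 1.0        # a second, single-voxel cluster

conn = ndimage.generate_binary_structure(3, 2)  # faces + edges connectivity
labeled_arr, n_clusters = ndimage.label(thresh_arr, conn)
coms = ndimage.center_of_mass(labeled_arr, labeled_arr, list(range(1, n_clusters + 1)))
print(n_clusters)  # 2
print(coms)        # [(1.5, 1.5, 1.5), (4.0, 4.0, 4.0)]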
Example #5
    def _fit(self, dataset1, dataset2):
        """Perform an ALE-subtraction meta-analysis comparing two datasets.

        The group difference in ALE values is tested against a null
        distribution built by randomizing group assignments.
        """
        self.dataset1 = dataset1
        self.dataset2 = dataset2
        self.masker = self.masker or dataset1.masker

        ma_maps1 = self._collect_ma_maps(
            maps_key="ma_maps1",
            coords_key="coordinates1",
            fname_idx=0,
        )
        ma_maps2 = self._collect_ma_maps(
            maps_key="ma_maps2",
            coords_key="coordinates2",
            fname_idx=1,
        )

        n_grp1, n_voxels = ma_maps1.shape

        # Get ALE values for the two groups and difference scores
        grp1_ale_values = 1.0 - np.prod(1.0 - ma_maps1, axis=0)
        grp2_ale_values = 1.0 - np.prod(1.0 - ma_maps2, axis=0)
        diff_ale_values = grp1_ale_values - grp2_ale_values
        del grp1_ale_values, grp2_ale_values

        # Combine the MA maps into a single array to draw from for null distribution
        ma_arr = np.vstack((ma_maps1, ma_maps2))

        if isinstance(ma_maps1, np.memmap):
            LGR.debug(f"Closing memmap at {ma_maps1.filename}")
            ma_maps1._mmap.close()

        if isinstance(ma_maps2, np.memmap):
            LGR.debug(f"Closing memmap at {ma_maps2.filename}")
            ma_maps2._mmap.close()

        del ma_maps1, ma_maps2

        # Calculate null distribution for each voxel based on group-assignment randomization
        # Use a memmapped 2D array
        iter_diff_values = np.memmap(
            self.memmap_filenames[2],
            dtype=ma_arr.dtype,
            mode="w+",
            shape=(self.n_iters, n_voxels),
        )

        with tqdm_joblib(tqdm(total=self.n_iters)):
            Parallel(n_jobs=self.n_cores)(
                delayed(self._run_permutation)(i_iter, n_grp1, ma_arr, iter_diff_values)
                for i_iter in range(self.n_iters)
            )

        # Determine p-values based on voxel-wise null distributions
        # I know that joblib probably preserves order of outputs, but I'm paranoid, so we track
        # the iteration as well and sort the resulting p-value array based on that.
        with tqdm_joblib(tqdm(total=n_voxels)):
            p_values, voxel_idx = zip(
                *Parallel(n_jobs=self.n_cores)(
                    delayed(self._alediff_to_p_voxel)(
                        i_voxel,
                        diff_ale_values[i_voxel],
                        iter_diff_values[:, i_voxel],
                    )
                    for i_voxel in range(n_voxels)
                )
            )
        # Convert to an array and restore voxel order using the tracked indices.
        p_values = np.array(p_values)[np.argsort(voxel_idx)]

        diff_signs = np.sign(diff_ale_values - np.median(iter_diff_values, axis=0))

        if isinstance(iter_diff_values, np.memmap):
            LGR.debug(f"Closing memmap at {iter_diff_values.filename}")
            iter_diff_values._mmap.close()

        del iter_diff_values

        z_arr = p_to_z(p_values, tail="two") * diff_signs
        logp_arr = -np.log10(p_values)

        images = {
            "stat_desc-group1MinusGroup2": diff_ale_values,
            "p_desc-group1MinusGroup2": p_values,
            "z_desc-group1MinusGroup2": z_arr,
            "logp_desc-group1MinusGroup2": logp_arr,
        }
        return images
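
`_run_permutation` is referenced but not shown. A plausible sketch of the group-assignment randomization it performs, assuming the same closed-form ALE used above (the helper name and seeding are illustrative):

import numpy as np

def run_permutation(i_iter, n_grp1, ma_arr, iter_diff_values, seed=None):
    """Write one permuted ALE-difference map into row i_iter."""
    rng = np.random.default_rng(seed)
    idx = rng.permutation(ma_arr.shape[0])  # shuffle study assignments
    ale1 = 1.0 - np.prod(1.0 - ma_arr[idx[:n_grp1]], axis=0)
    ale2 = 1.0 - np.prod(1.0 - ma_arr[idx[n_grp1:]], axis=0)
    iter_diff_values[i_iter, :] = ale1 - ale2

ma_arr = np.random.default_rng(0).random((6, 4)) * 0.1  # 6 studies, 4 voxels
iter_diff_values = np.zeros((3, 4))  # stands in for the memmapped array
for i_iter in range(3):
    run_permutation(i_iter, n_grp1=3, ma_arr=ma_arr,
                    iter_diff_values=iter_diff_values, seed=i_iter)
print(iter_diff_values)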
Example #6
    def correct_fwe_montecarlo(
        self,
        result,
        voxel_thresh=0.001,
        n_iters=10000,
        n_cores=1,
        vfwe_only=False,
    ):
        """Perform FWE correction using the max-value permutation method.

        Only call this method from within a Corrector.

        .. versionchanged:: 0.0.12

            * Fix the ``vfwe_only`` option.

        .. versionchanged:: 0.0.11

            * Rename ``*_level-cluster`` maps to ``*_desc-size_level-cluster``.
            * Add new ``*_desc-mass_level-cluster`` maps that use cluster mass-based inference.

        Parameters
        ----------
        result : :obj:`~nimare.results.MetaResult`
            Result object from a CBMA meta-analysis.
        voxel_thresh : :obj:`float`, optional
            Cluster-defining p-value threshold. Default is 0.001.
        n_iters : :obj:`int`, optional
            Number of iterations to build the voxel-level, cluster-size, and cluster-mass FWE
            null distributions. Default is 10000.
        n_cores : :obj:`int`, optional
            Number of cores to use for parallelization.
            If <=0, defaults to using all available cores. Default is 1.
        vfwe_only : :obj:`bool`, optional
            If True, only calculate the voxel-level FWE-corrected maps. Voxel-level correction
            can be performed very quickly if the Estimator's ``null_method`` was "montecarlo".
            Default is False.

        Returns
        -------
        images : :obj:`dict`
            Dictionary of 1D arrays corresponding to masked images generated by
            the correction procedure. The following arrays are generated by
            this method:

            -   ``logp_desc-size_level-cluster``: Cluster-level FWE-corrected ``-log10(p)`` map
                based on cluster size. This was previously simply called "logp_level-cluster".
                This array is **not** generated if ``vfwe_only`` is ``True``.
            -   ``logp_desc-mass_level-cluster``: Cluster-level FWE-corrected ``-log10(p)`` map
                based on cluster mass. According to :footcite:t:`bullmore1999global` and
                :footcite:t:`zhang2009cluster`, cluster mass-based inference is more powerful than
                cluster size.
                This array is **not** generated if ``vfwe_only`` is ``True``.
            -   ``logp_level-voxel``: Voxel-level FWE-corrected ``-log10(p)`` map.
                Voxel-level correction is generally more conservative than cluster-level
                correction, so it is only recommended for very large meta-analyses
                (i.e., hundreds of studies), per :footcite:t:`eickhoff2016behavior`.

        Notes
        -----
        If ``vfwe_only`` is ``False``, this method adds three new keys to the
        ``null_distributions_`` attribute:

            -   ``values_level-voxel_corr-fwe_method-montecarlo``: The maximum summary statistic
                value from each Monte Carlo iteration. An array of shape (n_iters,).
            -   ``values_desc-size_level-cluster_corr-fwe_method-montecarlo``: The maximum cluster
                size from each Monte Carlo iteration. An array of shape (n_iters,).
            -   ``values_desc-mass_level-cluster_corr-fwe_method-montecarlo``: The maximum cluster
                mass from each Monte Carlo iteration. An array of shape (n_iters,).

        See Also
        --------
        nimare.correct.FWECorrector : The Corrector from which to call this method.

        References
        ----------
        .. footbibliography::

        Examples
        --------
        >>> meta = MKDADensity()
        >>> result = meta.fit(dset)
        >>> corrector = FWECorrector(method='montecarlo', voxel_thresh=0.01,
        ...                          n_iters=5, n_cores=1)
        >>> cresult = corrector.transform(result)
        """
        stat_values = result.get_map("stat", return_type="array")

        if vfwe_only and (self.null_method == "montecarlo"):
            LGR.info(
                "Using precalculated histogram for voxel-level FWE correction."
            )

            # Determine p-values from stat values and the precomputed null distribution.
            p_vfwe_values = nullhist_to_p(
                stat_values,
                self.null_distributions_["histweights_level-voxel_corr-fwe_method-montecarlo"],
                self.null_distributions_["histogram_bins"],
            )

        else:
            if vfwe_only:
                LGR.warning(
                    "In order to run this method with the 'vfwe_only' option, "
                    "the Estimator must use the 'montecarlo' null_method. "
                    "Running permutations from scratch."
                )

            null_xyz = vox2mm(
                np.vstack(np.where(self.masker.mask_img.get_fdata())).T,
                self.masker.mask_img.affine,
            )

            n_cores = _check_ncores(n_cores)

            # Identify summary statistic corresponding to intensity threshold
            ss_thresh = self._p_to_summarystat(voxel_thresh)

            rand_idx = np.random.choice(
                null_xyz.shape[0],
                size=(self.inputs_["coordinates"].shape[0], n_iters),
            )
            rand_xyz = null_xyz[rand_idx, :]
            iter_xyzs = np.split(rand_xyz, rand_xyz.shape[1], axis=1)
            iter_df = self.inputs_["coordinates"].copy()

            # Define connectivity matrix for cluster labeling
            conn = ndimage.generate_binary_structure(3, 2)

            with tqdm_joblib(tqdm(total=n_iters)):
                perm_results = Parallel(n_jobs=n_cores)(
                    delayed(self._correct_fwe_montecarlo_permutation)(
                        iter_xyzs[i_iter],
                        iter_df=iter_df,
                        conn=conn,
                        voxel_thresh=ss_thresh,
                        vfwe_only=vfwe_only,
                    )
                    for i_iter in range(n_iters)
                )

            fwe_voxel_max, fwe_cluster_size_max, fwe_cluster_mass_max = zip(*perm_results)

            if not vfwe_only:
                # Cluster-level FWE
                # Extract the summary statistics in voxel-wise (3D) form, threshold, and
                # cluster-label
                thresh_stat_values = self.masker.inverse_transform(stat_values).get_fdata()
                thresh_stat_values[thresh_stat_values <= ss_thresh] = 0
                labeled_matrix, _ = ndimage.measurements.label(thresh_stat_values, conn)

                cluster_labels, idx, cluster_sizes = np.unique(
                    labeled_matrix,
                    return_inverse=True,
                    return_counts=True,
                )
                assert cluster_labels[0] == 0

                # Cluster mass-based inference
                cluster_masses = np.zeros(cluster_labels.shape)
                for i_val in cluster_labels:
                    if i_val == 0:
                        # Background (label 0) is not a cluster; leave its mass at zero.
                        continue

                    cluster_mass = np.sum(thresh_stat_values[labeled_matrix == i_val] - ss_thresh)
                    cluster_masses[i_val] = cluster_mass

                p_cmfwe_vals = null_to_p(cluster_masses, fwe_cluster_mass_max, "upper")
                p_cmfwe_map = p_cmfwe_vals[np.reshape(idx, labeled_matrix.shape)]

                p_cmfwe_values = np.squeeze(
                    self.masker.transform(
                        nib.Nifti1Image(p_cmfwe_map, self.masker.mask_img.affine)
                    )
                )
                logp_cmfwe_values = -np.log10(p_cmfwe_values)
                logp_cmfwe_values[np.isinf(logp_cmfwe_values)] = -np.log10(np.finfo(float).eps)
                z_cmfwe_values = p_to_z(p_cmfwe_values, tail="one")

                # Cluster size-based inference
                cluster_sizes[0] = 0  # replace background's "cluster size" with zeros
                p_csfwe_vals = null_to_p(cluster_sizes, fwe_cluster_size_max, "upper")
                p_csfwe_map = p_csfwe_vals[np.reshape(idx, labeled_matrix.shape)]

                p_csfwe_values = np.squeeze(
                    self.masker.transform(
                        nib.Nifti1Image(p_csfwe_map, self.masker.mask_img.affine)
                    )
                )
                logp_csfwe_values = -np.log10(p_csfwe_values)
                logp_csfwe_values[np.isinf(logp_csfwe_values)] = -np.log10(np.finfo(float).eps)
                z_csfwe_values = p_to_z(p_csfwe_values, tail="one")

                self.null_distributions_[
                    "values_desc-size_level-cluster_corr-fwe_method-montecarlo"
                ] = fwe_cluster_size_max
                self.null_distributions_[
                    "values_desc-mass_level-cluster_corr-fwe_method-montecarlo"
                ] = fwe_cluster_mass_max

            # Voxel-level FWE
            LGR.info("Using null distribution for voxel-level FWE correction.")
            p_vfwe_values = null_to_p(stat_values, fwe_voxel_max, tail="upper")
            self.null_distributions_[
                "values_level-voxel_corr-fwe_method-montecarlo"
            ] = fwe_voxel_max

        z_vfwe_values = p_to_z(p_vfwe_values, tail="one")
        logp_vfwe_values = -np.log10(p_vfwe_values)
        logp_vfwe_values[np.isinf(logp_vfwe_values)] = -np.log10(np.finfo(float).eps)

        if vfwe_only:
            # Return unthresholded value images
            images = {
                "logp_level-voxel": logp_vfwe_values,
                "z_level-voxel": z_vfwe_values,
            }

        else:
            # Return unthresholded value images
            images = {
                "logp_level-voxel": logp_vfwe_values,
                "z_level-voxel": z_vfwe_values,
                "logp_desc-size_level-cluster": logp_csfwe_values,
                "z_desc-size_level-cluster": z_csfwe_values,
                "logp_desc-mass_level-cluster": logp_cmfwe_values,
                "z_desc-mass_level-cluster": z_cmfwe_values,
            }

        return images
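
`null_to_p` with `tail="upper"` is the workhorse of the max-value method: every observed statistic (or cluster size/mass) is ranked against the distribution of per-iteration maxima, which is what makes the resulting p-values familywise-error corrected. NiMARE provides the real `null_to_p`; the sketch below is an assumed stand-in showing only the upper-tail comparison, with `+1` smoothing as a choice of this sketch:

import numpy as np

def null_to_p_upper(values, null_max):
    """One-sided FWE p-values: rank each value against permutation maxima."""
    values = np.atleast_1d(np.asarray(values, dtype=float))
    null_max = np.asarray(null_max, dtype=float)
    # Count, for each value, how many permutation maxima meet or exceed it.
    exceedances = np.sum(null_max[None, :] >= values[:, None], axis=1)
    return (exceedances + 1) / (null_max.size + 1)

fwe_voxel_max = np.random.default_rng(0).random(1000)  # stand-in null maxima
print(null_to_p_upper([0.5, 0.99], fwe_voxel_max))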