Example #1
    def get_studies_by_mask(self, mask):
        """Extract list of studies with at least one coordinate in mask.

        Parameters
        ----------
        mask : img_like
            Mask across which to search for coordinates.

        Returns
        -------
        found_ids : :obj:`list`
            A list of IDs from the Dataset with at least one focus in the mask.
        """
        from scipy.spatial.distance import cdist

        mask = load_niimg(mask)

        dset_mask = self.masker.mask_img
        if not np.array_equal(dset_mask.affine, mask.affine):
            LGR.warning(
                "Mask affine does not match Dataset affine. Assuming same space."
            )

        dset_ijk = mm2vox(self.coordinates[["x", "y", "z"]].values,
                          mask.affine)
        mask_ijk = np.vstack(np.where(mask.get_fdata())).T
        distances = cdist(mask_ijk, dset_ijk)
        distances = np.any(distances == 0, axis=0)
        found_ids = list(self.coordinates.loc[distances, "id"].unique())
        return found_ids
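
A minimal sketch of the idea behind this method, assuming only NumPy and nibabel: convert peak coordinates from mm to voxel indices with the mask affine (analogous to NiMARE's mm2vox helper) and keep those that land on a nonzero mask voxel. The image, coordinates, and variable names here are illustrative, not taken from the Dataset API.

import numpy as np
import nibabel as nib
from nibabel.affines import apply_affine

# Toy mask and peak coordinates (illustrative values)
mask_img = nib.Nifti1Image(np.ones((4, 4, 4), dtype=np.int16), np.eye(4))
xyz = np.array([[1.0, 2.0, 3.0], [10.0, 10.0, 10.0]])  # peaks in mm

# mm -> voxel indices, rounded to the nearest voxel
ijk = np.round(apply_affine(np.linalg.inv(mask_img.affine), xyz)).astype(int)

# Keep peaks that fall on a nonzero voxel inside the mask bounds
shape = np.array(mask_img.shape[:3])
in_bounds = np.all((ijk >= 0) & (ijk < shape), axis=1)
in_mask = np.zeros(len(ijk), dtype=bool)
in_mask[in_bounds] = mask_img.get_fdata()[tuple(ijk[in_bounds].T)] > 0
print(in_mask)  # [ True False]
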
Example #2
    def get_studies_by_mask(self, mask):
        """Extract list of studies with at least one coordinate in mask.

        Parameters
        ----------
        mask : img_like
            Mask across which to search for coordinates.

        Returns
        -------
        found_ids : :obj:`list`
            A list of IDs from the Dataset with at least one focus in the mask.
        """
        from scipy.spatial.distance import cdist

        mask = load_niimg(mask)

        dset_mask = self.masker.mask_img
        if not np.array_equal(dset_mask.affine, mask.affine):
            from nilearn.image import resample_to_img

            mask = resample_to_img(mask, dset_mask, interpolation="nearest")
        mask_ijk = np.vstack(np.where(mask.get_fdata())).T
        distances = cdist(mask_ijk, self.coordinates[["i", "j", "k"]].values)
        distances = np.any(distances == 0, axis=0)
        found_ids = list(self.coordinates.loc[distances, "id"].unique())
        return found_ids
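
Unlike Example #1, this variant resamples the query mask onto the Dataset grid when the affines differ. A standalone sketch of that resampling step on synthetic images; only the nilearn call is shared with the code above, everything else is illustrative.

import numpy as np
import nibabel as nib
from nilearn.image import resample_to_img

# Target grid (stand-in for the Dataset mask) and an ROI on a coarser grid
target_img = nib.Nifti1Image(np.ones((10, 10, 10), dtype=np.int16), np.eye(4))
roi_img = nib.Nifti1Image(np.ones((5, 5, 5), dtype=np.int16),
                          np.diag([2.0, 2.0, 2.0, 1.0]))

if not np.array_equal(roi_img.affine, target_img.affine):
    # Nearest-neighbour interpolation keeps the mask binary
    roi_img = resample_to_img(roi_img, target_img, interpolation="nearest")

print(roi_img.shape)  # (10, 10, 10), now on the target grid
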
Example #3
File: nifti.py Project: poldrack/nideconv
    def __init__(self,
                 func_img,
                 sample_rate,
                 mask=None,
                 oversample_design_matrix=20,
                 add_intercept=True,
                 detrend=False,
                 standardize=False,
                 confounds_for_extraction=None,
                 memory=None,
                 **kwargs):


        if isinstance(confounds_for_extraction, pd.DataFrame):
            confounds_for_extraction = confounds_for_extraction.values

        self.confounds = confounds_for_extraction

        if isinstance(mask, input_data.NiftiMasker):
            self.masker = mask
        else:

            if mask is None:
                logging.warning('No mask has been given. Nilearn will automatically try to '
                                'make one')
            else:
                mask = load_niimg(mask)
            self.masker = input_data.NiftiMasker(mask,
                                                 detrend=detrend,
                                                 standardize=standardize,
                                                 memory=memory)


        input_signal = self.masker.fit_transform(func_img,
                                                 confounds=confounds_for_extraction)
        self.n_voxels = input_signal.shape[1]

        super(NiftiResponseFitter, self).__init__(
            input_signal=input_signal,
            sample_rate=sample_rate,
            oversample_design_matrix=oversample_design_matrix,
            add_intercept=add_intercept,
            **kwargs)
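
The constructor's central step is NiftiMasker.fit_transform, which turns a 4-D functional image into a (timepoints x voxels) matrix. A minimal sketch with synthetic data; note that newer nilearn releases expose the masker under nilearn.maskers rather than nilearn.input_data.

import numpy as np
import nibabel as nib
from nilearn.maskers import NiftiMasker

rng = np.random.default_rng(0)
func_img = nib.Nifti1Image(rng.standard_normal((6, 6, 6, 20)), np.eye(4))
mask_img = nib.Nifti1Image(np.ones((6, 6, 6), dtype=np.int16), np.eye(4))

masker = NiftiMasker(mask_img=mask_img, detrend=False, standardize=False)
input_signal = masker.fit_transform(func_img)
print(input_signal.shape)  # (20, 216): one row per volume, one column per voxel
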
Example #4
File: nifti.py Project: poldrack/nideconv
    def get_timecourses(self, 
                        oversample=None,
                        average_over_mask=False,
                        transform_to_niftis=True,
                        **kwargs
                        ):

        if len(self.events) == 0:
            raise Exception("No events were added")

        timecourses = super(NiftiResponseFitter, self).get_timecourses(oversample=oversample,
                                                                       melt=False,
                                                                       **kwargs)
        if transform_to_niftis:
            if average_over_mask:
                
                average_over_mask = load_niimg(average_over_mask)

                weights = image.math_img('mask / mask.sum()', 
                                         mask=average_over_mask)

                weights = self.masker.fit_transform(weights)

                timecourses = timecourses.dot(weights.T) 
                return timecourses.sum(1)

            else:
                tc_df = []
                for (event_type, covariate, time), tc in timecourses.groupby(level=['event type', 'covariate', 'time']):
                    #timepoints = tc.index.get_level_values('time')
                    tc_nii = self._inverse_transform(tc)
                    tc = pd.DataFrame([tc_nii], index=pd.MultiIndex.from_tuples([(event_type, covariate, time)],
                                                                             names=['event type', 'covariate', 'time']),
                                      columns=['nii'])
                    tc_df.append(tc)

                return pd.concat(tc_df)
        else:
            return timecourses
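
When average_over_mask is given, the method normalizes the mask so its weights sum to one and takes the weighted average of the voxelwise timecourses. A plain-NumPy sketch of that reduction with illustrative shapes.

import numpy as np

rng = np.random.default_rng(0)
timecourses = rng.standard_normal((50, 200))  # timepoints x voxels
mask_weights = np.zeros(200)
mask_weights[50:100] = 1.0                    # ROI covers voxels 50-99

weights = mask_weights / mask_weights.sum()   # same role as math_img('mask / mask.sum()')
roi_average = timecourses @ weights           # one weighted-average timecourse
print(roi_average.shape)  # (50,)
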
Example #5
    def __init__(
        self,
        count_df,
        coordinates_df,
        mask="mni152_2mm",
        n_topics=100,
        n_regions=2,
        symmetric=True,
        alpha=0.1,
        beta=0.01,
        gamma=0.01,
        delta=1.0,
        dobs=25,
        roi_size=50.0,
        seed_init=1,
    ):
        LGR.info("Constructing/Initializing GCLDA Model")
        count_df = count_df.copy()
        coordinates_df = coordinates_df.copy()

        # Check IDs from DataFrames
        count_df.index = count_df.index.astype(str)
        count_df["id"] = count_df.index
        count_ids = count_df.index.tolist()
        if "id" not in coordinates_df.columns:
            coordinates_df["id"] = coordinates_df.index
        coordinates_df["id"] = coordinates_df["id"].astype(str)
        coord_ids = sorted(list(set(coordinates_df["id"].tolist())))
        ids = sorted(list(set(count_ids).intersection(coord_ids)))
        if len(ids) != len(count_ids) or len(ids) != len(coord_ids):
            union_ids = sorted(list(set(count_ids + coord_ids)))
            LGR.warning(
                f"IDs mismatch detected: retaining {len(ids)} of {len(union_ids)} unique IDs"
            )
        self.ids = ids

        # Reduce inputs based on shared IDs
        count_df = count_df.loc[count_df["id"].isin(ids)]
        coordinates_df = coordinates_df.loc[coordinates_df["id"].isin(ids)]

        # --- Checking to make sure parameters are valid
        if (symmetric is True) and (n_regions % 2 != 0):
            # The symmetric model requires an even number of subregions
            raise ValueError("Cannot run a symmetric model unless n_regions is even.")

        # Initialize sampling parameters
        # The global sampling iteration of the model
        self.iter = 0
        # Current random seed (is incremented after initialization and each sampling update)
        self.seed = 0

        # Set up model hyperparameters
        # Pseudo-count hyperparams need to be floats so that when sampling
        # distributions are computed the count matrices/vectors are converted
        # to floats
        self.params = {
            "n_topics": n_topics,  # Number of topics (T)
            "n_regions": n_regions,  # Number of subregions (R)
            "alpha": alpha,  # Prior count on topics for each doc
            "beta": beta,  # Prior count on word-types for each topic
            "gamma": gamma,  # Prior count added to y-counts when sampling z assignments
            "delta": delta,  # Prior count on subregions for each topic
            # Default ROI (default covariance spatial region we regularize towards) (not in paper)
            "roi_size": roi_size,
            # Sample constant (# observations weighting sigma in direction of default covariance)
            # (not in paper)
            "dobs": dobs,
            # Use constrained symmetry on subregions? (requires an even n_regions)
            "symmetric": symmetric,
            "seed_init": seed_init,  # Random seed for initializing model
        }

        # Add dictionaries for other model info
        self.data = {}
        self.topics = {}

        # Prepare data
        if isinstance(mask, str) and not op.isfile(mask):
            self.mask = get_template(mask, mask="brain")
        else:
            self.mask = load_niimg(mask)

        # Extract document and word indices from count_df
        docidx_mapper = {id_: i for (i, id_) in enumerate(ids)}

        # Create docidx column
        count_df["docidx"] = count_df["id"].map(docidx_mapper)
        count_df = count_df.drop(columns=["id"])

        # Remove words not found anywhere in the corpus
        n_terms = len(count_df.columns) - 1  # number of columns minus one for docidx
        count_df = count_df.loc[:, (count_df != 0).any(axis=0)]
        n_terms_in_corpus = len(count_df.columns) - 1
        if n_terms_in_corpus != n_terms:
            LGR.warning(
                "Some terms in count_df do not appear in corpus. "
                f"Retaining {n_terms_in_corpus/n_terms} terms."
            )

        # Get updated vocabulary
        # List of word-strings (wtoken_word_idx values are indices into this list)
        vocabulary = count_df.columns.tolist()
        vocabulary.remove("docidx")
        self.vocabulary = vocabulary
        widx_mapper = {word: i for (i, word) in enumerate(self.vocabulary)}

        # Melt dataframe and create widx column
        widx_df = pd.melt(count_df, id_vars=["docidx"], var_name="word", value_name="count")
        widx_df["widx"] = widx_df["word"].map(widx_mapper)

        # Replicate rows based on count
        widx_df = widx_df.loc[np.repeat(widx_df.index.values, widx_df["count"])]
        widx_df = widx_df[["docidx", "widx"]].astype(int)
        widx_df.sort_values(by=["docidx", "widx"], inplace=True)

        # List of document-indices for word-tokens
        self.data["wtoken_doc_idx"] = widx_df["docidx"].tolist()
        # List of word-indices for word-tokens
        self.data["wtoken_word_idx"] = widx_df["widx"].tolist()

        # Import all peak-indices into lists
        coordinates_df["docidx"] = coordinates_df["id"].astype(str).map(docidx_mapper)
        coordinates_df = coordinates_df[["docidx", "x", "y", "z"]]
        coordinates_df["docidx"] = coordinates_df["docidx"].astype(int)

        # List of document-indices for peak-tokens (x)
        self.data["ptoken_doc_idx"] = coordinates_df["docidx"].tolist()
        self.data["ptoken_coords"] = coordinates_df[["x", "y", "z"]].values

        # Seed random number generator
        np.random.seed(self.params["seed_init"])

        # Preallocate vectors of assignment indices
        # word->topic assignments
        self.topics["wtoken_topic_idx"] = np.zeros(len(self.data["wtoken_word_idx"]), dtype=int)

        # Randomly initialize peak->topic assignments (y) ~ unif(1...n_topics)
        self.topics["peak_topic_idx"] = np.random.randint(
            self.params["n_topics"],
            size=(len(self.data["ptoken_doc_idx"])),
        )

        # peak->region assignments
        self.topics["peak_region_idx"] = np.zeros(len(self.data["ptoken_doc_idx"]), dtype=int)

        # Preallocate count matrices
        # Peaks: D x T: Number of peak-tokens assigned to each topic per document
        self.topics["n_peak_tokens_doc_by_topic"] = np.zeros(
            (len(self.ids), self.params["n_topics"]),
            dtype=int,
        )

        # Peaks: R x T: Number of peak-tokens assigned to each subregion per topic
        self.topics["n_peak_tokens_region_by_topic"] = np.zeros(
            (self.params["n_regions"], self.params["n_topics"]),
            dtype=int,
        )

        # Words: W x T: Number of word-tokens assigned to each topic per word-type
        self.topics["n_word_tokens_word_by_topic"] = np.zeros(
            (len(self.vocabulary), self.params["n_topics"]),
            dtype=int,
        )

        # Words: D x T: Number of word-tokens assigned to each topic per document
        self.topics["n_word_tokens_doc_by_topic"] = np.zeros(
            (len(self.ids), self.params["n_topics"]),
            dtype=int,
        )

        # Words: 1 x T: Total number of word-tokens assigned to each topic (across all docs)
        self.topics["total_n_word_tokens_by_topic"] = np.zeros(
            (1, self.params["n_topics"]),
            dtype=int,
        )

        # Preallocate Gaussians for all subregions
        # Regions_Mu & Regions_Sigma: Gaussian mean and covariance for all
        # subregions of all topics
        # Formed using lists (over topics) of lists (over subregions) of numpy
        # arrays
        #   regions_mu = (n_topics, n_regions, 1, n_peak_dims)
        #   regions_sigma = (n_topics, n_regions, n_peak_dims, n_peak_dims)
        # (\mu^{(t)}_r)
        self.topics["regions_mu"] = np.zeros(
            (
                self.params["n_topics"],
                self.params["n_regions"],
                1,
                self.data["ptoken_coords"].shape[1],  # generally 3
            ),
        )
        # (\sigma^{(t)}_r)
        self.topics["regions_sigma"] = np.zeros(
            (
                self.params["n_topics"],
                self.params["n_regions"],
                self.data["ptoken_coords"].shape[1],  # generally 3
                self.data["ptoken_coords"].shape[1],  # generally 3
            )
        )

        # Initialize lists for tracking log-likelihood of data over sampling iterations
        self.loglikelihood = {
            "iter": [],  # Tracks iteration associated with the log-likelihood values
            "x": [],  # Tracks log-likelihood of peak tokens
            "w": [],  # Tracks log-likelihood of word tokens
            "total": [],  # Tracks log-likelihood of peak + word tokens
        }

        # Initialize peak->subregion assignments (r)
        if self.params["symmetric"]:
            # If the model is symmetric, each peak is assigned to a random
            # subregion pair, and the region within that pair is chosen
            # deterministically from the sign of the x-coordinate
            # (x > 0 -> second region of the pair).
            n_pairs = int(self.params["n_regions"] / 2)
            initial_assignments = np.random.randint(
                n_pairs,
                size=(len(self.data["ptoken_doc_idx"])),
            )
            signs = (self.data["ptoken_coords"][:, 0] > 0).astype(int)
            self.topics["peak_region_idx"][:] = (initial_assignments * 2) + signs
        else:
            # if asymmetric model, randomly sample r ~ unif(1...n_regions)
            self.topics["peak_region_idx"][:] = np.random.randint(
                self.params["n_regions"],
                size=(len(self.data["ptoken_doc_idx"])),
            )

        # Update model vectors and count matrices to reflect y and r assignments
        for i_ptoken, peak_doc in enumerate(self.data["ptoken_doc_idx"]):
            # peak-token -> topic assignment (y_i)
            peak_topic = self.topics["peak_topic_idx"][i_ptoken]
            # peak-token -> subregion assignment (c_i)
            peak_region = self.topics["peak_region_idx"][i_ptoken]
            # Increment document-by-topic counts
            self.topics["n_peak_tokens_doc_by_topic"][peak_doc, peak_topic] += 1
            # Increment region-by-topic
            self.topics["n_peak_tokens_region_by_topic"][peak_region, peak_topic] += 1

        # Randomly Initialize Word->Topic Assignments (z) for each word
        # token w_i: sample z_i proportional to p(topic|doc_i)
        for i_wtoken, word in enumerate(self.data["wtoken_word_idx"]):
            # w_i doc-index
            doc = self.data["wtoken_doc_idx"][i_wtoken]

            # Estimate p(t|d) for current doc
            p_topic_g_doc = (
                self.topics["n_peak_tokens_doc_by_topic"][doc, :] + self.params["gamma"]
            )

            # Sample a topic from p(t|d) for the z-assignment
            # Compute a cdf of the sampling distribution for z
            probs = np.cumsum(p_topic_g_doc)

            # Draw a uniform sample scaled by the total mass of the cdf
            random_threshold = np.random.rand() * probs[-1]
            # The sampled topic is the number of cdf entries below the draw
            topic = np.sum(probs < random_threshold)

            # Update model assignment vectors and count-matrices to reflect z
            # Word-token -> topic assignment (z_i)
            self.topics["wtoken_topic_idx"][i_wtoken] = topic
            self.topics["n_word_tokens_word_by_topic"][word, topic] += 1
            self.topics["total_n_word_tokens_by_topic"][0, topic] += 1
            self.topics["n_word_tokens_doc_by_topic"][doc, topic] += 1
Example #6
def feature_spatial(mel_IC):
    """Extract the spatial feature scores.

    For each IC, this determines the fraction of the mixture-modeled,
    thresholded Z-map that is located within the CSF or at the brain edges,
    using predefined standardized masks.

    Parameters
    ----------
    mel_IC : str or niimg_like
        Full path of the nii.gz file containing mixture-modeled thresholded
        (p<0.5) Z-maps, registered to the MNI152 2mm template

    Returns
    -------
    edge_fract : array_like
        Array of the edge fraction feature scores for the components of the
        mel_IC file
    csf_fract : array_like
        Array of the CSF fraction feature scores for the components of the
        mel_IC file
    """
    # Get the number of ICs
    mel_IC_img = load_niimg(mel_IC)
    num_ICs = mel_IC_img.shape[3]

    masks_dir = utils.get_resource_path()
    csf_mask = os.path.join(masks_dir, "mask_csf.nii.gz")
    edge_mask = os.path.join(masks_dir, "mask_edge.nii.gz")
    out_mask = os.path.join(masks_dir, "mask_out.nii.gz")

    # Loop over ICs
    edge_fract = np.zeros(num_ICs)
    csf_fract = np.zeros(num_ICs)
    for i in range(num_ICs):
        # Extract IC from the merged melodic_IC_thr2MNI2mm file
        temp_IC = image.index_img(mel_IC, i)

        # Change to absolute Z-values
        temp_IC = image.math_img("np.abs(img)", img=temp_IC)

        # Get the sum of absolute Z-values across the whole Z-map
        temp_IC_data = temp_IC.get_fdata()
        tot_sum = np.sum(temp_IC_data)

        if tot_sum == 0:
            LGR.info("\t- The spatial map of component {} is empty. "
                     "Please check!".format(i + 1))

        # Get the sum of absolute Z-values of the voxels located within the CSF
        csf_data = masking.apply_mask(temp_IC, csf_mask)
        csf_sum = np.sum(csf_data)

        # Get the sum of absolute Z-values of the voxels located at the brain edge
        edge_data = masking.apply_mask(temp_IC, edge_mask)
        edge_sum = np.sum(edge_data)

        # Get the sum of absolute Z-values of the voxels located outside the brain
        out_data = masking.apply_mask(temp_IC, out_mask)
        out_sum = np.sum(out_data)

        # Determine edge and CSF fraction
        if tot_sum != 0:
            edge_fract[i] = (out_sum + edge_sum) / (tot_sum - csf_sum)
            csf_fract[i] = csf_sum / tot_sum
        else:
            edge_fract[i] = 0
            csf_fract[i] = 0

    # Return feature scores
    return edge_fract, csf_fract
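
The per-mask sums rely on nilearn.masking.apply_mask, which extracts the voxel values under a binary mask as a 1-D array. A self-contained sketch of the CSF-fraction computation on synthetic images; the real function uses the CSF/edge/out masks shipped with the package.

import numpy as np
import nibabel as nib
from nilearn import masking

rng = np.random.default_rng(0)
z_img = nib.Nifti1Image(np.abs(rng.standard_normal((8, 8, 8))), np.eye(4))

csf = np.zeros((8, 8, 8), dtype=np.int16)
csf[:2] = 1  # toy "CSF" compartment
csf_img = nib.Nifti1Image(csf, np.eye(4))

tot_sum = z_img.get_fdata().sum()
csf_sum = masking.apply_mask(z_img, csf_img).sum()
csf_fract = csf_sum / tot_sum if tot_sum != 0 else 0.0
print(csf_fract)
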
Example #7
def gclda_decode_roi(model, roi, topic_priors=None, prior_weight=1.0):
    r"""Perform image-to-text decoding for discrete inputs using method from Rubin et al. (2017).

    The method used in this function was originally described in :footcite:t:`rubin2017decoding`.

    Parameters
    ----------
    model : :obj:`~nimare.annotate.gclda.GCLDAModel`
        Model object needed for decoding.
    roi : :obj:`nibabel.nifti1.Nifti1Image` or :obj:`str`
        Binary image to decode into text. If string, path to a file with
        the binary image.
    topic_priors : :obj:`numpy.ndarray` of :obj:`float`, optional
        A 1d array of size (n_topics) with values for topic weighting.
        If None, no weighting is done. Default is None.
    prior_weight : :obj:`float`, optional
        The weight by which the prior will affect the decoding.
        Default is 1.

    Returns
    -------
    decoded_df : :obj:`pandas.DataFrame`
        A DataFrame with the word-tokens and their associated weights.
    topic_weights : :obj:`numpy.ndarray` of :obj:`float`
        The weights of the topics used in decoding.

    Notes
    -----
    ======================    ==============================================================
    Notation                  Meaning
    ======================    ==============================================================
    :math:`v`                 Voxel
    :math:`t`                 Topic
    :math:`w`                 Word type
    :math:`r`                 Region of interest (ROI)
    :math:`p(t|v)`            Probability of topic given voxel (``p_topic_g_voxel``)
    :math:`\\tau_{t}`          Topic weight vector (``topic_weights``)
    :math:`p(w|t)`            Probability of word type given topic (``p_word_g_topic``)
    ======================    ==============================================================

    1.  Compute :math:`p(t|v)`.

            - From :func:`gclda.model.Model.get_spatial_probs()`

    2.  Compute topic weight vector (:math:`\\tau_{t}`) by adding across voxels within ROI.

            - :math:`\\tau_{t} = \sum_{i} {p(t|v_{i})}`

    3.  Multiply :math:`\\tau_{t}` by :math:`p(w|t)`.

            - :math:`p(w|r) \propto \\tau_{t} \cdot p(w|t)`

    4.  The resulting vector (``word_weights``) reflects arbitrarily scaled term weights for the
        ROI.

    See Also
    --------
    :class:`~nimare.annotate.gclda.GCLDAModel`
    :func:`~nimare.decode.continuous.gclda_decode_map`
    :func:`~nimare.decode.encode.gclda_encode`

    References
    ----------
    .. footbibliography::
    """
    roi = load_niimg(roi)

    dset_aff = model.mask.affine
    if not np.array_equal(roi.affine, dset_aff):
        raise ValueError(
            "Input roi must have same affine as mask img:\n"
            f"{np.array2string(roi.affine)}\n{np.array2string(dset_aff)}")

    # Load ROI file and get ROI voxels overlapping with brain mask
    mask_vec = model.mask.get_fdata().ravel().astype(bool)
    roi_vec = roi.get_fdata().astype(bool).ravel()
    roi_vec = roi_vec[mask_vec]
    roi_idx = np.where(roi_vec)[0]
    p_topic_g_roi = model.p_topic_g_voxel_[
        roi_idx, :]  # p(T|V) for voxels in ROI only
    topic_weights = np.sum(p_topic_g_roi, axis=0)  # Sum across voxels
    if topic_priors is not None:
        weighted_priors = weight_priors(topic_priors, prior_weight)
        topic_weights *= weighted_priors

    # Multiply topic_weights by topic-by-word matrix (p_word_g_topic).
    # n_word_tokens_per_topic = np.sum(model.n_word_tokens_word_by_topic, axis=0)
    # p_word_g_topic = model.n_word_tokens_word_by_topic / n_word_tokens_per_topic[None, :]
    # p_word_g_topic = np.nan_to_num(p_word_g_topic, 0)
    word_weights = np.dot(model.p_word_g_topic_, topic_weights)

    decoded_df = pd.DataFrame(index=model.vocabulary,
                              columns=["Weight"],
                              data=word_weights)
    decoded_df.index.name = "Term"
    return decoded_df, topic_weights
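
Stripped of the I/O, the decoding itself is two array operations: sum p(topic|voxel) over the ROI voxels to get topic weights, then project through p(word|topic). A toy NumPy sketch with made-up shapes; no fitted model is involved.

import numpy as np

rng = np.random.default_rng(0)
n_voxels, n_topics, n_words = 1000, 10, 50
p_topic_g_voxel = rng.random((n_voxels, n_topics))  # stand-in for model.p_topic_g_voxel_
p_word_g_topic = rng.random((n_words, n_topics))    # stand-in for model.p_word_g_topic_
roi_idx = np.arange(100, 200)                       # voxels inside the ROI

topic_weights = p_topic_g_voxel[roi_idx, :].sum(axis=0)  # tau_t
word_weights = p_word_g_topic @ topic_weights            # p(w|r), up to scale
print(word_weights.shape)  # (50,)
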
Example #8
def gclda_decode_map(model, image, topic_priors=None, prior_weight=1):
    r"""Perform image-to-text decoding for continuous inputs using method from Rubin et al. (2017).

    The method used in this function was originally described in :footcite:t:`rubin2017decoding`.

    Parameters
    ----------
    model : :obj:`~nimare.annotate.gclda.GCLDAModel`
        Model object needed for decoding.
    image : :obj:`nibabel.nifti1.Nifti1Image` or :obj:`str`
        Whole-brain image to decode into text. Must be in same space as
        model and dataset. Model's template available in
        `model.dataset.mask_img`.
    topic_priors : :obj:`numpy.ndarray` of :obj:`float`, optional
        A 1d array of size (n_topics) with values for topic weighting.
        If None, no weighting is done. Default is None.
    prior_weight : :obj:`float`, optional
        The weight by which the prior will affect the decoding.
        Default is 1.

    Returns
    -------
    decoded_df : :obj:`pandas.DataFrame`
        A DataFrame with the word-tokens and their associated weights.
    topic_weights : :obj:`numpy.ndarray` of :obj:`float`
        The weights of the topics used in decoding.

    Notes
    -----
    ======================    ==============================================================
    Notation                  Meaning
    ======================    ==============================================================
    :math:`v`                 Voxel
    :math:`t`                 Topic
    :math:`w`                 Word type
    :math:`i`                 Input image
    :math:`p(t|v)`            Probability of topic given voxel (``p_topic_g_voxel``)
    :math:`\\tau_{t}`          Topic weight vector (``topic_weights``)
    :math:`p(w|t)`            Probability of word type given topic (``p_word_g_topic``)
    :math:`\omega`            1d array from input image (``input_values``)
    ======================    ==============================================================

    1.  Compute :math:`p(t|v)` (``p_topic_g_voxel``).

        - From :func:`gclda.model.Model.get_spatial_probs()`

    2.  Squeeze input image to 1d array :math:`\omega` (``input_values``).
    3.  Compute topic weight vector (:math:`\\tau_{t}`) by multiplying :math:`p(t|v)` by input
        image.

            - :math:`\\tau_{t} = p(t|v) \cdot \omega`

    4.  Multiply :math:`\\tau_{t}` by :math:`p(w|t)`.

            - :math:`p(w|i) \propto \\tau_{t} \cdot p(w|t)`

    5.  The resulting vector (``word_weights``) reflects arbitrarily scaled term weights for the
        input image.

    See Also
    --------
    :class:`~nimare.annotate.gclda.GCLDAModel`
    :func:`~nimare.decode.discrete.gclda_decode_roi`
    :func:`~nimare.decode.encode.gclda_encode`

    References
    ----------
    .. footbibliography::
    """
    image = load_niimg(image)

    # Load image file and get voxel values
    input_values = apply_mask(image, model.mask)
    topic_weights = np.squeeze(
        np.dot(model.p_topic_g_voxel_.T, input_values[:, None]))
    if topic_priors is not None:
        weighted_priors = weight_priors(topic_priors, prior_weight)
        topic_weights *= weighted_priors

    # Multiply topic_weights by topic-by-word matrix (p_word_g_topic).
    # n_word_tokens_per_topic = np.sum(model.n_word_tokens_word_by_topic, axis=0)
    # p_word_g_topic = model.n_word_tokens_word_by_topic / n_word_tokens_per_topic[None, :]
    # p_word_g_topic = np.nan_to_num(p_word_g_topic, 0)
    word_weights = np.dot(model.p_word_g_topic_, topic_weights)

    decoded_df = pd.DataFrame(index=model.vocabulary,
                              columns=["Weight"],
                              data=word_weights)
    decoded_df.index.name = "Term"
    return decoded_df, topic_weights
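
The commented-out lines above hint at how a word-by-topic probability matrix can be built from raw counts: normalize each topic's column to sum to one and zero out topics with no tokens. A small sketch of that normalization with illustrative counts, not model output.

import numpy as np

counts = np.array([[3, 0, 1],
                   [1, 0, 0],
                   [0, 0, 2]], dtype=float)  # word-by-topic token counts

with np.errstate(divide="ignore", invalid="ignore"):
    p_word_g_topic = counts / counts.sum(axis=0, keepdims=True)
p_word_g_topic = np.nan_to_num(p_word_g_topic, nan=0.0)  # empty topics -> 0
print(p_word_g_topic)
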
Example #9
def feature_spatial(mel_IC, metric_metadata=None):
    """Extract the spatial feature scores.

    For each IC, this determines the fraction of the mixture-modeled,
    thresholded Z-map that is located within the CSF or at the brain edges,
    using predefined standardized masks.

    Parameters
    ----------
    mel_IC : str or niimg_like
        Full path of the nii.gz file containing mixture-modeled thresholded
        (p<0.5) Z-maps, registered to the MNI152 2mm template
    metric_metadata : None or dict, optional
        A dictionary containing metadata about the AROMA metrics.
        If provided, metadata for the ``edge_fract`` and ``csf_fract`` metrics
        will be added.
        Otherwise, no operations will be performed on this parameter.

    Returns
    -------
    edge_fract : array_like
        Array of the edge fraction feature scores for the components of the
        mel_IC file
    csf_fract : array_like
        Array of the CSF fraction feature scores for the components of the
        mel_IC file
    metric_metadata : None or dict
        If the ``metric_metadata`` input was None, then None will be returned.
        Otherwise, this will be a dictionary containing existing information,
        as well as new metadata for the ``edge_fract`` and ``csf_fract``
        metrics.
    """
    if isinstance(metric_metadata, dict):
        metric_metadata["edge_fract"] = {
            "LongName":
            "Edge content fraction",
            "Description":
            ("The fraction of thresholded component z-values at the edge of the brain. "
             "This is calculated by "
             "(1) taking the absolute value of the thresholded Z map for each component, "
             "(2) summing z-statistics from the whole brain, "
             "(3) summing z-statistics from outside of the brain, "
             "(4) summing z-statistics from voxels in CSF compartments, "
             "(5) summing z-statistics from voxels at the edge of the brain, "
             "(6) adding the sums from outside of the brain and the edge of the brain, "
             "(7) subtracting the CSF sum from the total brain sum, and "
             "(8) dividing the out-of-brain+edge-of-brain sum by the whole brain (minus CSF) "
             "sum."),
            "Units":
            "arbitrary",
        }
        metric_metadata["csf_fract"] = {
            "LongName":
            "CSF content fraction",
            "Description":
            ("The fraction of thresholded component z-values in the brain's cerebrospinal "
             "fluid. "
             "This is calculated by "
             "(1) taking the absolute value of the thresholded Z map for each component, "
             "(2) summing z-statistics from the whole brain, "
             "(3) summing z-statistics from voxels in CSF compartments, and "
             "(4) dividing the CSF z-statistic sum by the whole brain z-statistic sum."
             ),
            "Units":
            "arbitrary",
        }

    # Get the number of ICs
    mel_IC_img = load_niimg(mel_IC)
    num_ICs = mel_IC_img.shape[3]

    masks_dir = utils.get_resource_path()
    csf_mask = os.path.join(masks_dir, "mask_csf.nii.gz")
    edge_mask = os.path.join(masks_dir, "mask_edge.nii.gz")
    out_mask = os.path.join(masks_dir, "mask_out.nii.gz")

    # Loop over ICs
    edge_fract = np.zeros(num_ICs)
    csf_fract = np.zeros(num_ICs)
    for i in range(num_ICs):
        # Extract IC from the merged melodic_IC_thr2MNI2mm file
        temp_IC = image.index_img(mel_IC, i)

        # Change to absolute Z-values
        temp_IC = image.math_img("np.abs(img)", img=temp_IC)

        # Get the sum of absolute Z-values across the whole Z-map
        temp_IC_data = temp_IC.get_fdata()
        tot_sum = np.sum(temp_IC_data)

        if tot_sum == 0:
            LGR.info("\t- The spatial map of component {} is empty. "
                     "Please check!".format(i + 1))

        # Get the sum of absolute Z-values of the voxels located within the CSF
        csf_data = masking.apply_mask(temp_IC, csf_mask)
        csf_sum = np.sum(csf_data)

        # Get the sum of absolute Z-values of the voxels located at the brain edge
        edge_data = masking.apply_mask(temp_IC, edge_mask)
        edge_sum = np.sum(edge_data)

        # Get the sum of absolute Z-values of the voxels located outside the brain
        out_data = masking.apply_mask(temp_IC, out_mask)
        out_sum = np.sum(out_data)

        # Determine edge and CSF fraction
        if tot_sum != 0:
            edge_fract[i] = (out_sum + edge_sum) / (tot_sum - csf_sum)
            csf_fract[i] = csf_sum / tot_sum
        else:
            edge_fract[i] = 0
            csf_fract[i] = 0

    # Return feature scores
    return edge_fract, csf_fract, metric_metadata
Example #10
File: utils.py Project: eurunuela/aroma
def denoising(in_file, out_dir, mixing, den_type, den_idx):
    """Remove noise components from fMRI data.

    Parameters
    ----------
    in_file : str
        Full path to the data file (nii.gz) which has to be denoised
    out_dir : str
        Full path of the output directory
    mixing : numpy.ndarray of shape (T, C)
        Mixing matrix.
    den_type : {"aggr", "nonaggr", "both"}
        Type of requested denoising ('aggr': aggressive, 'nonaggr':
        non-aggressive, 'both': both aggressive and non-aggressive)
    den_idx : array_like
        Index of the components that should be regressed out

    Output
    ------
    desc-smoothAROMA<den_type>_bold.nii.gz : The denoised fMRI data
    """
    # Check if denoising is needed (i.e. are there motion components?)
    motion_components_found = den_idx.size > 0

    nonaggr_denoised_file = op.join(out_dir,
                                    "desc-smoothAROMAnonaggr_bold.nii.gz")
    aggr_denoised_file = op.join(out_dir, "desc-smoothAROMAaggr_bold.nii.gz")

    if motion_components_found:
        motion_components = mixing[:, den_idx]

        # Create a fake mask to make it easier to reshape the full data to 2D
        img = load_niimg(in_file)
        full_mask = nib.Nifti1Image(np.ones(img.shape[:3], int), img.affine)
        data = masking.apply_mask(img, full_mask)  # T x S

        # Non-aggressive denoising of the data via partial regression
        # (fit all components, remove only the noise components), if requested
        if den_type in ("nonaggr", "both"):
            # Fit GLM to all components
            betas = np.linalg.lstsq(mixing, data, rcond=None)[0]

            # Denoise the data using the betas from just the bad components.
            pred_data = np.dot(motion_components, betas[den_idx, :])
            data_denoised = data - pred_data

            # Save to file.
            img_denoised = masking.unmask(data_denoised, full_mask)
            img_denoised.to_filename(nonaggr_denoised_file)

        # Aggressive denoising of the data via full regression on the noise components
        if den_type in ("aggr", "both"):
            # Denoise the data with the bad components.
            betas = np.linalg.lstsq(motion_components, data, rcond=None)[0]
            pred_data = np.dot(motion_components, betas)
            data_denoised = data - pred_data

            # Save to file.
            img_denoised = masking.unmask(data_denoised, full_mask)
            img_denoised.to_filename(aggr_denoised_file)
    else:
        LGR.warning("  - None of the components were classified as motion, "
                    "so no denoising is applied (the input file is copied "
                    "as-is).")
        if den_type in ("nonaggr", "both"):
            shutil.copyfile(in_file, nonaggr_denoised_file)

        if den_type in ("aggr", "both"):
            shutil.copyfile(in_file, aggr_denoised_file)
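
The core of both branches is an ordinary least-squares regression: non-aggressive denoising fits all components and removes only the fitted contribution of the flagged ones, while aggressive denoising regresses the flagged components out directly. A toy NumPy sketch with synthetic data and illustrative sizes.

import numpy as np

rng = np.random.default_rng(0)
n_vols, n_comps, n_voxels = 100, 5, 50
mixing = rng.standard_normal((n_vols, n_comps))  # T x C mixing matrix
data = rng.standard_normal((n_vols, n_voxels))   # T x S voxel time series
den_idx = np.array([1, 3])                       # components flagged as motion
motion_components = mixing[:, den_idx]

# Non-aggressive (partial regression): fit all components, subtract noise fit
betas_all = np.linalg.lstsq(mixing, data, rcond=None)[0]
nonaggr = data - motion_components @ betas_all[den_idx, :]

# Aggressive (full regression): fit and subtract the noise components only
betas_noise = np.linalg.lstsq(motion_components, data, rcond=None)[0]
aggr = data - motion_components @ betas_noise
print(nonaggr.shape, aggr.shape)  # (100, 50) (100, 50)
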