def get_studies_by_mask(self, mask):
    """Extract list of studies with at least one coordinate in mask.

    Parameters
    ----------
    mask : img_like
        Mask across which to search for coordinates.

    Returns
    -------
    found_ids : :obj:`list`
        A list of IDs from the Dataset with at least one focus in the mask.
    """
    from scipy.spatial.distance import cdist

    mask = load_niimg(mask)
    dset_mask = self.masker.mask_img
    if not np.array_equal(dset_mask.affine, mask.affine):
        LGR.warning("Mask affine does not match Dataset affine. Assuming same space.")

    dset_ijk = mm2vox(self.coordinates[["x", "y", "z"]].values, mask.affine)
    mask_ijk = np.vstack(np.where(mask.get_fdata())).T
    distances = cdist(mask_ijk, dset_ijk)
    distances = np.any(distances == 0, axis=0)
    found_ids = list(self.coordinates.loc[distances, "id"].unique())
    return found_ids
def get_studies_by_mask(self, mask):
    """Extract list of studies with at least one coordinate in mask.

    Parameters
    ----------
    mask : img_like
        Mask across which to search for coordinates.

    Returns
    -------
    found_ids : :obj:`list`
        A list of IDs from the Dataset with at least one focus in the mask.
    """
    from scipy.spatial.distance import cdist

    mask = load_niimg(mask)
    dset_mask = self.masker.mask_img
    if not np.array_equal(dset_mask.affine, mask.affine):
        from nilearn.image import resample_to_img

        mask = resample_to_img(mask, dset_mask, interpolation="nearest")

    mask_ijk = np.vstack(np.where(mask.get_fdata())).T
    distances = cdist(mask_ijk, self.coordinates[["i", "j", "k"]].values)
    distances = np.any(distances == 0, axis=0)
    found_ids = list(self.coordinates.loc[distances, "id"].unique())
    return found_ids
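# Minimal self-contained sketch (not from the source) of the membership test both
# variants above rely on: convert mm coordinates to voxel indices through the affine
# and check for an exact voxel match against the nonzero voxels of a mask.
# All data below are synthetic and for illustration only.
import numpy as np
from scipy.spatial.distance import cdist

affine = np.eye(4) * 2.0  # toy 2 mm isotropic affine
affine[3, 3] = 1.0
mask_data = np.zeros((4, 4, 4))
mask_data[1, 2, 3] = 1  # single in-mask voxel

coords_mm = np.array([[2.0, 4.0, 6.0], [0.0, 0.0, 0.0]])  # first focus falls inside the mask
coords_ijk = np.linalg.solve(affine, np.column_stack([coords_mm, np.ones(2)]).T).T[:, :3]
mask_ijk = np.vstack(np.where(mask_data)).T

in_mask = np.any(cdist(mask_ijk, np.round(coords_ijk)) == 0, axis=0)
print(in_mask)  # [ True False]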
def __init__(self, func_img, sample_rate, mask=None,
             oversample_design_matrix=20, add_intercept=True,
             detrend=False, standardize=False,
             confounds_for_extraction=None, memory=None, **kwargs):

    if isinstance(confounds_for_extraction, pd.DataFrame):
        confounds_for_extraction = confounds_for_extraction.values

    self.confounds = confounds_for_extraction

    if isinstance(mask, input_data.NiftiMasker):
        self.masker = mask
    else:
        if mask is None:
            logging.warning(
                'No mask has been given. Nilearn will automatically try to make one.'
            )
        else:
            mask = load_niimg(mask)

        self.masker = input_data.NiftiMasker(mask,
                                             detrend=detrend,
                                             standardize=standardize,
                                             memory=memory)

    input_signal = self.masker.fit_transform(func_img,
                                             confounds=confounds_for_extraction)

    self.n_voxels = input_signal.shape[1]

    super(NiftiResponseFitter, self).__init__(
        input_signal=input_signal,
        sample_rate=sample_rate,
        oversample_design_matrix=oversample_design_matrix,
        add_intercept=add_intercept,
        **kwargs)
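# Minimal sketch (not from the source) of the masking step this constructor performs:
# a nilearn NiftiMasker turns a 4D functional image into a 2D (time x voxel) matrix
# that the response fitter then works on. The images here are random synthetic data
# and the explicit all-ones mask is an assumption made for the example.
import numpy as np
import nibabel as nib
from nilearn.input_data import NiftiMasker

func_img = nib.Nifti1Image(np.random.rand(5, 5, 5, 20), np.eye(4))
mask_img = nib.Nifti1Image(np.ones((5, 5, 5), dtype=np.int8), np.eye(4))
masker = NiftiMasker(mask_img=mask_img, detrend=False, standardize=False)
input_signal = masker.fit_transform(func_img)
print(input_signal.shape)  # (20, 125): one row per volume, one column per in-mask voxel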
def get_timecourses(self,
                    oversample=None,
                    average_over_mask=False,
                    transform_to_niftis=True,
                    **kwargs):

    if len(self.events) == 0:
        raise Exception("No events were added")

    timecourses = super(NiftiResponseFitter, self).get_timecourses(oversample=oversample,
                                                                   melt=False,
                                                                   **kwargs)

    if transform_to_niftis:
        if average_over_mask:
            average_over_mask = load_niimg(average_over_mask)

            weights = image.math_img('mask / mask.sum()', mask=average_over_mask)
            weights = self.masker.fit_transform(weights)

            timecourses = timecourses.dot(weights.T)
            return timecourses.sum(1)
        else:
            tc_df = []
            for (event_type, covariate, time), tc in timecourses.groupby(
                    level=['event type', 'covariate', 'time']):
                # timepoints = tc.index.get_level_values('time')
                tc_nii = self._inverse_transform(tc)
                tc = pd.DataFrame([tc_nii],
                                  index=pd.MultiIndex.from_tuples(
                                      [(event_type, covariate, time)],
                                      names=['event type', 'covariate', 'time']),
                                  columns=['nii'])
                tc_df.append(tc)

            return pd.concat(tc_df)
    else:
        return timecourses
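# Small numeric sketch (not from the source) of the mask-weighted averaging used when
# average_over_mask is given: voxelwise timecourses are combined with weights that sum
# to one. Arrays are synthetic; in the method above the weights come from the mask image.
import numpy as np

timecourses = np.random.rand(40, 6)     # 40 timepoints x 6 voxels
mask = np.array([1.0, 1.0, 0.0, 0.0, 1.0, 0.0])
weights = mask / mask.sum()             # same idea as image.math_img('mask / mask.sum()', ...)
roi_average = timecourses.dot(weights)  # single averaged timecourse, shape (40,)
print(roi_average.shape)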
def __init__(
    self,
    count_df,
    coordinates_df,
    mask="mni152_2mm",
    n_topics=100,
    n_regions=2,
    symmetric=True,
    alpha=0.1,
    beta=0.01,
    gamma=0.01,
    delta=1.0,
    dobs=25,
    roi_size=50.0,
    seed_init=1,
):
    LGR.info("Constructing/Initializing GCLDA Model")

    count_df = count_df.copy()
    coordinates_df = coordinates_df.copy()

    # Check IDs from DataFrames
    count_df.index = count_df.index.astype(str)
    count_df["id"] = count_df.index
    count_ids = count_df.index.tolist()
    if "id" not in coordinates_df.columns:
        coordinates_df["id"] = coordinates_df.index
    coordinates_df["id"] = coordinates_df["id"].astype(str)
    coord_ids = sorted(list(set(coordinates_df["id"].tolist())))
    ids = sorted(list(set(count_ids).intersection(coord_ids)))
    if len(count_ids) != len(coord_ids) != len(ids):
        union_ids = sorted(list(set(count_ids + coord_ids)))
        LGR.warning(
            f"IDs mismatch detected: retaining {len(ids)} of {len(union_ids)} unique IDs"
        )
    self.ids = ids

    # Reduce inputs based on shared IDs
    count_df = count_df.loc[count_df["id"].isin(ids)]
    coordinates_df = coordinates_df.loc[coordinates_df["id"].isin(ids)]

    # --- Checking to make sure parameters are valid
    if (symmetric is True) and (n_regions % 2 != 0):
        # A symmetric model requires an even number of subregions
        raise ValueError("Cannot run a symmetric model unless n_regions is even.")

    # Initialize sampling parameters
    # The global sampling iteration of the model
    self.iter = 0
    # Current random seed (is incremented after initialization and each sampling update)
    self.seed = 0

    # Set up model hyperparameters
    # Pseudo-count hyperparams need to be floats so that when sampling distributions are
    # computed the count matrices/vectors are converted to floats
    self.params = {
        "n_topics": n_topics,  # Number of topics (T)
        "n_regions": n_regions,  # Number of subregions (R)
        "alpha": alpha,  # Prior count on topics for each doc
        "beta": beta,  # Prior count on word-types for each topic
        "gamma": gamma,  # Prior count added to y-counts when sampling z assignments
        "delta": delta,  # Prior count on subregions for each topic
        # Default ROI (default covariance spatial region we regularize towards) (not in paper)
        "roi_size": roi_size,
        # Sample constant (# observations weighting sigma in direction of default covariance)
        # (not in paper)
        "dobs": dobs,
        # Use constrained symmetry on subregions? (only for even n_regions)
        "symmetric": symmetric,
        "seed_init": seed_init,  # Random seed for initializing model
    }

    # Add dictionaries for other model info
    self.data = {}
    self.topics = {}

    # Prepare data
    if isinstance(mask, str) and not op.isfile(mask):
        self.mask = get_template(mask, mask="brain")
    else:
        self.mask = load_niimg(mask)

    # Extract document and word indices from count_df
    docidx_mapper = {id_: i for (i, id_) in enumerate(ids)}

    # Create docidx column
    count_df["docidx"] = count_df["id"].map(docidx_mapper)
    count_df = count_df.drop(columns=["id"])

    # Remove words not found anywhere in the corpus
    n_terms = len(count_df.columns) - 1  # number of columns minus one for docidx
    count_df = count_df.loc[:, (count_df != 0).any(axis=0)]
    n_terms_in_corpus = len(count_df.columns) - 1
    if n_terms_in_corpus != n_terms:
        LGR.warning(
            "Some terms in count_df do not appear in corpus. "
            f"Retaining {n_terms_in_corpus}/{n_terms} terms."
        )

    # Get updated vocabulary
    # List of word-strings (wtoken_word_idx values are indices into this list)
    vocabulary = count_df.columns.tolist()
    vocabulary.remove("docidx")
    self.vocabulary = vocabulary
    widx_mapper = {word: i for (i, word) in enumerate(self.vocabulary)}

    # Melt dataframe and create widx column
    widx_df = pd.melt(count_df, id_vars=["docidx"], var_name="word", value_name="count")
    widx_df["widx"] = widx_df["word"].map(widx_mapper)

    # Replicate rows based on count
    widx_df = widx_df.loc[np.repeat(widx_df.index.values, widx_df["count"])]
    widx_df = widx_df[["docidx", "widx"]].astype(int)
    widx_df.sort_values(by=["docidx", "widx"], inplace=True)

    # List of document-indices for word-tokens
    self.data["wtoken_doc_idx"] = widx_df["docidx"].tolist()
    # List of word-indices for word-tokens
    self.data["wtoken_word_idx"] = widx_df["widx"].tolist()

    # Import all peak-indices into lists
    coordinates_df["docidx"] = coordinates_df["id"].astype(str).map(docidx_mapper)
    coordinates_df = coordinates_df[["docidx", "x", "y", "z"]]
    coordinates_df["docidx"] = coordinates_df["docidx"].astype(int)

    # List of document-indices for peak-tokens
    self.data["ptoken_doc_idx"] = coordinates_df["docidx"].tolist()
    self.data["ptoken_coords"] = coordinates_df[["x", "y", "z"]].values

    # Seed random number generator
    np.random.seed(self.params["seed_init"])

    # Preallocate vectors of assignment indices
    # word->topic assignments
    self.topics["wtoken_topic_idx"] = np.zeros(len(self.data["wtoken_word_idx"]), dtype=int)

    # Randomly initialize peak->topic assignments (y) ~ unif(1...n_topics)
    self.topics["peak_topic_idx"] = np.random.randint(
        self.params["n_topics"],
        size=(len(self.data["ptoken_doc_idx"])),
    )

    # peak->region assignments
    self.topics["peak_region_idx"] = np.zeros(len(self.data["ptoken_doc_idx"]), dtype=int)

    # Preallocate count matrices
    # Peaks: D x T: Number of peak-tokens assigned to each topic per document
    self.topics["n_peak_tokens_doc_by_topic"] = np.zeros(
        (len(self.ids), self.params["n_topics"]),
        dtype=int,
    )
    # Peaks: R x T: Number of peak-tokens assigned to each subregion per topic
    self.topics["n_peak_tokens_region_by_topic"] = np.zeros(
        (self.params["n_regions"], self.params["n_topics"]),
        dtype=int,
    )
    # Words: W x T: Number of word-tokens assigned to each topic per word-type
    self.topics["n_word_tokens_word_by_topic"] = np.zeros(
        (len(self.vocabulary), self.params["n_topics"]),
        dtype=int,
    )
    # Words: D x T: Number of word-tokens assigned to each topic per document
    self.topics["n_word_tokens_doc_by_topic"] = np.zeros(
        (len(self.ids), self.params["n_topics"]),
        dtype=int,
    )
    # Words: 1 x T: Total number of word-tokens assigned to each topic (across all docs)
    self.topics["total_n_word_tokens_by_topic"] = np.zeros(
        (1, self.params["n_topics"]),
        dtype=int,
    )

    # Preallocate Gaussians for all subregions
    # Regions_Mu & Regions_Sigma: Gaussian mean and covariance for all subregions of all
    # topics. Formed using lists (over topics) of lists (over subregions) of numpy arrays.
    # regions_mu = (n_topics, n_regions, 1, n_peak_dims)
    # regions_sigma = (n_topics, n_regions, n_peak_dims, n_peak_dims)
    # (\mu^{(t)}_r)
    self.topics["regions_mu"] = np.zeros(
        (
            self.params["n_topics"],
            self.params["n_regions"],
            1,
            self.data["ptoken_coords"].shape[1],  # generally 3
        ),
    )
    # (\sigma^{(t)}_r)
    self.topics["regions_sigma"] = np.zeros(
        (
            self.params["n_topics"],
            self.params["n_regions"],
            self.data["ptoken_coords"].shape[1],  # generally 3
            self.data["ptoken_coords"].shape[1],  # generally 3
        )
    )

    # Initialize lists for tracking log-likelihood of data over sampling iterations
    self.loglikelihood = {
        "iter": [],  # Tracks iteration associated with the log-likelihood values
        "x": [],  # Tracks log-likelihood of peak tokens
        "w": [],  # Tracks log-likelihood of word tokens
        "total": [],  # Tracks log-likelihood of peak + word tokens
    }

    # Initialize peak->subregion assignments (r)
    if self.params["symmetric"]:
        # If the model is symmetric, subregions come in left/right pairs: randomly pick a
        # pair, then assign the peak to one member of the pair based on whether its
        # x-coordinate is greater than zero.
        n_pairs = int(self.params["n_regions"] / 2)
        initial_assignments = np.random.randint(
            n_pairs,
            size=(len(self.data["ptoken_doc_idx"])),
        )
        signs = (self.data["ptoken_coords"][:, 0] > 0).astype(int)
        self.topics["peak_region_idx"][:] = (initial_assignments * 2) + signs
    else:
        # If the model is asymmetric, randomly sample r ~ unif(1...n_regions)
        self.topics["peak_region_idx"][:] = np.random.randint(
            self.params["n_regions"],
            size=(len(self.data["ptoken_doc_idx"])),
        )

    # Update model vectors and count matrices to reflect y and r assignments
    for i_ptoken, peak_doc in enumerate(self.data["ptoken_doc_idx"]):
        # peak-token -> topic assignment (y_i)
        peak_topic = self.topics["peak_topic_idx"][i_ptoken]
        # peak-token -> subregion assignment (c_i)
        peak_region = self.topics["peak_region_idx"][i_ptoken]
        # Increment document-by-topic counts
        self.topics["n_peak_tokens_doc_by_topic"][peak_doc, peak_topic] += 1
        # Increment region-by-topic counts
        self.topics["n_peak_tokens_region_by_topic"][peak_region, peak_topic] += 1

    # Randomly initialize word->topic assignments (z) for each word token w_i:
    # sample z_i proportional to p(topic|doc_i)
    for i_wtoken, word in enumerate(self.data["wtoken_word_idx"]):
        # w_i doc-index
        doc = self.data["wtoken_doc_idx"][i_wtoken]

        # Estimate p(t|d) for current doc
        p_topic_g_doc = (
            self.topics["n_peak_tokens_doc_by_topic"][doc, :] + self.params["gamma"]
        )

        # Sample a topic from p(t|d) for the z-assignment
        # Compute a cdf of the sampling distribution for z
        probs = np.cumsum(p_topic_g_doc)

        # How many elements of cdf are less than sample
        random_threshold = np.random.rand() * probs[-1]
        # z = # elements of cdf less than rand-sample
        topic = np.sum(probs < random_threshold)

        # Update model assignment vectors and count-matrices to reflect z
        # Word-token -> topic assignment (z_i)
        self.topics["wtoken_topic_idx"][i_wtoken] = topic
        self.topics["n_word_tokens_word_by_topic"][word, topic] += 1
        self.topics["total_n_word_tokens_by_topic"][0, topic] += 1
        self.topics["n_word_tokens_doc_by_topic"][doc, topic] += 1
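# Self-contained sketch (not from the source) of the inverse-CDF sampling trick used in
# the final loop above to draw a topic z proportional to unnormalized weights p(t|d):
# build the cumulative sum and count how many entries fall below a uniform draw.
# The weights below are made up for illustration.
import numpy as np

rng = np.random.default_rng(0)
p_topic_g_doc = np.array([0.5, 2.0, 1.0, 0.5])  # unnormalized topic weights for one document
probs = np.cumsum(p_topic_g_doc)                # [0.5, 2.5, 3.5, 4.0]
random_threshold = rng.random() * probs[-1]
topic = int(np.sum(probs < random_threshold))   # index of the sampled topic
print(topic)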
def feature_spatial(mel_IC):
    """Extract the spatial feature scores.

    For each IC it determines the fraction of the mixture modeled
    thresholded Z-maps respectively located within the CSF or at the brain
    edges, using predefined standardized masks.

    Parameters
    ----------
    mel_IC : str or niimg_like
        Full path of the nii.gz file containing mixture-modeled thresholded
        (p<0.5) Z-maps, registered to the MNI152 2mm template

    Returns
    -------
    edge_fract : array_like
        Array of the edge fraction feature scores for the components of the
        mel_IC file
    csf_fract : array_like
        Array of the CSF fraction feature scores for the components of the
        mel_IC file
    """
    # Get the number of ICs
    mel_IC_img = load_niimg(mel_IC)
    num_ICs = mel_IC_img.shape[3]

    masks_dir = utils.get_resource_path()
    csf_mask = os.path.join(masks_dir, "mask_csf.nii.gz")
    edge_mask = os.path.join(masks_dir, "mask_edge.nii.gz")
    out_mask = os.path.join(masks_dir, "mask_out.nii.gz")

    # Loop over ICs
    edge_fract = np.zeros(num_ICs)
    csf_fract = np.zeros(num_ICs)
    for i in range(num_ICs):
        # Extract IC from the merged melodic_IC_thr2MNI2mm file
        temp_IC = image.index_img(mel_IC, i)

        # Change to absolute Z-values
        temp_IC = image.math_img("np.abs(img)", img=temp_IC)

        # Get sum of Z-values within the total Z-map (calculate via the mean
        # and number of non-zero voxels)
        temp_IC_data = temp_IC.get_fdata()
        tot_sum = np.sum(temp_IC_data)

        if tot_sum == 0:
            LGR.info(
                "\t- The spatial map of component {} is empty. "
                "Please check!".format(i + 1)
            )

        # Get sum of Z-values of the voxels located within the CSF
        # (calculate via the mean and number of non-zero voxels)
        csf_data = masking.apply_mask(temp_IC, csf_mask)
        csf_sum = np.sum(csf_data)

        # Get sum of Z-values of the voxels located within the edge
        # (calculate via the mean and number of non-zero voxels)
        edge_data = masking.apply_mask(temp_IC, edge_mask)
        edge_sum = np.sum(edge_data)

        # Get sum of Z-values of the voxels located outside the brain
        # (calculate via the mean and number of non-zero voxels)
        out_data = masking.apply_mask(temp_IC, out_mask)
        out_sum = np.sum(out_data)

        # Determine edge and CSF fraction
        if tot_sum != 0:
            edge_fract[i] = (out_sum + edge_sum) / (tot_sum - csf_sum)
            csf_fract[i] = csf_sum / tot_sum
        else:
            edge_fract[i] = 0
            csf_fract[i] = 0

    # Return feature scores
    return edge_fract, csf_fract
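# Worked toy example (not from the source) of the fraction computation at the end of the
# loop above: given summed |z| inside the whole map, the CSF mask, the edge mask, and
# outside the brain, the two feature scores are simple ratios. Numbers are made up.
tot_sum, csf_sum, edge_sum, out_sum = 100.0, 10.0, 20.0, 5.0
edge_fract = (out_sum + edge_sum) / (tot_sum - csf_sum)  # (5 + 20) / (100 - 10)
csf_fract = csf_sum / tot_sum                            # 10 / 100 = 0.1
print(edge_fract, csf_fract)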
def gclda_decode_roi(model, roi, topic_priors=None, prior_weight=1.0):
    r"""Perform image-to-text decoding for discrete inputs using method from Rubin et al. (2017).

    The method used in this function was originally described in :footcite:t:`rubin2017decoding`.

    Parameters
    ----------
    model : :obj:`~nimare.annotate.gclda.GCLDAModel`
        Model object needed for decoding.
    roi : :obj:`nibabel.nifti1.Nifti1Image` or :obj:`str`
        Binary image to decode into text. If string, path to a file with the binary image.
    topic_priors : :obj:`numpy.ndarray` of :obj:`float`, optional
        A 1d array of size (n_topics) with values for topic weighting.
        If None, no weighting is done. Default is None.
    prior_weight : :obj:`float`, optional
        The weight by which the prior will affect the decoding.
        Default is 1.

    Returns
    -------
    decoded_df : :obj:`pandas.DataFrame`
        A DataFrame with the word-tokens and their associated weights.
    topic_weights : :obj:`numpy.ndarray` of :obj:`float`
        The weights of the topics used in decoding.

    Notes
    -----
    ======================    ==============================================================
    Notation                  Meaning
    ======================    ==============================================================
    :math:`v`                 Voxel
    :math:`t`                 Topic
    :math:`w`                 Word type
    :math:`r`                 Region of interest (ROI)
    :math:`p(t|v)`            Probability of topic given voxel (``p_topic_g_voxel``)
    :math:`\\tau_{t}`          Topic weight vector (``topic_weights``)
    :math:`p(w|t)`            Probability of word type given topic (``p_word_g_topic``)
    ======================    ==============================================================

    1.  Compute :math:`p(t|v)`.

            - From :func:`gclda.model.Model.get_spatial_probs()`

    2.  Compute topic weight vector (:math:`\\tau_{t}`) by adding across voxels within ROI.

            - :math:`\\tau_{t} = \sum_{i} {p(t|v_{i})}`

    3.  Multiply :math:`\\tau_{t}` by :math:`p(w|t)`.

            - :math:`p(w|r) \propto \\tau_{t} \cdot p(w|t)`

    4.  The resulting vector (``word_weights``) reflects arbitrarily scaled term weights for
        the ROI.

    See Also
    --------
    :class:`~nimare.annotate.gclda.GCLDAModel`
    :func:`~nimare.decode.continuous.gclda_decode_map`
    :func:`~nimare.decode.encode.gclda_encode`

    References
    ----------
    .. footbibliography::
    """
    roi = load_niimg(roi)

    dset_aff = model.mask.affine
    if not np.array_equal(roi.affine, dset_aff):
        raise ValueError(
            "Input roi must have same affine as mask img:\n"
            f"{np.array2string(roi.affine)}\n{np.array2string(dset_aff)}"
        )

    # Load ROI file and get ROI voxels overlapping with brain mask
    mask_vec = model.mask.get_fdata().ravel().astype(bool)
    roi_vec = roi.get_fdata().astype(bool).ravel()
    roi_vec = roi_vec[mask_vec]
    roi_idx = np.where(roi_vec)[0]
    p_topic_g_roi = model.p_topic_g_voxel_[roi_idx, :]  # p(T|V) for voxels in ROI only
    topic_weights = np.sum(p_topic_g_roi, axis=0)  # Sum across voxels in the ROI
    if topic_priors is not None:
        weighted_priors = weight_priors(topic_priors, prior_weight)
        topic_weights *= weighted_priors

    # Multiply topic_weights by topic-by-word matrix (p_word_g_topic).
    # n_word_tokens_per_topic = np.sum(model.n_word_tokens_word_by_topic, axis=0)
    # p_word_g_topic = model.n_word_tokens_word_by_topic / n_word_tokens_per_topic[None, :]
    # p_word_g_topic = np.nan_to_num(p_word_g_topic, 0)
    word_weights = np.dot(model.p_word_g_topic_, topic_weights)

    decoded_df = pd.DataFrame(index=model.vocabulary, columns=["Weight"], data=word_weights)
    decoded_df.index.name = "Term"
    return decoded_df, topic_weights
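# Minimal numeric sketch (not from the source) of the decoding steps in the docstring:
# sum p(t|v) over ROI voxels to get topic weights, then project through p(w|t) to get
# term weights. The tiny matrices below are synthetic stand-ins for the model attributes
# p_topic_g_voxel_ and p_word_g_topic_.
import numpy as np

p_topic_g_voxel = np.array([[0.2, 0.8],
                            [0.6, 0.4],
                            [0.5, 0.5]])                   # 3 voxels x 2 topics
roi_idx = np.array([0, 2])                                 # voxels inside the ROI
topic_weights = p_topic_g_voxel[roi_idx, :].sum(axis=0)    # tau_t, shape (2,)
p_word_g_topic = np.array([[0.7, 0.1],
                           [0.3, 0.9]])                    # 2 word types x 2 topics
word_weights = p_word_g_topic.dot(topic_weights)           # arbitrarily scaled term weights
print(word_weights)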
def gclda_decode_map(model, image, topic_priors=None, prior_weight=1):
    r"""Perform image-to-text decoding for continuous inputs using method from Rubin et al. (2017).

    The method used in this function was originally described in :footcite:t:`rubin2017decoding`.

    Parameters
    ----------
    model : :obj:`~nimare.annotate.gclda.GCLDAModel`
        Model object needed for decoding.
    image : :obj:`nibabel.nifti1.Nifti1Image` or :obj:`str`
        Whole-brain image to decode into text. Must be in same space as model and dataset.
        Model's template available in `model.dataset.mask_img`.
    topic_priors : :obj:`numpy.ndarray` of :obj:`float`, optional
        A 1d array of size (n_topics) with values for topic weighting.
        If None, no weighting is done. Default is None.
    prior_weight : :obj:`float`, optional
        The weight by which the prior will affect the decoding.
        Default is 1.

    Returns
    -------
    decoded_df : :obj:`pandas.DataFrame`
        A DataFrame with the word-tokens and their associated weights.
    topic_weights : :obj:`numpy.ndarray` of :obj:`float`
        The weights of the topics used in decoding.

    Notes
    -----
    ======================    ==============================================================
    Notation                  Meaning
    ======================    ==============================================================
    :math:`v`                 Voxel
    :math:`t`                 Topic
    :math:`w`                 Word type
    :math:`i`                 Input image
    :math:`p(t|v)`            Probability of topic given voxel (``p_topic_g_voxel``)
    :math:`\\tau_{t}`          Topic weight vector (``topic_weights``)
    :math:`p(w|t)`            Probability of word type given topic (``p_word_g_topic``)
    :math:`\omega`            1d array from input image (``input_values``)
    ======================    ==============================================================

    1.  Compute :math:`p(t|v)` (``p_topic_g_voxel``).

            - From :func:`gclda.model.Model.get_spatial_probs()`

    2.  Squeeze input image to 1d array :math:`\omega` (``input_values``).

    3.  Compute topic weight vector (:math:`\\tau_{t}`) by multiplying :math:`p(t|v)` by
        input image.

            - :math:`\\tau_{t} = p(t|v) \cdot \omega`

    4.  Multiply :math:`\\tau_{t}` by :math:`p(w|t)`.

            - :math:`p(w|i) \propto \\tau_{t} \cdot p(w|t)`

    5.  The resulting vector (``word_weights``) reflects arbitrarily scaled term weights for
        the input image.

    See Also
    --------
    :class:`~nimare.annotate.gclda.GCLDAModel`
    :func:`~nimare.decode.discrete.gclda_decode_roi`
    :func:`~nimare.decode.encode.gclda_encode`

    References
    ----------
    .. footbibliography::
    """
    image = load_niimg(image)

    # Load image file and get voxel values
    input_values = apply_mask(image, model.mask)
    topic_weights = np.squeeze(np.dot(model.p_topic_g_voxel_.T, input_values[:, None]))
    if topic_priors is not None:
        weighted_priors = weight_priors(topic_priors, prior_weight)
        topic_weights *= weighted_priors

    # Multiply topic_weights by topic-by-word matrix (p_word_g_topic).
    # n_word_tokens_per_topic = np.sum(model.n_word_tokens_word_by_topic, axis=0)
    # p_word_g_topic = model.n_word_tokens_word_by_topic / n_word_tokens_per_topic[None, :]
    # p_word_g_topic = np.nan_to_num(p_word_g_topic, 0)
    word_weights = np.dot(model.p_word_g_topic_, topic_weights)

    decoded_df = pd.DataFrame(index=model.vocabulary, columns=["Weight"], data=word_weights)
    decoded_df.index.name = "Term"
    return decoded_df, topic_weights
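# Analogous sketch (not from the source) for the continuous case: the masked image values
# replace the ROI indicator, so topic weights become a weighted sum over voxels. Arrays
# are synthetic stand-ins for p_topic_g_voxel_, p_word_g_topic_, and the masked image.
import numpy as np

p_topic_g_voxel = np.array([[0.2, 0.8],
                            [0.6, 0.4],
                            [0.5, 0.5]])             # 3 voxels x 2 topics
input_values = np.array([1.5, 0.0, 2.0])             # masked statistic values per voxel
topic_weights = p_topic_g_voxel.T.dot(input_values)  # tau_t
p_word_g_topic = np.array([[0.7, 0.1],
                           [0.3, 0.9]])              # 2 word types x 2 topics
word_weights = p_word_g_topic.dot(topic_weights)
print(word_weights)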
def feature_spatial(mel_IC, metric_metadata=None):
    """Extract the spatial feature scores.

    For each IC it determines the fraction of the mixture modeled
    thresholded Z-maps respectively located within the CSF or at the brain
    edges, using predefined standardized masks.

    Parameters
    ----------
    mel_IC : str or niimg_like
        Full path of the nii.gz file containing mixture-modeled thresholded
        (p<0.5) Z-maps, registered to the MNI152 2mm template
    metric_metadata : None or dict, optional
        A dictionary containing metadata about the AROMA metrics.
        If provided, metadata for the ``edge_fract`` and ``csf_fract`` metrics
        will be added. Otherwise, no operations will be performed on this
        parameter.

    Returns
    -------
    edge_fract : array_like
        Array of the edge fraction feature scores for the components of the
        mel_IC file
    csf_fract : array_like
        Array of the CSF fraction feature scores for the components of the
        mel_IC file
    metric_metadata : None or dict
        If the ``metric_metadata`` input was None, then None will be returned.
        Otherwise, this will be a dictionary containing existing information,
        as well as new metadata for the ``edge_fract`` and ``csf_fract``
        metrics.
    """
    if isinstance(metric_metadata, dict):
        metric_metadata["edge_fract"] = {
            "LongName": "Edge content fraction",
            "Description": (
                "The fraction of thresholded component z-values at the edge of the brain. "
                "This is calculated by "
                "(1) taking the absolute value of the thresholded Z map for each component, "
                "(2) summing z-statistics from the whole brain, "
                "(3) summing z-statistics from outside of the brain, "
                "(4) summing z-statistics from voxels in CSF compartments, "
                "(5) summing z-statistics from voxels at the edge of the brain, "
                "(6) adding the sums from outside of the brain and the edge of the brain, "
                "(7) subtracting the CSF sum from the total brain sum, and "
                "(8) dividing the out-of-brain+edge-of-brain sum by the whole brain (minus CSF) "
                "sum."
            ),
            "Units": "arbitrary",
        }
        metric_metadata["csf_fract"] = {
            "LongName": "CSF content fraction",
            "Description": (
                "The fraction of thresholded component z-values in the brain's cerebrospinal "
                "fluid. "
                "This is calculated by "
                "(1) taking the absolute value of the thresholded Z map for each component, "
                "(2) summing z-statistics from the whole brain, "
                "(3) summing z-statistics from voxels in CSF compartments, and "
                "(4) dividing the CSF z-statistic sum by the whole brain z-statistic sum."
            ),
            "Units": "arbitrary",
        }

    # Get the number of ICs
    mel_IC_img = load_niimg(mel_IC)
    num_ICs = mel_IC_img.shape[3]

    masks_dir = utils.get_resource_path()
    csf_mask = os.path.join(masks_dir, "mask_csf.nii.gz")
    edge_mask = os.path.join(masks_dir, "mask_edge.nii.gz")
    out_mask = os.path.join(masks_dir, "mask_out.nii.gz")

    # Loop over ICs
    edge_fract = np.zeros(num_ICs)
    csf_fract = np.zeros(num_ICs)
    for i in range(num_ICs):
        # Extract IC from the merged melodic_IC_thr2MNI2mm file
        temp_IC = image.index_img(mel_IC, i)

        # Change to absolute Z-values
        temp_IC = image.math_img("np.abs(img)", img=temp_IC)

        # Get sum of Z-values within the total Z-map (calculate via the mean
        # and number of non-zero voxels)
        temp_IC_data = temp_IC.get_fdata()
        tot_sum = np.sum(temp_IC_data)

        if tot_sum == 0:
            LGR.info(
                "\t- The spatial map of component {} is empty. "
                "Please check!".format(i + 1)
            )

        # Get sum of Z-values of the voxels located within the CSF
        # (calculate via the mean and number of non-zero voxels)
        csf_data = masking.apply_mask(temp_IC, csf_mask)
        csf_sum = np.sum(csf_data)

        # Get sum of Z-values of the voxels located within the edge
        # (calculate via the mean and number of non-zero voxels)
        edge_data = masking.apply_mask(temp_IC, edge_mask)
        edge_sum = np.sum(edge_data)

        # Get sum of Z-values of the voxels located outside the brain
        # (calculate via the mean and number of non-zero voxels)
        out_data = masking.apply_mask(temp_IC, out_mask)
        out_sum = np.sum(out_data)

        # Determine edge and CSF fraction
        if tot_sum != 0:
            edge_fract[i] = (out_sum + edge_sum) / (tot_sum - csf_sum)
            csf_fract[i] = csf_sum / tot_sum
        else:
            edge_fract[i] = 0
            csf_fract[i] = 0

    # Return feature scores
    return edge_fract, csf_fract, metric_metadata
def denoising(in_file, out_dir, mixing, den_type, den_idx):
    """Remove noise components from fMRI data.

    Parameters
    ----------
    in_file : str
        Full path to the data file (nii.gz) which has to be denoised
    out_dir : str
        Full path of the output directory
    mixing : numpy.ndarray of shape (T, C)
        Mixing matrix.
    den_type : {"aggr", "nonaggr", "both"}
        Type of requested denoising ('aggr': aggressive, 'nonaggr': non-aggressive,
        'both': both aggressive and non-aggressive)
    den_idx : array_like
        Index of the components that should be regressed out

    Output
    ------
    desc-smoothAROMA<den_type>_bold.nii.gz : The denoised fMRI data
    """
    # Check if denoising is needed (i.e. are there motion components?)
    motion_components_found = den_idx.size > 0

    nonaggr_denoised_file = op.join(out_dir, "desc-smoothAROMAnonaggr_bold.nii.gz")
    aggr_denoised_file = op.join(out_dir, "desc-smoothAROMAaggr_bold.nii.gz")

    if motion_components_found:
        motion_components = mixing[:, den_idx]

        # Create a fake mask to make it easier to reshape the full data to 2D
        img = load_niimg(in_file)
        full_mask = nib.Nifti1Image(np.ones(img.shape[:3], int), img.affine)
        data = masking.apply_mask(img, full_mask)  # T x S

        # Non-aggressive denoising of the data using fsl_regfilt
        # (partial regression), if requested
        if den_type in ("nonaggr", "both"):
            # Fit GLM to all components
            betas = np.linalg.lstsq(mixing, data, rcond=None)[0]

            # Denoise the data using the betas from just the bad components.
            pred_data = np.dot(motion_components, betas[den_idx, :])
            data_denoised = data - pred_data

            # Save to file.
            img_denoised = masking.unmask(data_denoised, full_mask)
            img_denoised.to_filename(nonaggr_denoised_file)

        # Aggressive denoising of the data using fsl_regfilt (full regression)
        if den_type in ("aggr", "both"):
            # Denoise the data with the bad components.
            betas = np.linalg.lstsq(motion_components, data, rcond=None)[0]
            pred_data = np.dot(motion_components, betas)
            data_denoised = data - pred_data

            # Save to file.
            img_denoised = masking.unmask(data_denoised, full_mask)
            img_denoised.to_filename(aggr_denoised_file)
    else:
        LGR.warning(
            " - None of the components were classified as motion, so no denoising "
            "is applied (the input file is copied as-is)."
        )
        if den_type in ("nonaggr", "both"):
            shutil.copyfile(in_file, nonaggr_denoised_file)
        if den_type in ("aggr", "both"):
            shutil.copyfile(in_file, aggr_denoised_file)
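# Self-contained sketch (not from the source) contrasting the two regression variants on
# synthetic data: non-aggressive regression fits all components but removes only the
# flagged ones, while aggressive regression fits and removes the flagged components alone.
# All shapes and the flagged indices are made up for illustration.
import numpy as np

rng = np.random.default_rng(0)
n_vols, n_comps, n_voxels = 50, 4, 10
mixing = rng.standard_normal((n_vols, n_comps))   # T x C mixing matrix
data = rng.standard_normal((n_vols, n_voxels))    # T x S voxel data
den_idx = np.array([1, 3])                        # components flagged as motion
motion_components = mixing[:, den_idx]

# Non-aggressive (partial) regression
betas_all = np.linalg.lstsq(mixing, data, rcond=None)[0]
nonaggr = data - motion_components.dot(betas_all[den_idx, :])

# Aggressive (full) regression
betas_motion = np.linalg.lstsq(motion_components, data, rcond=None)[0]
aggr = data - motion_components.dot(betas_motion)

print(nonaggr.shape, aggr.shape)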