def make_mask_map_4d(data, infile, outfile):
    """ Write a 4D image in which the levels of infile are replaced by values.

    One 3D volume is produced per row of data. Within a volume, every voxel
    whose level in infile equals k (1-based) receives the k-th value of that
    row; voxels whose level has no matching value keep their original level.

    data: values for levels in infile. Shape = [4th dimension, regions]
    infile: input file to replace levels with values
    outfile: output file name
    """
    from neurosynth.base.mask import Masker
    from neurosynth.base import imageutils
    from nibabel import nifti1

    data = np.array(data)

    # Load image with masker (infile serves as its own mask)
    masker = Masker(infile)
    img = imageutils.load_imgs(infile, masker)

    # Extend the 3D header with a 4th dimension, one entry per row of data
    header = masker.get_header()
    spatial_shape = header.get_data_shape()[0:3]
    header.set_data_shape(spatial_shape + (data.shape[0], ))

    volumes = []
    for row in data:
        volume = img.copy()
        # Always match against the untouched img so replacements in this
        # volume can never be mistaken for original levels.
        for level, value in enumerate(row, start=1):
            np.place(volume, img == level, [value])
        volumes.append(volume)

    stacked = np.hstack(volumes)

    header.set_data_dtype(stacked.dtype)  # Avoids loss of precision

    unmasked = masker.unmask(stacked).squeeze()
    nifti1.Nifti1Image(unmasked, None, header).to_filename(outfile)
def make_mask_map(data, infile, outfile, index=None):
    """ Replace the levels of a parcellation image with values and save it.

    data: values to write, one per level; data[i] replaces level index[i]
    infile: input file whose (rounded) levels are replaced; it is also used
        as the mask
    outfile: output file name
    index: optional sequence of levels corresponding to the entries of data.
        If None, the sorted unique levels found in infile are used. If given,
        levels that are not listed in index are set to 0 (see note below).

    Raises IndexError if data has more entries than index.
    """
    from neurosynth.base.mask import Masker
    from neurosynth.base import imageutils

    # Load image with masker
    masker = Masker(infile)
    img = imageutils.load_imgs(infile, masker)

    img = np.round(img)

    data = list(data)

    if index is None:
        index = np.unique(img)
        rev_index = None
    else:
        # NOTE(review): arange stops at img.max() - 1 and the result is
        # shifted by img.min() further down, so which levels count as
        # "not in index" depends on the image minimum -- confirm intent.
        all_reg = np.arange(0, img.max())
        rev_index = all_reg[np.invert(np.in1d(all_reg, index))]

    min_val = img.min()

    # Match against a snapshot of the original levels. Testing the array that
    # is being modified would re-map a freshly written value whenever it
    # happens to equal a level handled later (e.g. 1 -> 2 followed by 2 -> 1).
    levels = img.copy()
    relabelled = np.zeros(levels.shape, dtype=bool)

    for num, value in enumerate(data):
        hit = levels == index[num]
        np.place(img, hit, [value])
        relabelled |= hit

    if rev_index is not None:
        for value in rev_index:
            # Only zero voxels that did not just receive a data value.
            np.place(img, (levels == value + min_val) & ~relabelled, 0)

    img = img.astype('float32')

    imageutils.save_img(img, outfile, masker)
def __init__(self, dataset=None, method='pearson', features=None,
             mask=None, image_type='pFgA_z', threshold=0.001):
    """ Initialize a new Decoder instance.

    Args:
        dataset: An optional Dataset instance containing features to use
            in decoding.
        method: The decoding method to use (optional). By default, Pearson
            correlation. The name is lower-cased before it is stored.
        features: Optional list of features to use in decoding. If None,
            use all features found in dataset. If features is a list of
            strings, use only the subset of features in the Dataset that
            are named in the list. If features is a list of filenames,
            ignore the dataset entirely and use only the features passed
            as image files in decoding.
        mask: An optional mask to apply to features and input images. If
            None, will use the one in the current Dataset. When a dataset
            is given, the mask is added to the dataset's own masker.
        image_type: An optional string indicating the type of image to use
            when constructing feature-based images. See
            meta.analyze_features() for details. By default, uses reverse
            inference z-score images.
        threshold: If decoding from a Dataset instance, this is the
            feature threshold to use to generate the feature maps used in
            the decoding.
    """
    self.dataset = dataset

    if dataset is None:
        # No dataset: a masker exists only if a mask was passed explicitly.
        self.masker = Masker(mask) if mask is not None else None
    else:
        self.masker = self.dataset.masker
        if features is None:
            features = dataset.get_feature_names()
        if mask is not None:
            self.masker.add(mask)

    self.method = method.lower()

    if self.method != 'roi':
        self.load_features(features, image_type=image_type,
                           threshold=threshold)
    else:
        # ROI decoding keeps the feature list as given; nothing is loaded.
        self.feature_names = features
# This script takes a parcellation that may not have continuous numbers,
# removes parcels below some size and reorders in order
# while also outputting the region and community number
from neurosynth.base.mask import Masker
from neurosynth.base import imageutils
import numpy as np
import csv

# Minimum number of voxels a level needs in order to be kept
min_vox = 300

file = '../masks/Andy/aal_MNI_V4.nii'
outfile = '../masks/Andy/aal_MNI_V4_' + str(min_vox) + '.nii'

# Load image with masker
masker = Masker(file)
img = imageutils.load_imgs(file, masker)

# Get how many voxels per level and calc those that pass min_vox
count = np.bincount(img.astype('int').squeeze())

# How many levels in the original image.
# The single-argument call form of print behaves the same on Python 2 and 3.
print("Original shape:")
print(count.shape)

non_0_ix = np.where(count >= min_vox)[0]
zero_ix = np.where(count < min_vox)[0]

# Remove those not in a good community
bad = list(set(zero_ix))

# Remove
def setUp(self):
    """ Build the Masker fixture from the bundled MNI152 2mm brain mask. """
    self.masker = Masker(get_resource_path() + 'MNI152_T1_2mm_brain.nii.gz')
def __init__(self, dataset, cluster_on='coactivation', global_mask=None,
             roi_mask=None, reference_mask=None, features=None,
             feature_threshold=0.0, min_voxels_per_study=None,
             min_studies_per_voxel=None, dimension_reduction='ward',
             n_components=500, distance_metric='correlation',
             clustering_method='ward', output_dir='.', prefix=None,
             parcellation_kwargs=None, clustering_kwargs=None):
    """ Initialize Clusterer.

    Args:
        dataset (Dataset): The Dataset instance to use for clustering.
        cluster_on (str): The kind of data to use as the basis for voxel
            clustering--i.e., what defines features passed to the
            clustering algorithm. Valid options:
                'studies': features are individual studies.
                'coactivation': uses a precomputed distance matrix based
                    on coactivation of all voxels in the roi_mask across
                    all observations in the reference_data.
        global_mask: An image defining the space to use for all analyses.
            If None, a deep copy of the mask found in the Dataset will be
            used.
        roi_mask: An image that determines which voxels to cluster. All
            non-zero voxels will be included in the clustering analysis.
            When roi_mask is None, all voxels in the global_mask (i.e.,
            the whole brain) will be clustered. roi_mask can be an image
            filename, a nibabel image, or an already-masked array with
            the same dimensions as the global_mask.
        reference_mask: An image defining the voxels to base the distance
            matrix computation on. All non-zero voxels will be used to
            compute the distance matrix. For example, if the roi_mask
            contains voxels in only the insula, and reference_mask
            contains voxels in only the cerebellum, then voxels in the
            insula will be clustered based on the similarity of their
            coactivation with all and only cerebellum voxels.
        features: Optional features to use for selecting a subset of the
            studies in the Dataset instance.
        feature_threshold: float; the threshold to use for feature
            selection. Will be ignored if features is None.
        min_voxels_per_study: An optional integer. If provided, only
            studies with more than this number of active voxels are kept.
        min_studies_per_voxel: An optional integer. If provided, only
            voxels active in more than this number of studies are kept
            (applied by adding a layer to the masker).
        dimension_reduction: Either a scikit-learn object with a
            fit_transform method, or the name of the parcellation method
            to use for reducing the dimensionality of the reference mask.
            Valid options include:
                None: no parcellation
                'ward': spatially constrained hierarchical clustering;
                    see Thirion et al (2014)
                'pca': principal component analysis
                'grid': downsample the reference mask to an isometric
                    grid
            Defaults to 'ward'. Note that parcellation will only be used
            if method is set to 'coactivation' (i.e., it will be ignored
            by default).
            NOTE(review): cluster_on already defaults to 'coactivation',
            so the "ignored by default" remark looks stale -- confirm.
        n_components: Number of components to request, if using a
            dimension_reduction method. Meaning depends on parcellation
            algorithm.
        distance_metric: Optional string providing the distance metric to
            use for computation of a distance matrix. When None, no
            distance matrix is computed and we assume that clustering
            will be done on the raw data. Valid options are any of the
            strings accepted by sklearn's pairwise_distances method.
            Defaults to 'correlation'. Note that for some clustering
            methods (e.g., k-means), no distance matrix will be computed,
            and this argument will be ignored.
        clustering_method: Algorithm to use for clustering. Must be one
            of 'ward', 'spectral', 'agglomerative', 'dbscan', 'kmeans',
            or 'minik'. If None, can be set later via set_algorithm() or
            cluster().
        output_dir: Directory to use for writing all outputs. Created if
            it does not exist.
        prefix: Optional prefix to prepend to all outputted
            directories/files.
        parcellation_kwargs: Optional dict of keyword arguments to pass
            to parcellation object. None is treated as an empty dict.
        clustering_kwargs: Optional dict of keyword arguments to pass to
            clustering object. None is treated as an empty dict.
    """
    # None stands in for "no extra arguments" so that no dict object is
    # shared between calls through the signature defaults.
    if parcellation_kwargs is None:
        parcellation_kwargs = {}
    if clustering_kwargs is None:
        clustering_kwargs = {}

    self.output_dir = output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    self.prefix = prefix

    # Save all arguments for metadata output. The extra keyword arguments
    # are not local variables, so their values are taken from the dicts
    # themselves rather than looked up by name in locals().
    self.args = {
        'output_dir': output_dir,
        'features': features,
        'feature_threshold': feature_threshold,
        'global_mask': global_mask,
        'roi_mask': roi_mask,
        'reference_mask': reference_mask,
        'distance_metric': distance_metric,
    }
    self.args.update(clustering_kwargs)
    self.args.update(parcellation_kwargs)

    self.set_algorithm(clustering_method, **clustering_kwargs)

    self.dataset = dataset

    # Work on a private copy of the dataset's masker so that layers added
    # below do not leak back into the dataset.
    self.masker = deepcopy(
        dataset.masker) if global_mask is None else Masker(global_mask)

    # Condition study inclusion on specific features
    if features is not None:
        data = self.dataset.get_ids_by_features(
            features, threshold=feature_threshold, get_image_data=True)
    else:
        data = self.dataset.get_image_data()

    # Trim data based on minimum number of voxels or studies
    if min_studies_per_voxel is not None:
        logger.info("Thresholding voxels based on number of studies.")
        av = self.masker.unmask(data.sum(1) > min_studies_per_voxel,
                                output='vector')
        self.masker.add(av)

    if min_voxels_per_study is not None:
        logger.info("Thresholding studies based on number of voxels.")
        active_studies = np.where(data.sum(0) > min_voxels_per_study)[0]
        data = data[:, active_studies]

    self.data = data

    self.set_reference_data(method=cluster_on, mask=reference_mask)

    # Dimensionality reduction
    if dimension_reduction is not None:
        self.dimension_reduction(dimension_reduction, n_components)

    # Set the voxels to cluster
    if roi_mask is not None:
        self.masker.add(roi_mask)
    self.roi_data = data[self.masker.get_mask(), :]

    if distance_metric is not None:
        self.create_distance_matrix(distance_metric=distance_metric)
def __init__(self, dataset=None, algorithm=None, output_dir='.',
             grid_scale=None, features=None, feature_threshold=0.0,
             global_mask=None, roi_mask=None, distance_mask=None,
             min_voxels_per_study=None, min_studies_per_voxel=None,
             distance_metric=None, **kwargs):
    """ Initialize Clusterer.

    Args:
        dataset: The dataset to use for clustering. Either a Dataset
            instance or a numpy array with voxels in rows and features in
            columns.
        algorithm: Optional algorithm to use for clustering. If None, an
            algorithm must be passed to the cluster() method later.
        output_dir: Directory to use for writing all outputs.
        grid_scale: Optional integer. If provided, a 3D grid will be
            applied to the image data, with values in all voxels in each
            grid cell being averaged prior to clustering analysis. This
            is an effective means of dimension reduction in cases where
            the data are otherwise too large for clustering.
        features: Optional features to use for selecting a subset of the
            studies in the Dataset instance. If dataset is a numpy
            matrix, will be ignored.
        feature_threshold: float; the threshold to use for feature
            selection. Will be ignored if features is None.
        global_mask: An image defining the space to use for all analyses.
            Only necessary if dataset is a numpy array.
        roi_mask: An image that determines which voxels to cluster. All
            non-zero voxels will be included in the clustering analysis.
            When roi_mask is None, all voxels in the global_mask (i.e.,
            the whole brain) will be clustered. roi_mask can be an image
            filename, a nibabel image, or an already-masked array with
            the same dimensions as the global_mask.
        distance_mask: An image defining the voxels to base the distance
            matrix computation on. All non-zero voxels will be used to
            compute the distance matrix. For example, if the roi_mask
            contains voxels in only the insula, and distance_mask
            contains voxels in only the cerebellum, then voxels in the
            insula will be clustered based on the similarity of their
            coactivation with all and only cerebellum voxels.
        min_voxels_per_study: Accepted but currently unused; no
            thresholding of studies is performed in this constructor.
        min_studies_per_voxel: Accepted but currently unused; no
            thresholding of voxels is performed in this constructor.
        distance_metric: Optional string providing the distance metric to
            use for computation of a distance matrix. When None, no
            distance matrix is computed and we assume that clustering
            will be done on the raw data.
        **kwargs: Additional keyword arguments to pass to the clustering
            algorithm.

    Raises:
        ValueError: if dataset is not a Dataset instance and no
            global_mask is given.
    """
    self.output_dir = output_dir

    if algorithm is not None:
        self._set_clustering_algorithm(algorithm, **kwargs)

    if isinstance(dataset, Dataset):
        self.dataset = dataset

        if global_mask is None:
            global_mask = dataset.masker

        if features is None:
            image_data = self.dataset.get_image_data()
        else:
            image_data = self.dataset.get_ids_by_features(
                features, threshold=feature_threshold,
                get_image_data=True)

        self.data = image_data
    else:
        self.data = dataset

        if global_mask is None:
            raise ValueError(
                "If dataset is a numpy array, a valid global_mask "
                "(filename, Mask instance, or nibabel image) must be "
                "passed.")

    # Anything that is not already a Masker gets wrapped in one.
    self.masker = (global_mask if isinstance(global_mask, Masker)
                   else Masker(global_mask))

    use_grid = grid_scale is not None

    if distance_mask is not None:
        # Temporarily restrict the masker to the distance mask to extract
        # the comparison data, then restore it before the ROI is applied.
        self.masker.add(distance_mask)
        if use_grid:
            self.target_data, _ = nsr.apply_grid(
                self.data, masker=self.masker, scale=grid_scale,
                threshold=None)
        else:
            selected = self.masker.get_current_mask(in_global_mask=True)
            self.target_data = self.data[selected, :]
        self.masker.reset()

    if roi_mask is not None:
        self.masker.add(roi_mask)

    if use_grid:
        self.data, self.grid = nsr.apply_grid(
            self.data, masker=self.masker, scale=grid_scale,
            threshold=None)
    else:
        selected = self.masker.get_current_mask(in_global_mask=True)
        self.data = self.data[selected, :]

    if distance_metric is not None:
        self.create_distance_matrix(distance_metric=distance_metric)