def test_apply_grid_to_image(self):
    """Check the shape and relative column sums of ``reduce.apply_grid`` output.

    Applies a grid with ``scale=6`` to ``self.dataset`` and verifies that the
    returned data array has shape (1435, 5) and that selected column totals
    are ordered as expected.
    """
    data, _ = reduce.apply_grid(self.dataset, scale=6)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(data.shape, (1435, 5))
    # Sum along axis 0: one total per column of the gridded data.
    sums = np.sum(data, 0)
    self.assertGreater(sums[2], sums[3])
    self.assertGreater(sums[4], sums[0])
Exemple #2
0
    def __init__(self, dataset=None, algorithm=None, output_dir='.',  grid_scale=None,
            features=None, feature_threshold=0.0, global_mask=None, roi_mask=None, 
            distance_mask=None, min_voxels_per_study=None, min_studies_per_voxel=None, 
            distance_metric=None, **kwargs):
        """ Initialize Clusterer.
        Args:
            dataset: The dataset to use for clustering. Either a Dataset instance or a numpy
                array with voxels in rows and features in columns.
            algorithm: Optional algorithm to use for clustering. If None, an algorithm 
                must be passed to the cluster() method later.
            output_dir: Directory to use for writing all outputs.
            grid_scale: Optional integer. If provided, a 3D grid will be applied to the 
                image data, with values in all voxels in each grid cell being averaged 
                prior to clustering analysis. This is an effective means of dimension 
                reduction in cases where the data are otherwise too large for clustering.
            features: Optional features to use for selecting a subset of the studies in the 
                Dataset instance. If dataset is a numpy matrix, will be ignored.
            feature_threshold: float; the threshold to use for feature selection. Will be 
                ignored if features is None.
            global_mask: An image defining the space to use for all analyses. Only necessary
                if dataset is a numpy array; when dataset is a Dataset and global_mask is
                None, the Dataset's own masker is used.
            roi_mask: An image that determines which voxels to cluster. All non-zero voxels
                will be included in the clustering analysis. When roi_mask is None, all 
                voxels in the global_mask (i.e., the whole brain) will be clustered. roi_mask
                can be an image filename, a nibabel image, or an already-masked array with 
                the same dimensions as the global_mask.
            distance_mask: An image defining the voxels to base the distance matrix 
                computation on. All non-zero voxels will be used to compute the distance
                matrix. For example, if the roi_mask contains voxels in only the insula, 
                and distance_mask contains voxels in only the cerebellum, then voxels in 
                the insula will be clustered based on the similarity of their coactivation 
                with all and only cerebellum voxels.
            min_voxels_per_study: An optional integer, intended to remove all studies with
                fewer than this number of active voxels from analysis. NOTE: currently
                accepted but ignored; the thresholding is not implemented.
            min_studies_per_voxel: An optional integer, intended to remove all voxels with
                fewer than this number of studies from analysis. NOTE: currently
                accepted but ignored; the thresholding is not implemented.
            distance_metric: Optional string providing the distance metric to use for 
                computation of a distance matrix. When None, no distance matrix is computed
                and we assume that clustering will be done on the raw data.
            **kwargs: Additional keyword arguments to pass to the clustering algorithm.

        Raises:
            ValueError: If dataset is not a Dataset instance and no global_mask is given.

        Attributes set here: output_dir, data and masker always; dataset only when a
        Dataset instance is passed; target_data only when distance_mask is given; grid
        only when grid_scale is given.

        """

        self.output_dir = output_dir

        if algorithm is not None:
            self._set_clustering_algorithm(algorithm, **kwargs)

        if isinstance(dataset, Dataset):

            self.dataset = dataset

            # Fall back to the Dataset's own masker when no explicit space is given.
            if global_mask is None:
                global_mask = dataset.masker

            if features is not None:
                data = self.dataset.get_ids_by_features(features, threshold=feature_threshold, 
                            get_image_data=True)
            else:
                data = self.dataset.get_image_data()

            # TODO: min_studies_per_voxel / min_voxels_per_study thresholding is not
            # applied yet. The intent is to keep only voxels whose study count
            # (data.sum(1)) exceeds min_studies_per_voxel and only studies whose
            # voxel count (data.sum(0)) exceeds min_voxels_per_study.

            self.data = data

        else:
            self.data = dataset

            if global_mask is None:
                raise ValueError("If dataset is a numpy array, a valid global_mask (filename, " +
                    "Mask instance, or nibabel image) must be passed.")

        # Wrap anything that is not already a Masker.
        if not isinstance(global_mask, Masker):
            global_mask = Masker(global_mask)

        self.masker = global_mask

        if distance_mask is not None:
            self.masker.add(distance_mask)
            # The masker may be the Dataset's own (shared) masker, so always call
            # reset() -- even if extraction fails -- so that it is not left with
            # distance_mask still added.
            try:
                if grid_scale is not None:
                    self.target_data, _ = nsr.apply_grid(self.data, masker=self.masker,
                                                         scale=grid_scale, threshold=None)
                else:
                    vox = self.masker.get_current_mask(in_global_mask=True)
                    self.target_data = self.data[vox, :]
            finally:
                self.masker.reset()

        if roi_mask is not None:
            self.masker.add(roi_mask)

        # Reduce self.data to the voxels (or grid cells) selected by the current mask.
        if grid_scale is not None:
            self.data, self.grid = nsr.apply_grid(self.data, masker=self.masker,
                                                  scale=grid_scale, threshold=None)
        else:
            vox = self.masker.get_current_mask(in_global_mask=True)
            self.data = self.data[vox, :]

        if distance_metric is not None:
            self.create_distance_matrix(distance_metric=distance_metric)
 def test_apply_grid_to_image(self):
     """Verify ``reduce.apply_grid`` output for ``self.dataset`` at ``scale=6``.

     The gridded data must have shape (1435, 5), and selected per-column
     totals must compare as asserted below.
     """
     data, _ = reduce.apply_grid(self.dataset, scale=6)
     # Use assertEqual: the assertEquals alias is deprecated and no longer
     # exists in unittest as of Python 3.12.
     self.assertEqual(data.shape, (1435, 5))
     # Per-column totals (sum along axis 0).
     sums = np.sum(data, 0)
     self.assertGreater(sums[2], sums[3])
     self.assertGreater(sums[4], sums[0])
Exemple #4
0
    def __init__(self,
                 dataset=None,
                 algorithm=None,
                 output_dir='.',
                 grid_scale=None,
                 features=None,
                 feature_threshold=0.0,
                 global_mask=None,
                 roi_mask=None,
                 distance_mask=None,
                 min_voxels_per_study=None,
                 min_studies_per_voxel=None,
                 distance_metric=None,
                 **kwargs):
        """ Initialize Clusterer.
        Args:
            dataset: The dataset to use for clustering. Either a Dataset instance or a numpy
                array with voxels in rows and features in columns.
            algorithm: Optional algorithm to use for clustering. If None, an algorithm 
                must be passed to the cluster() method later.
            output_dir: Directory to use for writing all outputs.
            grid_scale: Optional integer. If provided, a 3D grid will be applied to the 
                image data, with values in all voxels in each grid cell being averaged 
                prior to clustering analysis. This is an effective means of dimension 
                reduction in cases where the data are otherwise too large for clustering.
            features: Optional features to use for selecting a subset of the studies in the 
                Dataset instance. If dataset is a numpy matrix, will be ignored.
            feature_threshold: float; the threshold to use for feature selection. Will be 
                ignored if features is None.
            global_mask: An image defining the space to use for all analyses. Only necessary
                if dataset is a numpy array; when dataset is a Dataset and global_mask is
                None, the Dataset's own masker is used.
            roi_mask: An image that determines which voxels to cluster. All non-zero voxels
                will be included in the clustering analysis. When roi_mask is None, all 
                voxels in the global_mask (i.e., the whole brain) will be clustered. roi_mask
                can be an image filename, a nibabel image, or an already-masked array with 
                the same dimensions as the global_mask.
            distance_mask: An image defining the voxels to base the distance matrix 
                computation on. All non-zero voxels will be used to compute the distance
                matrix. For example, if the roi_mask contains voxels in only the insula, 
                and distance_mask contains voxels in only the cerebellum, then voxels in 
                the insula will be clustered based on the similarity of their coactivation 
                with all and only cerebellum voxels.
            min_voxels_per_study: An optional integer, intended to remove all studies with
                fewer than this number of active voxels from analysis. NOTE: this argument
                is currently accepted but ignored; no thresholding is performed.
            min_studies_per_voxel: An optional integer, intended to remove all voxels with
                fewer than this number of studies from analysis. NOTE: this argument is
                currently accepted but ignored; no thresholding is performed.
            distance_metric: Optional string providing the distance metric to use for 
                computation of a distance matrix. When None, no distance matrix is computed
                and we assume that clustering will be done on the raw data.
            **kwargs: Additional keyword arguments to pass to the clustering algorithm.

        Raises:
            ValueError: If dataset is not a Dataset instance and no global_mask is given.

        Note:
            self.target_data is only assigned when distance_mask is provided, and
            self.grid only when grid_scale is provided.

        """

        self.output_dir = output_dir

        if algorithm is not None:
            self._set_clustering_algorithm(algorithm, **kwargs)

        if isinstance(dataset, Dataset):

            self.dataset = dataset

            # Default the analysis space to the masker carried by the Dataset.
            if global_mask is None:
                global_mask = dataset.masker

            if features is not None:
                data = self.dataset.get_ids_by_features(
                    features, threshold=feature_threshold, get_image_data=True)
            else:
                data = self.dataset.get_image_data()

            # TODO: thresholding by min_studies_per_voxel (rows whose data.sum(1)
            # exceeds the limit) and min_voxels_per_study (columns whose
            # data.sum(0) exceeds the limit) is not applied; both arguments are
            # currently unused.

            self.data = data

        else:
            self.data = dataset

            if global_mask is None:
                raise ValueError(
                    "If dataset is a numpy array, a valid global_mask (filename, "
                    + "Mask instance, or nibabel image) must be passed.")

        # Anything that is not already a Masker is wrapped in one.
        if not isinstance(global_mask, Masker):
            global_mask = Masker(global_mask)

        self.masker = global_mask

        if distance_mask is not None:
            self.masker.add(distance_mask)
            # reset() runs in a finally clause: the masker can be shared with the
            # Dataset, so it must not keep distance_mask added if extraction of
            # target_data raises.
            try:
                if grid_scale is not None:
                    self.target_data, _ = nsr.apply_grid(self.data,
                                                         masker=self.masker,
                                                         scale=grid_scale,
                                                         threshold=None)
                else:
                    vox = self.masker.get_current_mask(in_global_mask=True)
                    self.target_data = self.data[vox, :]
            finally:
                self.masker.reset()

        if roi_mask is not None:
            self.masker.add(roi_mask)

        # Restrict self.data to what the current mask selects, gridded if requested.
        if grid_scale is not None:
            self.data, self.grid = nsr.apply_grid(self.data,
                                                  masker=self.masker,
                                                  scale=grid_scale,
                                                  threshold=None)
        else:
            vox = self.masker.get_current_mask(in_global_mask=True)
            self.data = self.data[vox, :]

        if distance_metric is not None:
            self.create_distance_matrix(distance_metric=distance_metric)