Python Maskerの例

プログラミング言語: Python

名前空間/パッケージ名: neurosynth.base.mask

クラス/型: Masker

hotexamples.comのコード掲載数: 14

Python Masker - 14件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのneurosynth.base.mask.Maskerの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

Masker(7)

add(1)

get_current_mask(1)

get_header(1)

get_mask(1)

remove(1)

unmask(1)

コード例 #1

ファイルを表示

ファイル: tools.py プロジェクト: margulies/NS_Classify

def make_mask_map_4d(data, infile, outfile):
    """ Write a 4D image built by substituting region labels with values.

    For every row of data a copy of the label image is made and the voxels
    labelled 1, 2, 3, ... are overwritten with that row's values; the copies
    are then stacked along the 4th dimension and saved.

    data: values for levels in infile. Shape = [4th dimension, regions]
    infile: input file whose levels are replaced with values (also used
        to build the Masker)
    outfile: output file name
    """
    from neurosynth.base.mask import Masker
    from neurosynth.base import imageutils
    from nibabel import nifti1

    data = np.array(data)

    # The label image is loaded through a masker built from the same file
    masker = Masker(infile)
    img = imageutils.load_imgs(infile, masker)

    # Keep the first three header dimensions, add one volume per row of data
    header = masker.get_header()
    spatial_dims = header.get_data_shape()[0:3]
    header.set_data_shape(spatial_dims + (data.shape[0],))

    volumes = []
    for level_values in data:
        volume = img.copy()
        # Always match against the untouched img so that a freshly written
        # value can never be mistaken for a label
        for label, value in enumerate(level_values, start=1):
            np.place(volume, img == label, [value])
        volumes.append(volume)

    stacked = np.hstack(volumes)

    header.set_data_dtype(stacked.dtype)  # Avoids loss of precision
    unmasked = masker.unmask(stacked).squeeze()
    nifti1.Nifti1Image(unmasked, None, header).to_filename(outfile)

コード例 #2

ファイルを表示

ファイル: tools.py プロジェクト: adelavega/ns_classify

def make_mask_map_4d(data, infile, outfile):
    """ Make a 4D mask map from a label image and a table of values.

    data: values for levels in infile. Shape = [4th dimension, regions]
    infile: input file to replace levels with values
    outfile: output file name

    Row t of data produces volume t of the output: the voxels whose label
    is k + 1 receive data[t][k].
    """
    from neurosynth.base.mask import Masker
    from neurosynth.base import imageutils
    from nibabel import nifti1

    data = np.array(data)
    n_volumes = data.shape[0]

    # Load image with masker
    masker = Masker(infile)
    img = imageutils.load_imgs(infile, masker)

    # Extend the header's shape with the new 4th dimension
    header = masker.get_header()
    header.set_data_shape(header.get_data_shape()[0:3] + (n_volumes, ))

    def _fill(row):
        # One output volume: a copy of img with each label replaced by the
        # matching entry of row (labels are matched on the original img).
        filled = img.copy()
        for offset, value in enumerate(row):
            np.place(filled, img == offset + 1, [value])
        return filled

    result = np.hstack([_fill(row) for row in data])

    header.set_data_dtype(result.dtype)  # Avoids loss of precision
    out = nifti1.Nifti1Image(masker.unmask(result).squeeze(), None, header)
    out.to_filename(outfile)

コード例 #3

ファイルを表示

ファイル: decode.py プロジェクト: dandolo2020/neurosynth-base-code

    def __init__(self, dataset=None, method='pearson', features=None,
                 mask=None, image_type='pFgA_z', threshold=0.001):
        """ Set up a new Decoder: pick the masker, then load the features.

        Args:
            dataset: Optional Dataset instance supplying the features. When
                given, its masker is reused by the Decoder.
            method: Name of the decoding method (case-insensitive).
                Defaults to 'pearson'.
            features: Optional list of features. If None and a dataset is
                given, all feature names of the dataset are used. A list of
                strings selects the named features of the Dataset; a list
                of filenames makes the decoder use those image files
                instead of the dataset.
            mask: Optional mask for features and input images. With a
                dataset it is added to the dataset's masker; without one a
                new Masker is built from it. If both are None the decoder
                has no masker.
            image_type: Optional string naming the kind of image to use
                when building feature-based images. See
                meta.analyze_features() for details.
            threshold: Feature threshold used to generate the feature maps
                when decoding from a Dataset instance.
        """
        self.dataset = dataset

        if dataset is None:
            # No dataset: a masker exists only if a mask was supplied
            self.masker = Masker(mask) if mask is not None else None
        else:
            self.masker = self.dataset.masker
            if features is None:
                features = dataset.get_feature_names()
            if mask is not None:
                self.masker.add(mask)

        self.method = method.lower()

        # 'roi' decoding only records the feature names; every other method
        # loads the feature images up front.
        if self.method != 'roi':
            self.load_features(features,
                               image_type=image_type,
                               threshold=threshold)
        else:
            self.feature_names = features

コード例 #4

ファイルを表示

ファイル: tools.py プロジェクト: adelavega/ns_classify

def make_mask_map(data, infile, outfile, index=None):
    """ Replace region labels of an image with values and save the result.

    data: iterable of replacement values; the n-th value is written into
        the voxels whose rounded label equals index[n].
    infile: image file holding the region labels (also used to build the
        Masker).
    outfile: output file name; the image is saved as float32.
    index: optional sequence of labels, in the same order as data. If
        None, the unique labels found in the image are used and no region
        is zeroed out.
    """
    from neurosynth.base.mask import Masker
    from neurosynth.base import imageutils

    # Load image with masker
    masker = Masker(infile)
    img = imageutils.load_imgs(infile, masker)

    img = np.round(img)

    # Pristine copy of the labels. Matching against it (instead of against
    # the array being overwritten) stops a value that happens to equal a
    # later label from being replaced a second time.
    labels = img.copy()

    data = list(data)

    if index is None:
        index = np.unique(img)
        rev_index = None
    else:
        all_reg = np.arange(0, img.max())
        rev_index = all_reg[np.invert(np.in1d(all_reg, index))]

    min_val = img.min()

    for num, value in enumerate(data):
        np.place(img, labels == index[num], [value])

    if rev_index is not None:
        # NOTE(review): rev_index is computed on a 0-based range and shifted
        # by min_val, while index is compared to labels unshifted; this pass
        # is kept exactly as it was - confirm the intended label space.
        for value in rev_index:
            np.place(img, img == value + min_val, 0)

    img = img.astype('float32')

    imageutils.save_img(img, outfile, masker)

コード例 #5

ファイルを表示

ファイル: test_base.py プロジェクト: simudream/neurosynth

class TestMasker(unittest.TestCase):
    """ Tests for adding and removing mask layers on a Masker. """

    def setUp(self):
        """ Build the Masker under test from the MNI152 2mm brain image. """
        self.masker = Masker(get_resource_path() + 'MNI152_T1_2mm_brain.nii.gz')

    def test_add_and_remove_masks(self):
        """ Layers can be added by path or by name and removed again. """
        data_dir = get_test_data_path()
        masker = self.masker

        # One unnamed layer (by path) and one named layer (by dict)
        masker.add(data_dir + 'sgacc_mask.nii.gz')
        masker.add({'motor': data_dir + 'medial_motor.nii.gz'})
        self.assertEqual(len(masker.layers), 2)
        self.assertEqual(len(masker.stack), 2)
        self.assertEqual(set(masker.layers), {'layer_0', 'motor'})
        self.assertEqual(np.sum(masker.layers['motor']), 1419)

        # Removal by name
        masker.remove('motor')
        self.assertEqual(len(masker.layers), 1)
        self.assertEqual(len(masker.stack), 1)

        # Removal by position: the most recently added layer goes away
        masker.add(data_dir + 'medial_motor.nii.gz')
        masker.remove(-1)
        self.assertTrue('layer_0' in masker.layers)
        self.assertEqual(len(masker.layers), 1)

コード例 #6

ファイルを表示

ファイル: decode.py プロジェクト: johnsonc/neurosynth

    def __init__(self, dataset=None, method='pearson', features=None,
                 mask=None, image_type='pFgA_z', threshold=0.001):
        """ Create a Decoder and prepare its masker and features.

        Args:
            dataset: Optional Dataset instance containing the features to
                decode with; its masker becomes the decoder's masker.
            method: Decoding method name, lower-cased before it is stored.
                Defaults to 'pearson'.
            features: Optional list of features to use. None means every
                feature name reported by the dataset (when one is given).
                A list of strings restricts decoding to the named Dataset
                features; a list of filenames means the image files are
                used and the dataset is ignored.
            mask: Optional mask applied to features and input images. It
                is added to the dataset's masker, or wrapped in a new
                Masker when there is no dataset.
            image_type: Optional string selecting the type of image used
                for feature-based images. See meta.analyze_features().
            threshold: Feature threshold used to generate feature maps
                when decoding from a Dataset instance.
        """
        self.dataset = dataset
        has_mask = mask is not None

        if dataset is not None:
            self.masker = self.dataset.masker
            if features is None:
                features = dataset.get_feature_names()
            if has_mask:
                self.masker.add(mask)
        else:
            # Without a dataset the masker can only come from the mask
            self.masker = Masker(mask) if has_mask else None

        self.method = method.lower()

        if self.method == 'roi':
            # ROI decoding needs the names only, no feature images
            self.feature_names = features
            return

        self.load_features(features, image_type=image_type,
                           threshold=threshold)

コード例 #7

ファイルを表示

ファイル: parcellation_simplify.py プロジェクト: adelavega/ns_classify

# This script takes a parcellation that may not have contiguous numbers,
# removes parcels below some size and renumbers the rest in order,
# while also outputting the region and community number
from neurosynth.base.mask import Masker
from neurosynth.base import imageutils
import numpy as np
import csv

# Voxel-count cut-off: levels with at least this many voxels pass
min_vox = 300

file = '../masks/Andy/aal_MNI_V4.nii'
outfile = '../masks/Andy/aal_MNI_V4_' + str(min_vox) + '.nii'


# Load image with masker
masker = Masker(file)
img = imageutils.load_imgs(file, masker)

# How many levels in the original image
# (print is written in call form so the script parses on Python 2 and 3)
print("Original shape:")
print(np.bincount([int(vox) for vox in img]).shape)

# Get how many voxels per level and calc those that pass min_vox
count = np.bincount(img.astype('int').squeeze())
non_0_ix = np.where(count >= min_vox)[0]
zero_ix = np.where(count < min_vox)[0]

# Levels that fall below the cut-off (de-duplicated)
bad = list(set(zero_ix))

# Remove

コード例 #8

ファイルを表示

ファイル: decode.py プロジェクト: zhenghanQ/neurosynth

class Decoder:
    """ Decodes images by relating them to a set of feature images.

    The decoding method ('pearson', 'dot' or 'roi') is chosen when the
    instance is created and can be changed later with set_method().
    """

    def __init__(self, dataset=None, method='pearson', features=None,
                 mask=None, image_type='specificity_z', threshold=0.001):
        """ Initialize a new Decoder instance.

        Args:
            dataset: An optional Dataset instance containing features to use in
                decoding. When given, its masker is reused.
            method: The decoding method to use (optional, case-insensitive).
                By default, Pearson correlation.
            features: Optional list of features to use in decoding. If None,
                use all features found in dataset. If features is a list of
                strings, use only the subset of features in the Dataset that
                are named in the list. If features is a list of filenames,
                ignore the dataset entirely and use only the features passed as
                image files in decoding.
            mask: An optional mask to apply to features and input images. If
                None, will use the one in the current Dataset.
            image_type: An optional string indicating the type of image to use
                when constructing feature-based images. See
                meta.analyze_features() for details.
            threshold: If decoding from a Dataset instance, this is the feature
                threshold to use to generate the feature maps used in the
                decoding.
        """

        self.dataset = dataset

        if dataset is not None:
            self.masker = self.dataset.masker
            if features is None:
                features = dataset.get_feature_names()
            if mask is not None:
                self.masker.add(mask)
        elif mask is not None:
            self.masker = Masker(mask)
        else:
            self.masker = None

        self.method = method.lower()

        if self.method == 'roi':
            # ROI decoding works from the feature names alone
            self.feature_names = features
        else:
            self.load_features(features, image_type=image_type,
                               threshold=threshold)

    def decode(self, images, save=None, round=4, names=None, **kwargs):
        """ Decodes a set of images.

        Args:
          images: The images to decode. Can be:
            - A single String specifying the filename of the image to decode
            - A list of filenames
            - A single NumPy array containing the image data
          save: Optional filename to save results to (written as CSV).
          round: Optional integer indicating number of decimals to round result
            to. Defaults to 4.
          names: Optional list of names corresponding to the images in filenames.
            If passed, must be of same length and in same order as filenames.
            By default, the columns in the output will be named using the image
            filenames ('image_N' for array input, 'cluster_N' for the 'roi'
            method).
          **kwargs: Passed through to the selected decoding method.

        Returns:
          A pandas DataFrame where each feature is a row and each image is a
          column. The meaning of the values depends on the decoding method
          used. """

        if isinstance(images, string_types):
            images = [images]

        if isinstance(images, list):
            imgs_to_decode = imageutils.load_imgs(images, self.masker)
        else:
            imgs_to_decode = images

        methods = {
            'pearson': self._pearson_correlation,
            'dot': self._dot_product,
            'roi': self._roi_association
        }

        result = np.around(
            methods[self.method](imgs_to_decode, **kwargs), round)

        if names is None:
            if isinstance(images, np.ndarray):
                names = ['image_%d' % i for i in range(images.shape[1])]
            elif self.method == 'roi':
                names = ['cluster_%d' % i for i in range(result.shape[1])]
            else:
                names = images

        result = pd.DataFrame(result, columns=names, index=self.feature_names)

        if save is not None:
            result.to_csv(save, index_label='Feature')
        return result

    def set_method(self, method):
        """ Set decoding method. """
        self.method = method

    def load_features(self, features, image_type=None, from_array=False,
                      threshold=0.001):
        """ Load features from current Dataset instance or a list of files.
        Args:
            features: List containing paths to, or names of, features to
                extract. Each element in the list must be a string containing
                either a path to an image, or the name of a feature (as named
                in the current Dataset). Mixing of paths and feature names
                within the list is not allowed.
            image_type: Optional suffix indicating which kind of image to use
                for analysis. Only used if features are taken from the Dataset;
                if features is a list of filenames, image_type is ignored.
            from_array: If True, the features argument is interpreted as a
                string pointing to the location of a 2D ndarray on disk
                containing feature data, where rows are voxels and columns are
                individual features.
            threshold: If features are taken from the dataset, this is the
                threshold passed to the meta-analysis module to generate fresh
                images.

        """
        if from_array:
            if isinstance(features, list):
                features = features[0]
            self._load_features_from_array(features)
        elif path.exists(features[0]):
            # Only the first entry is probed to tell paths from names
            self._load_features_from_images(features)
        else:
            self._load_features_from_dataset(
                features, image_type=image_type, threshold=threshold)

    def _load_features_from_array(self, features):
        """ Load feature data from a 2D ndarray on disk. """
        self.feature_images = np.load(features)
        self.feature_names = range(self.feature_images.shape[1])

    def _load_features_from_dataset(self, features=None, image_type=None,
                                    threshold=0.001):
        """ Load feature image data from the current Dataset instance. See
        load_features() for documentation.
        """
        self.feature_names = self.dataset.feature_table.feature_names
        if features is not None:
            self.feature_names = [f for f in features if f in self.feature_names]
        from neurosynth.analysis import meta
        self.feature_images = meta.analyze_features(
            self.dataset, self.feature_names, image_type=image_type,
            threshold=threshold)
        # Apply a mask if one was originally passed
        if self.masker.layers:
            in_mask = self.masker.get_mask(in_global_mask=True)
            self.feature_images = self.feature_images[in_mask, :]

    def _load_features_from_images(self, images, names=None):
        """ Load feature image data from image files.

        Args:
          images: A list of image filenames.
          names: An optional list of strings to use as the feature names. Must
            be in the same order as the images.

        Raises:
          Exception: if names is given and its length differs from images.
        """
        if names is not None and len(names) != len(images):
            raise Exception(
                "Lists of feature names and images must be of same length!")
        self.feature_names = names if names is not None else images
        self.feature_images = imageutils.load_imgs(images, self.masker)

    def train_classifiers(self, features=None):
        ''' Train a set of classifiers (not implemented; does nothing). '''
        pass

    def _pearson_correlation(self, imgs_to_decode):
        """ Decode images using Pearson's r.

        Computes the correlation between each input image and each feature
        image across voxels.

        Args:
            imgs_to_decode: An ndarray of images to decode, with voxels in rows
                and images in columns.

        Returns:
            An n_features x n_images 2D array, with each cell representing the
            pearson correlation between the i'th feature and the j'th image
            across all voxels.
        """
        x, y = imgs_to_decode.astype(float), self.feature_images.astype(float)
        return self._xy_corr(x, y)

    def _dot_product(self, imgs_to_decode):
        """ Decoding using the dot product of each image with each feature
        image; returns an n_features x n_images array.
        """
        return np.dot(imgs_to_decode.T, self.feature_images).T

    def _roi_association(self, imgs_to_decode, value='z', binarize=None):
        """ Computes the strength of association between activation in a mask
        and presence/absence of a semantic feature. This is essentially a
        generalization of the voxel-wise reverse inference z-score to the
        multivoxel case.

        Args:
            imgs_to_decode: Image data, squeezed and averaged within regions
                via average_within_regions().
            value: 'r' to return the correlations, 'z' to return their
                Fisher transform scaled by sqrt(n - 3), where n is the
                number of rows of the feature data.
            binarize: Optional cut-off; feature values above it become 1
                and everything still below 1 becomes 0.

        Raises:
            ValueError: if value is neither 'r' nor 'z'.
        """
        # Fail early: previously an unknown value fell through and the
        # method silently returned None.
        if value not in ('r', 'z'):
            raise ValueError(
                "value must be 'r' or 'z', got %r" % (value,))
        imgs_to_decode = imgs_to_decode.squeeze()
        x = average_within_regions(self.dataset, imgs_to_decode).astype(float)
        y = self.dataset.feature_table.data[self.feature_names].values
        if binarize is not None:
            # Work on a copy: .values may share memory with the frame it
            # came from, and the thresholding below is done in place.
            y = y.copy()
            y[y > binarize] = 1.
            y[y < 1.] = 0.
        r = self._xy_corr(x.T, y)
        if value == 'r':
            return r
        f_r = np.arctanh(r)
        return f_r*np.sqrt(y.shape[0]-3)

    def _xy_corr(self, x, y):
        """ Column-wise Pearson correlation between x and y.

        Both arrays are centred and scaled to unit norm along axis 0; the
        result has one row per column of y and one column per column of x.
        """
        x, y = x - x.mean(0), y - y.mean(0)
        x, y = x / np.sqrt((x ** 2).sum(0)), y / np.sqrt((y ** 2).sum(0))
        return x.T.dot(y).T

    def plot_polar(self, data, n_top=3, overplot=False, labels=None,
                   palette='husl'):
        """ Draw polar plots of decoding results, one panel per column.

        Args:
            data: Table of values indexed with .iloc/.loc (e.g. the
                DataFrame returned by decode()); each column is one panel.
            n_top: When labels is None, the n_top highest-valued rows of
                every column are collected as the labels to plot.
            overplot: If True, draw every column on a single polar axis;
                otherwise use one axis per column, side by side.
            labels: Optional explicit list of row labels to plot.
            palette: Palette name handed to seaborn's color_palette().

        Returns:
            The matplotlib pyplot module, after tight_layout().
        """
        n_panels = data.shape[1]

        if labels is None:
            labels = []
            for i in range(n_panels):
                # Series.order() no longer exists in pandas; sort_values()
                # is its replacement.
                labels.extend(data.iloc[:, i].sort_values(ascending=False)
                              .index[:n_top])
            labels = np.unique(labels)

        data = data.loc[labels, :]

        # Use hierarchical clustering to order
        from scipy.spatial.distance import pdist
        from scipy.cluster.hierarchy import linkage, leaves_list
        dists = pdist(data, metric='correlation')
        pairs = linkage(dists)
        order = leaves_list(pairs)
        data = data.iloc[order, :]
        labels = [labels[i] for i in order]

        theta = np.linspace(0.0, 2 * np.pi, len(labels), endpoint=False)
        if overplot:
            fig, ax = plt.subplots(1, 1, subplot_kw=dict(polar=True))
            fig.set_size_inches(10, 10)
        else:
            # squeeze=False keeps an array of axes even for a single panel,
            # so axes[i] below is always valid.
            fig, axes = plt.subplots(1, n_panels, sharex=False, sharey=False,
                                     squeeze=False,
                                     subplot_kw=dict(polar=True))
            axes = axes.ravel()
            fig.set_size_inches((6 * n_panels, 6))
        # A bit silly to import seaborn just for this...
        # should extract just the color_palette functionality.
        import seaborn as sns
        colors = sns.color_palette(palette, n_panels)
        for i in range(n_panels):
            if overplot:
                alpha = 0.2
            else:
                ax = axes[i]
                alpha = 0.8
            ax.set_ylim(data.values.min(), data.values.max())
            d = data.iloc[:, i].values
            # Filled shape first, then an unfilled outline on top
            ax.fill(theta, d, color=colors[i], alpha=alpha, ec='k',
                    linewidth=0)
            ax.fill(theta, d, alpha=1.0, ec=colors[i],
                    linewidth=2, fill=False)
            ax.set_xticks(theta)
            ax.set_xticklabels(labels, fontsize=18)
            for lab in ax.get_yticklabels():
                lab.set_fontsize(18)
            ax.set_title('Cluster %d' % i, fontsize=22, y=1.12)
        plt.tight_layout()
        return plt

コード例 #9

ファイルを表示

ファイル: test_base.py プロジェクト: dandolo2020/neurosynth-base-code

 def setUp(self):
     """ Build the Masker under test from the MNI152 2mm brain image. """
     self.masker = Masker(get_resource_path() + 'MNI152_T1_2mm_brain.nii.gz')

コード例 #10

ファイルを表示

ファイル: decode.py プロジェクト: johnsonc/neurosynth

class Decoder:
    """ Decodes images by relating them to a set of feature images.

    The decoding method ('pearson', 'dot' or 'roi') is chosen when the
    instance is created and can be changed later with set_method().
    """

    def __init__(self, dataset=None, method='pearson', features=None,
                 mask=None, image_type='pFgA_z', threshold=0.001):
        """ Initialize a new Decoder instance.

        Args:
            dataset: An optional Dataset instance containing features to use in
                decoding. When given, its masker is reused.
            method: The decoding method to use (optional, case-insensitive).
                By default, Pearson correlation.
            features: Optional list of features to use in decoding. If None,
                use all features found in dataset. If features is a list of
                strings, use only the subset of features in the Dataset that
                are named in the list. If features is a list of filenames,
                ignore the dataset entirely and use only the features passed as
                image files in decoding.
            mask: An optional mask to apply to features and input images. If
                None, will use the one in the current Dataset.
            image_type: An optional string indicating the type of image to use
                when constructing feature-based images. See
                meta.analyze_features() for details. By default, uses reverse
                inference z-score images.
            threshold: If decoding from a Dataset instance, this is the feature
                threshold to use to generate the feature maps used in the
                decoding.
        """

        self.dataset = dataset

        if dataset is not None:
            self.masker = self.dataset.masker
            if features is None:
                features = dataset.get_feature_names()
            if mask is not None:
                self.masker.add(mask)
        elif mask is not None:
            self.masker = Masker(mask)
        else:
            self.masker = None

        self.method = method.lower()

        if self.method == 'roi':
            # ROI decoding works from the feature names alone
            self.feature_names = features
        else:
            self.load_features(features, image_type=image_type,
                               threshold=threshold)

    def decode(self, images, save=None, round=4, names=None, **kwargs):
        """ Decodes a set of images.

        Args:
          images: The images to decode. Can be:
            - A single String specifying the filename of the image to decode
            - A list of filenames
            - A single NumPy array containing the image data
          save: Optional filename to save results to (written as CSV).
          round: Optional integer indicating number of decimals to round result
            to. Defaults to 4.
          names: Optional list of names corresponding to the images in filenames.
            If passed, must be of same length and in same order as filenames.
            By default, the columns in the output will be named using the image
            filenames ('image_N' for array input, 'cluster_N' for the 'roi'
            method).
          **kwargs: Passed through to the selected decoding method.

        Returns:
          A pandas DataFrame where each feature is a row and each image is a
          column. The meaning of the values depends on the decoding method
          used. """

        # basestring only exists on Python 2; fall back to str elsewhere so
        # a single filename is still recognised.
        try:
            text_types = (basestring,)
        except NameError:
            text_types = (str,)

        if isinstance(images, text_types):
            images = [images]

        if isinstance(images, list):
            imgs_to_decode = imageutils.load_imgs(images, self.masker)
        else:
            imgs_to_decode = images

        methods = {
            'pearson': self._pearson_correlation,
            'dot': self._dot_product,
            'roi': self._roi_association
        }

        result = np.around(
            methods[self.method](imgs_to_decode, **kwargs), round)

        if names is None:
            if isinstance(images, np.ndarray):
                names = ['image_%d' % i for i in range(images.shape[1])]
            elif self.method == 'roi':
                names = ['cluster_%d' % i for i in range(result.shape[1])]
            else:
                names = images

        result = pd.DataFrame(result, columns=names, index=self.feature_names)

        if save is not None:
            result.to_csv(save, index_label='Feature')
        return result

    def set_method(self, method):
        """ Set decoding method. """
        self.method = method

    def load_features(self, features, image_type=None, from_array=False,
                      threshold=0.001):
        """ Load features from current Dataset instance or a list of files.
        Args:
            features: List containing paths to, or names of, features to
                extract. Each element in the list must be a string containing
                either a path to an image, or the name of a feature (as named
                in the current Dataset). Mixing of paths and feature names
                within the list is not allowed.
            image_type: Optional suffix indicating which kind of image to use
                for analysis. Only used if features are taken from the Dataset;
                if features is a list of filenames, image_type is ignored.
            from_array: If True, the features argument is interpreted as a
                string pointing to the location of a 2D ndarray on disk
                containing feature data, where rows are voxels and columns are
                individual features.
            threshold: If features are taken from the dataset, this is the
                threshold passed to the meta-analysis module to generate fresh
                images.

        """
        if from_array:
            if isinstance(features, list):
                features = features[0]
            self._load_features_from_array(features)
        elif path.exists(features[0]):
            # Only the first entry is probed to tell paths from names
            self._load_features_from_images(features)
        else:
            self._load_features_from_dataset(
                features, image_type=image_type, threshold=threshold)

    def _load_features_from_array(self, features):
        """ Load feature data from a 2D ndarray on disk. """
        self.feature_images = np.load(features)
        self.feature_names = range(self.feature_images.shape[1])

    def _load_features_from_dataset(self, features=None, image_type=None,
                                    threshold=0.001):
        """ Load feature image data from the current Dataset instance. See
        load_features() for documentation.
        """
        available = self.dataset.feature_table.feature_names
        if features is not None:
            # Built eagerly against a snapshot of the dataset's names: a
            # lazy filter() would read self.feature_names after it had
            # already been rebound to the filter object itself.
            self.feature_names = [f for f in features if f in available]
        else:
            self.feature_names = available
        from neurosynth.analysis import meta
        self.feature_images = meta.analyze_features(
            self.dataset, self.feature_names, image_type=image_type,
            threshold=threshold)
        # Apply a mask if one was originally passed
        if self.masker.layers:
            in_mask = self.masker.get_mask(in_global_mask=True)
            self.feature_images = self.feature_images[in_mask, :]

    def _load_features_from_images(self, images, names=None):
        """ Load feature image data from image files.

        Args:
          images: A list of image filenames.
          names: An optional list of strings to use as the feature names. Must
            be in the same order as the images.

        Raises:
          Exception: if names is given and its length differs from images.
        """
        if names is not None and len(names) != len(images):
            raise Exception(
                "Lists of feature names and images must be of same length!")
        self.feature_names = names if names is not None else images
        self.feature_images = imageutils.load_imgs(images, self.masker)

    def train_classifiers(self, features=None):
        ''' Train a set of classifiers (not implemented; does nothing). '''
        pass

    def _pearson_correlation(self, imgs_to_decode):
        """ Decode images using Pearson's r.

        Computes the correlation between each input image and each feature
        image across voxels.

        Args:
            imgs_to_decode: An ndarray of images to decode, with voxels in rows
                and images in columns.

        Returns:
            An n_features x n_images 2D array, with each cell representing the
            pearson correlation between the i'th feature and the j'th image
            across all voxels.
        """
        x, y = imgs_to_decode.astype(float), self.feature_images.astype(float)
        return self._xy_corr(x, y)

    def _dot_product(self, imgs_to_decode):
        """ Decoding using the dot product of each image with each feature
        image; returns an n_features x n_images array.
        """
        return np.dot(imgs_to_decode.T, self.feature_images).T

    def _roi_association(self, imgs_to_decode, value='z', binarize=None):
        """ Computes the strength of association between activation in a mask
        and presence/absence of a semantic feature. This is essentially a
        generalization of the voxel-wise reverse inference z-score to the
        multivoxel case.

        Args:
            imgs_to_decode: Image data, squeezed and averaged within regions
                via average_within_regions().
            value: 'r' to return the correlations, 'z' to return their
                Fisher transform scaled by sqrt(n - 3), where n is the
                number of rows of the feature data.
            binarize: Optional cut-off; feature values above it become 1
                and everything still below 1 becomes 0.

        Raises:
            ValueError: if value is neither 'r' nor 'z'.
        """
        # Fail early: previously an unknown value fell through and the
        # method silently returned None.
        if value not in ('r', 'z'):
            raise ValueError(
                "value must be 'r' or 'z', got %r" % (value,))
        imgs_to_decode = imgs_to_decode.squeeze()
        x = average_within_regions(self.dataset, imgs_to_decode).astype(float)
        y = self.dataset.feature_table.data[self.feature_names].values
        if binarize is not None:
            # Work on a copy: .values may share memory with the frame it
            # came from, and the thresholding below is done in place.
            y = y.copy()
            y[y > binarize] = 1.
            y[y < 1.] = 0.
        r = self._xy_corr(x.T, y)
        if value == 'r':
            return r
        f_r = np.arctanh(r)
        return f_r*np.sqrt(y.shape[0]-3)

    def _xy_corr(self, x, y):
        """ Column-wise Pearson correlation between x and y.

        Both arrays are centred and scaled to unit norm along axis 0; the
        result has one row per column of y and one column per column of x.
        """
        x, y = x - x.mean(0), y - y.mean(0)
        x, y = x / np.sqrt((x ** 2).sum(0)), y / np.sqrt((y ** 2).sum(0))
        return x.T.dot(y).T

    def plot_polar(self, data, n_top=3, overplot=False, labels=None,
                   palette='husl'):
        """ Draw polar plots of decoding results, one panel per column.

        Args:
            data: Table of values indexed with .iloc/.loc (e.g. the
                DataFrame returned by decode()); each column is one panel.
            n_top: When labels is None, the n_top highest-valued rows of
                every column are collected as the labels to plot.
            overplot: If True, draw every column on a single polar axis;
                otherwise use one axis per column, stacked vertically.
            labels: Optional explicit list of row labels to plot.
            palette: Palette name handed to seaborn's color_palette().

        Returns:
            The matplotlib pyplot module, after tight_layout().
        """
        n_panels = data.shape[1]

        if labels is None:
            labels = []
            for i in range(n_panels):
                # Series.order() no longer exists in pandas; sort_values()
                # is its replacement.
                labels.extend(data.iloc[:, i].sort_values(ascending=False)
                              .index[:n_top])
            labels = np.unique(labels)

        data = data.loc[labels, :]

        # Use hierarchical clustering to order
        from scipy.spatial.distance import pdist
        from scipy.cluster.hierarchy import linkage, leaves_list
        dists = pdist(data, metric='correlation')
        pairs = linkage(dists)
        order = leaves_list(pairs)
        data = data.iloc[order, :]
        labels = [labels[i] for i in order]

        theta = np.linspace(0.0, 2 * np.pi, len(labels), endpoint=False)
        if overplot:
            fig, ax = plt.subplots(1, 1, subplot_kw=dict(polar=True))
            fig.set_size_inches(10, 10)
        else:
            # squeeze=False keeps an array of axes even for a single panel,
            # so axes[i] below is always valid.
            fig, axes = plt.subplots(n_panels, 1, sharex=False, sharey=False,
                                     squeeze=False,
                                     subplot_kw=dict(polar=True))
            axes = axes.ravel()
            fig.set_size_inches((6, 6 * n_panels))
        # A bit silly to import seaborn just for this...
        # should extract just the color_palette functionality.
        import seaborn as sns
        colors = sns.color_palette(palette, n_panels)
        for i in range(n_panels):
            if overplot:
                alpha = 0.2
            else:
                ax = axes[i]
                alpha = 0.8
            ax.set_ylim(data.values.min(), data.values.max())
            d = data.iloc[:, i].values
            ax.fill(
                theta, d, ec='k', alpha=alpha, color=colors[i], linewidth=2)
            ax.set_xticks(theta)
            ax.set_xticklabels(labels, fontsize=18)
            for lab in ax.get_yticklabels():
                lab.set_fontsize(18)
            ax.set_title('Cluster %d' % i, fontsize=22, y=1.12)
        plt.tight_layout()
        return plt

コード例 #11

ファイルを表示

ファイル: decode.py プロジェクト: gadfli/neurosynth

class Decoder:
    """ Decode images by comparing them against a set of feature images.

    Feature images are loaded once (from a Dataset, from image files, or from
    an array on disk) and stored in self.feature_images; decode() then scores
    each input image against every feature using the selected method.
    """

    def __init__(self, dataset=None, method='pearson', features=None, mask=None, image_type='pFgA_z', threshold=0.001):
        """ Initialize a new Decoder instance.

        Args:
          dataset: An optional Dataset instance containing features to use in decoding.
          method: The decoding method to use (optional). By default, Pearson correlation.
          features: Optional list of features to use in decoding. If None, use all
            features found in dataset. If features is a list of strings, use only the
            subset of features in the Dataset that are named in the list. If features
            is a list of filenames, ignore the dataset entirely and use only the
            features passed as image files in decoding.
          mask: An optional mask to apply to features and input images. If None, will use
            the one in the current Dataset.
          image_type: An optional string indicating the type of image to use when constructing
            feature-based images. See meta.analyze_features() for details. By default, uses
            reverse inference z-score images.
          threshold: If decoding from a Dataset instance, this is the feature threshold to
            use to generate the feature maps used in the decoding.
        """

        self.dataset = dataset

        if dataset is not None:
            # The Dataset's own masker is reused (not copied); an extra mask,
            # if given, is added to it as a layer.
            self.masker = self.dataset.masker
            if features is None:
                features = dataset.get_feature_names()
            if mask is not None:
                self.masker.add(mask)
        elif mask is not None:
            self.masker = Masker(mask)
        else:
            self.masker = None

        self.method = method.lower()

        self.load_features(features, image_type=image_type, threshold=threshold)

    def decode(self, images, save=None, round=4, names=None):
        """ Decodes a set of images.

        Args:
          images: The images to decode. Can be:
            - A single String specifying the filename of the image to decode
            - A list of filenames
            - A single NumPy array containing the image data
          save: Optional filename to save results to. If None (default), returns
            all results as an array.
          round: Optional integer indicating number of decimals to round result
            to. Defaults to 4.
          names: Optional list of names corresponding to the images in filenames.
            If passed, must be of same length and in same order as filenames.
            By default, the columns in the output will be named using the image
            filenames (or image_0, image_1, ... when an array is passed).

        Returns:
          An n_features x n_files numpy array, where each feature is a row and
          each image is a column. The meaning of the values depends on the
          decoding method used. When save is given, the table is written to
          that file instead and None is returned.

        Raises:
          KeyError: if self.method is not one of the supported method names.
        """
        # `basestring` only exists on Python 2; fall back to `str` elsewhere.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if isinstance(images, (string_types, list)):
            imgs_to_decode = imageutils.load_imgs(images, self.masker)
        else:
            imgs_to_decode = images

        # Dispatch on bound methods so only the requested decoder is run.
        methods = {
            'pearson': self._pearson_correlation,
            # 'nb': self._naive_bayes,
            'pattern': self._pattern_expression,
        }

        result = np.around(methods[self.method](imgs_to_decode), round)

        if save is None:
            return result

        if names is None:
            if isinstance(images, np.ndarray):
                names = ['image_%d' % i for i in range(images.shape[1])]
            elif isinstance(images, string_types):
                # A single filename must become a one-element header list.
                names = [images]
            else:
                names = images

        rownames = np.array(
            self.feature_names, dtype='|S32')[:, np.newaxis]

        # The context manager guarantees the file is flushed and closed.
        with open(save, 'w') as f:
            f.write('\t'.join(['Feature'] + [str(n) for n in names]) + '\n')
            np.savetxt(f, np.hstack((
                rownames, result)), fmt='%s', delimiter='\t')

    def set_method(self, method):
        """ Set decoding method (case-insensitive, as in the constructor). """
        self.method = method.lower()

    def load_features(self, features, image_type=None, from_array=False, threshold=0.001):
        """ Load features from current Dataset instance or a list of files.
        Args:
            features: List containing paths to, or names of, features to extract.
                Each element in the list must be a string containing either a path to an
                image, or the name of a feature (as named in the current Dataset).
                Mixing of paths and feature names within the list is not allowed.
            image_type: Optional suffix indicating which kind of image to use for analysis.
                Only used if features are taken from the Dataset; if features is a list
                of filenames, image_type is ignored.
            from_array: If True, the features argument is interpreted as a string pointing
                to the location of a 2D ndarray on disk containing feature data, where
                rows are voxels and columns are individual features.
            threshold: If features are taken from the dataset, this is the threshold
                passed to the meta-analysis module to generate fresh images.

        """
        if from_array:
            if isinstance(features, list):
                features = features[0]
            self._load_features_from_array(features)
        elif path.exists(features[0]):
            # Only the first entry is probed to decide between files and names.
            self._load_features_from_images(features)
        else:
            self._load_features_from_dataset(features, image_type=image_type, threshold=threshold)

    def _load_features_from_array(self, features):
        """ Load feature data from a 2D ndarray on disk. """
        self.feature_images = np.load(features)
        # Features loaded this way are named by their column index.
        self.feature_names = list(range(self.feature_images.shape[1]))

    def _load_features_from_dataset(self, features=None, image_type=None, threshold=0.001):
        """ Load feature image data from the current Dataset instance. See load_features()
        for documentation.
        """
        available = self.dataset.feature_table.feature_names
        if features is not None:
            # Keep the caller's order, dropping names the Dataset does not have.
            # Built eagerly so feature_names is a real list on Python 3 too.
            known = set(available)
            self.feature_names = [f for f in features if f in known]
        else:
            self.feature_names = available
        from neurosynth.analysis import meta
        self.feature_images = meta.analyze_features(
            self.dataset, self.feature_names, image_type=image_type, threshold=threshold)
        # Apply a mask if one was originally passed
        if self.masker.layers:
            in_mask = self.masker.get_current_mask(in_global_mask=True)
            self.feature_images = self.feature_images[in_mask, :]

    def _load_features_from_images(self, images, names=None):
        """ Load feature image data from image files.

        Args:
          images: A list of image filenames.
          names: An optional list of strings to use as the feature names. Must be
            in the same order as the images.

        Raises:
          ValueError: if names is given but its length differs from images.
        """
        if names is not None and len(names) != len(images):
            raise ValueError("Lists of feature names and image files must be of same length!")
        self.feature_names = names if names is not None else images
        self.feature_images = imageutils.load_imgs(images, self.masker)

    def train_classifiers(self, features=None):
        ''' Train a set of classifiers (placeholder; currently does nothing). '''
        # for f in features:
        #     clf = Classifier(None)
        #     self.classifiers.append(clf)
        pass

    def _pearson_correlation(self, imgs_to_decode):
        """ Decode images using Pearson's r.

        Computes the correlation between each input image and each feature image across
        voxels.

        Args:
          imgs_to_decode: An ndarray of images to decode, with voxels in rows and images
            in columns.

        Returns:
          An n_features x n_images 2D array, with each cell representing the pearson
          correlation between the i'th feature and the j'th image across all voxels.
        """
        x, y = imgs_to_decode.astype(float), self.feature_images.astype(float)
        # Center each column, then scale it to unit length, so that the dot
        # product of two columns is their correlation coefficient.
        x, y = x - x.mean(0), y - y.mean(0)
        x, y = x / np.sqrt((x ** 2).sum(0)), y / np.sqrt((y ** 2).sum(0))
        return x.T.dot(y).T

    def _pattern_expression(self, imgs_to_decode):
        """ Decode images using pattern expression. For explanation, see:
        http://wagerlab.colorado.edu/wiki/doku.php/help/fmri_help/pattern_expression_and_connectivity

        Returns the dot product of every feature image with every input image,
        as an n_features x n_images array.
        """
        return np.dot(imgs_to_decode.T, self.feature_images).T

コード例 #12

ファイルを表示

ファイル: decode.py プロジェクト: margulies/neurosynth

class Decoder:
    """ Decode images by comparing them against a set of feature images.

    Feature images are loaded once (from a Dataset, from image files, or from
    an array on disk) and stored in self.feature_images; decode() then scores
    each input image against every feature using the selected method.
    """

    def __init__(self,
                 dataset=None,
                 method='pearson',
                 features=None,
                 mask=None,
                 image_type='pFgA_z',
                 threshold=0.001):
        """ Initialize a new Decoder instance.

        Args:
          dataset: An optional Dataset instance containing features to use in decoding.
          method: The decoding method to use (optional). By default, Pearson correlation.
          features: Optional list of features to use in decoding. If None, use all
            features found in dataset. If features is a list of strings, use only the
            subset of features in the Dataset that are named in the list. If features
            is a list of filenames, ignore the dataset entirely and use only the
            features passed as image files in decoding.
          mask: An optional mask to apply to features and input images. If None, will use
            the one in the current Dataset.
          image_type: An optional string indicating the type of image to use when constructing
            feature-based images. See meta.analyze_features() for details. By default, uses
            reverse inference z-score images.
          threshold: If decoding from a Dataset instance, this is the feature threshold to
            use to generate the feature maps used in the decoding.
        """

        self.dataset = dataset

        if dataset is not None:
            # The Dataset's own masker is reused (not copied); an extra mask,
            # if given, is added to it as a layer.
            self.masker = self.dataset.masker
            if features is None:
                features = dataset.get_feature_names()
            if mask is not None:
                self.masker.add(mask)
        elif mask is not None:
            self.masker = Masker(mask)
        else:
            self.masker = None

        self.method = method.lower()

        self.load_features(features,
                           image_type=image_type,
                           threshold=threshold)

    def decode(self, images, save=None, round=4, names=None):
        """ Decodes a set of images.

        Args:
          images: The images to decode. Can be:
            - A single String specifying the filename of the image to decode
            - A list of filenames
            - A single NumPy array containing the image data
          save: Optional filename to save results to. If None (default), returns
            all results as an array.
          round: Optional integer indicating number of decimals to round result
            to. Defaults to 4.
          names: Optional list of names corresponding to the images in filenames.
            If passed, must be of same length and in same order as filenames.
            By default, the columns in the output will be named using the image
            filenames (or image_0, image_1, ... when an array is passed).

        Returns:
          An n_features x n_files numpy array, where each feature is a row and
          each image is a column. The meaning of the values depends on the
          decoding method used. When save is given, the table is written to
          that file instead and None is returned.

        Raises:
          KeyError: if self.method is not one of the supported method names.
        """
        # `basestring` only exists on Python 2; fall back to `str` elsewhere.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if isinstance(images, (string_types, list)):
            imgs_to_decode = imageutils.load_imgs(images, self.masker)
        else:
            imgs_to_decode = images

        # Dispatch on bound methods so only the requested decoder is run.
        methods = {
            'pearson': self._pearson_correlation,
            # 'nb': self._naive_bayes,
            'pattern': self._pattern_expression,
        }

        result = np.around(methods[self.method](imgs_to_decode), round)

        if save is None:
            return result

        if names is None:
            if isinstance(images, np.ndarray):
                names = ['image_%d' % i for i in range(images.shape[1])]
            elif isinstance(images, string_types):
                # A single filename must become a one-element header list.
                names = [images]
            else:
                names = images

        rownames = np.array(self.feature_names, dtype='|S32')[:,
                                                              np.newaxis]

        # The context manager guarantees the file is flushed and closed.
        with open(save, 'w') as f:
            f.write('\t'.join(['Feature'] + [str(n) for n in names]) + '\n')
            np.savetxt(f,
                       np.hstack((rownames, result)),
                       fmt='%s',
                       delimiter='\t')

    def set_method(self, method):
        """ Set decoding method (case-insensitive, as in the constructor). """
        self.method = method.lower()

    def load_features(self,
                      features,
                      image_type=None,
                      from_array=False,
                      threshold=0.001):
        """ Load features from current Dataset instance or a list of files.
        Args:
            features: List containing paths to, or names of, features to extract.
                Each element in the list must be a string containing either a path to an
                image, or the name of a feature (as named in the current Dataset).
                Mixing of paths and feature names within the list is not allowed.
            image_type: Optional suffix indicating which kind of image to use for analysis.
                Only used if features are taken from the Dataset; if features is a list
                of filenames, image_type is ignored.
            from_array: If True, the features argument is interpreted as a string pointing
                to the location of a 2D ndarray on disk containing feature data, where
                rows are voxels and columns are individual features.
            threshold: If features are taken from the dataset, this is the threshold
                passed to the meta-analysis module to generate fresh images.

        """
        if from_array:
            if isinstance(features, list):
                features = features[0]
            self._load_features_from_array(features)
        elif path.exists(features[0]):
            # Only the first entry is probed to decide between files and names.
            self._load_features_from_images(features)
        else:
            self._load_features_from_dataset(features,
                                             image_type=image_type,
                                             threshold=threshold)

    def _load_features_from_array(self, features):
        """ Load feature data from a 2D ndarray on disk. """
        self.feature_images = np.load(features)
        # Features loaded this way are named by their column index.
        self.feature_names = list(range(self.feature_images.shape[1]))

    def _load_features_from_dataset(self,
                                    features=None,
                                    image_type=None,
                                    threshold=0.001):
        """ Load feature image data from the current Dataset instance. See load_features()
        for documentation.
        """
        available = self.dataset.feature_table.feature_names
        if features is not None:
            # Keep the caller's order, dropping names the Dataset does not have.
            # Built eagerly so feature_names is a real list on Python 3 too.
            known = set(available)
            self.feature_names = [f for f in features if f in known]
        else:
            self.feature_names = available
        from neurosynth.analysis import meta
        self.feature_images = meta.analyze_features(self.dataset,
                                                    self.feature_names,
                                                    image_type=image_type,
                                                    threshold=threshold)
        # Apply a mask if one was originally passed
        if self.masker.layers:
            in_mask = self.masker.get_current_mask(in_global_mask=True)
            self.feature_images = self.feature_images[in_mask, :]

    def _load_features_from_images(self, images, names=None):
        """ Load feature image data from image files.

        Args:
          images: A list of image filenames.
          names: An optional list of strings to use as the feature names. Must be
            in the same order as the images.

        Raises:
          ValueError: if names is given but its length differs from images.
        """
        if names is not None and len(names) != len(images):
            raise ValueError(
                "Lists of feature names and image files must be of same length!"
            )
        self.feature_names = names if names is not None else images
        self.feature_images = imageutils.load_imgs(images, self.masker)

    def train_classifiers(self, features=None):
        ''' Train a set of classifiers (placeholder; currently does nothing). '''
        # for f in features:
        #     clf = Classifier(None)
        #     self.classifiers.append(clf)
        pass

    def _pearson_correlation(self, imgs_to_decode):
        """ Decode images using Pearson's r.

        Computes the correlation between each input image and each feature image across
        voxels.

        Args:
          imgs_to_decode: An ndarray of images to decode, with voxels in rows and images
            in columns.

        Returns:
          An n_features x n_images 2D array, with each cell representing the pearson
          correlation between the i'th feature and the j'th image across all voxels.
        """
        x, y = imgs_to_decode.astype(float), self.feature_images.astype(float)
        # Center each column, then scale it to unit length, so that the dot
        # product of two columns is their correlation coefficient.
        x, y = x - x.mean(0), y - y.mean(0)
        x, y = x / np.sqrt((x**2).sum(0)), y / np.sqrt((y**2).sum(0))
        return x.T.dot(y).T

    def _pattern_expression(self, imgs_to_decode):
        """ Decode images using pattern expression. For explanation, see:
        http://wagerlab.colorado.edu/wiki/doku.php/help/fmri_help/pattern_expression_and_connectivity

        Returns the dot product of every feature image with every input image,
        as an n_features x n_images array.
        """
        return np.dot(imgs_to_decode.T, self.feature_images).T

コード例 #13

ファイルを表示

    def __init__(self,
                 dataset,
                 cluster_on='coactivation',
                 global_mask=None,
                 roi_mask=None,
                 reference_mask=None,
                 features=None,
                 feature_threshold=0.0,
                 min_voxels_per_study=None,
                 min_studies_per_voxel=None,
                 dimension_reduction='ward',
                 n_components=500,
                 distance_metric='correlation',
                 clustering_method='ward',
                 output_dir='.',
                 prefix=None,
                 parcellation_kwargs={},
                 clustering_kwargs={}):
        """ Initialize Clusterer.
        Args:
            dataset (Dataset): The Dataset instance to use for clustering.
            cluster_on (str): The kind of data to use as the basis for voxel
                clustering--i.e., what defines features passed to the clustering
                algorithm. Valid options:
                    'studies': features are individual studies.
                    'coactivation': uses a precomputed distance matrix based on
                        coactivation of all voxels in the roi_mask across all
                        observations in the reference_data.
            global_mask: An image defining the space to use for all analyses. If None, a
                deep copy of the masker found in the Dataset will be used.
            roi_mask: An image that determines which voxels to cluster. All non-zero voxels
                will be included in the clustering analysis. When roi_mask is None, all
                voxels in the global_mask (i.e., the whole brain) will be clustered. roi_mask
                can be an image filename, a nibabel image, or an already-masked array with
                the same dimensions as the global_mask.
            reference_mask: An image defining the voxels to base the distance matrix
                computation on. All non-zero voxels will be used to compute the distance
                matrix. For example, if the roi_mask contains voxels in only the insula,
                and reference_mask contains voxels in only the cerebellum, then voxels in
                the insula will be clustered based on the similarity of their coactivation
                with all and only cerebellum voxels.
            features: Optional features to use for selecting a subset of the studies in the
                Dataset instance.
            feature_threshold: float; the threshold to use for feature selection. Will be
                ignored if features is None.
            min_voxels_per_study: An optional number. If provided, only studies (columns
                of the image data) whose column sum is strictly greater than this value
                are kept.
            min_studies_per_voxel: An optional number. If provided, only voxels (rows of
                the image data) whose row sum is strictly greater than this value stay
                in the mask.
            dimension_reduction: Either a scikit-learn object with a fit_transform method,
                or the name of the parcellation method to use for reducing the dimensionality
                of the reference mask. Valid options include:
                    None: no parcellation
                    'ward': spatially constrained hierarchical clustering; see Thirion
                        et al (2014)
                    'pca': principal component analysis
                    'grid': downsample the reference mask to an isometric grid
                Defaults to 'ward'. Applied whenever it is not None.
            n_components: Number of components to request, if using a dimension_reduction method.
                Meaning depends on parcellation algorithm.
            distance_metric: Optional string providing the distance metric to use for
                computation of a distance matrix. When None, no distance matrix is computed
                and we assume that clustering will be done on the raw data. Valid options
                are any of the strings accepted by sklearn's pairwise_distances method.
                Defaults to 'correlation'. Note that for some clustering methods (e.g., k-means),
                no distance matrix will be computed, and this argument will be ignored.
            clustering_method: Algorithm to use for clustering. Must be one of 'ward', 'spectral',
                'agglomerative', 'dbscan', 'kmeans', or 'minik'. If None, can be set
                later via set_algorithm() or cluster().
            output_dir: Directory to use for writing all outputs. Created if it
                does not exist yet.
            prefix: Optional prefix to prepend to all outputted directories/files.
            parcellation_kwargs: Optional keyword arguments to pass to parcellation object.
                Only read here, never modified.
            clustering_kwargs: Optional keyword arguments to pass to clustering object.
                Only read here, never modified.
        """

        self.output_dir = output_dir
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        self.prefix = prefix

        # Save all arguments for metadata output. The named arguments are
        # recorded explicitly; the two kwargs dicts are merged in by value.
        # (Looking their keys up in locals() would raise KeyError, because
        # those keys are not local variable names.)
        self.args = {
            'output_dir': output_dir,
            'features': features,
            'feature_threshold': feature_threshold,
            'global_mask': global_mask,
            'roi_mask': roi_mask,
            'reference_mask': reference_mask,
            'distance_metric': distance_metric,
        }
        self.args.update(clustering_kwargs)
        self.args.update(parcellation_kwargs)

        self.set_algorithm(clustering_method, **clustering_kwargs)

        self.dataset = dataset

        # Work on a private copy of the Dataset's masker so that mask layers
        # added below do not leak back into the Dataset.
        self.masker = deepcopy(
            dataset.masker) if global_mask is None else Masker(global_mask)

        # Condition study inclusion on specific features
        if features is not None:
            data = self.dataset.get_ids_by_features(
                features, threshold=feature_threshold, get_image_data=True)
        else:
            data = self.dataset.get_image_data()

        # Trim data based on minimum number of voxels or studies
        if min_studies_per_voxel is not None:
            logger.info("Thresholding voxels based on number of studies.")
            # The surviving voxels are added to the masker as a new layer;
            # the data rows themselves are not dropped here.
            av = self.masker.unmask(data.sum(1) > min_studies_per_voxel,
                                    output='vector')
            self.masker.add(av)

        if min_voxels_per_study is not None:
            logger.info("Thresholding studies based on number of voxels.")
            active_studies = np.where(data.sum(0) > min_voxels_per_study)[0]
            data = data[:, active_studies]

        self.data = data

        self.set_reference_data(method=cluster_on, mask=reference_mask)

        # Dimensionality reduction
        if dimension_reduction is not None:
            self.dimension_reduction(dimension_reduction, n_components)

        # Set the voxels to cluster
        if roi_mask is not None:
            self.masker.add(roi_mask)
        self.roi_data = data[self.masker.get_mask(), :]
        # if roi_mask is not None: self.masker.remove(-1)
        if distance_metric is not None:
            self.create_distance_matrix(distance_metric=distance_metric)

コード例 #14

ファイルを表示

ファイル: cluster.py プロジェクト: margulies/neurosynth

    def __init__(self,
                 dataset=None,
                 algorithm=None,
                 output_dir='.',
                 grid_scale=None,
                 features=None,
                 feature_threshold=0.0,
                 global_mask=None,
                 roi_mask=None,
                 distance_mask=None,
                 min_voxels_per_study=None,
                 min_studies_per_voxel=None,
                 distance_metric=None,
                 **kwargs):
        """ Set up a Clusterer from a Dataset or from a raw data array.

        Args:
            dataset: The data to cluster: either a Dataset instance, or a numpy
                array with voxels in rows and features in columns.
            algorithm: Optional clustering algorithm. When None, nothing is
                configured here and an algorithm has to be supplied to
                cluster() later.
            output_dir: Directory to use for writing all outputs.
            grid_scale: Optional integer. When given, the data are passed
                through a 3D grid (values of the voxels in each grid cell are
                averaged) before clustering, as a means of dimension reduction.
            features: Optional features used to select a subset of the studies
                in the Dataset. Not used when dataset is a numpy array.
            feature_threshold: float; threshold for the feature selection.
                Only used together with features.
            global_mask: Image (or Masker) defining the space for all analyses.
                Required when dataset is a numpy array; defaults to the
                Dataset's masker otherwise.
            roi_mask: Image selecting the voxels to cluster (all non-zero
                voxels). When None, every voxel of the global mask is
                clustered. May be an image filename, a nibabel image, or an
                already-masked array matching the global mask.
            distance_mask: Image selecting the voxels the distance matrix is
                based on (all non-zero voxels). E.g. with an insula roi_mask
                and a cerebellum distance_mask, insula voxels are clustered by
                the similarity of their coactivation with cerebellum voxels.
            min_voxels_per_study: Accepted for compatibility.
            min_studies_per_voxel: Accepted for compatibility.
                NOTE: neither of these two thresholds is applied by this
                constructor at present.
            distance_metric: Optional name of the distance metric. When None,
                no distance matrix is computed and clustering is expected to
                run on the raw data.
            **kwargs: Extra keyword arguments forwarded to the clustering
                algorithm.

        Raises:
            ValueError: if dataset is not a Dataset and no global_mask is given.
        """

        self.output_dir = output_dir

        if algorithm is not None:
            self._set_clustering_algorithm(algorithm, **kwargs)

        if not isinstance(dataset, Dataset):
            # Raw array input: there is no Dataset to borrow a mask from.
            self.data = dataset

            if global_mask is None:
                raise ValueError(
                    "If dataset is a numpy array, a valid global_mask (filename, "
                    "Mask instance, or nibabel image) must be passed.")
        else:
            self.dataset = dataset

            if global_mask is None:
                global_mask = dataset.masker

            if features is None:
                image_data = self.dataset.get_image_data()
            else:
                image_data = self.dataset.get_ids_by_features(
                    features, threshold=feature_threshold, get_image_data=True)

            self.data = image_data

        # Anything that is not already a Masker gets wrapped in one.
        self.masker = (global_mask if isinstance(global_mask, Masker)
                       else Masker(global_mask))

        if distance_mask is not None:
            # Temporarily layer the distance mask on top to extract the target
            # data, then drop it again before the ROI mask is applied.
            self.masker.add(distance_mask)
            if grid_scale is None:
                in_distance = self.masker.get_current_mask(in_global_mask=True)
                self.target_data = self.data[in_distance, :]
            else:
                self.target_data, _ = nsr.apply_grid(self.data,
                                                     masker=self.masker,
                                                     scale=grid_scale,
                                                     threshold=None)

            self.masker.reset()

        if roi_mask is not None:
            self.masker.add(roi_mask)

        if grid_scale is None:
            in_roi = self.masker.get_current_mask(in_global_mask=True)
            self.data = self.data[in_roi, :]
        else:
            self.data, self.grid = nsr.apply_grid(self.data,
                                                  masker=self.masker,
                                                  scale=grid_scale,
                                                  threshold=None)

        if distance_metric is not None:
            self.create_distance_matrix(distance_metric=distance_metric)