Example #1
class PDistTargetSimilarity(Measure):
    """Calculate the correlations of PDist measures with a target

    Target dissimilarity correlation `Measure`. Computes the correlation between
    the dissimilarity matrix defined over the pairwise distances between the
    samples of the dataset and the target dissimilarity matrix.
    """

    is_trained = True
    """Indicate that this measure is always trained."""

    pairwise_metric = Parameter('correlation', constraints='str', doc="""\
          Distance metric to use for calculating pairwise vector distances for
          dissimilarity matrix (DSM).  See scipy.spatial.distance.pdist for
          all possible metrics.""")

    comparison_metric = Parameter('pearson',
                                   constraints=EnsureChoice('pearson',
                                                            'spearman'),
                                   doc="""\
          Similarity measure to be used for comparing dataset DSM with the
          target DSM.""")

    center_data = Parameter(False, constraints='bool', doc="""\
          If True then center each column of the data matrix by subtracting the
          column mean from each element. This is recommended especially when
          using pairwise_metric='correlation'.""")

    corrcoef_only = Parameter(False, constraints='bool', doc="""\
          If True, return only the correlation coefficient (rho), otherwise
          return rho and probability, p.""")

    def __init__(self, target_dsm, **kwargs):
        """
        Parameters
        ----------
        target_dsm : array (length N*(N-1)/2)
          Target dissimilarity matrix

        Returns
        -------
        Dataset
          If ``corrcoef_only`` is True, contains one feature: the correlation
          coefficient (rho); otherwise two features: rho and the p-value, p.
        """
        # init base classes first
        Measure.__init__(self, **kwargs)
        self.target_dsm = target_dsm
        if self.params.comparison_metric == 'spearman':
            self.target_dsm = rankdata(target_dsm)

    def _call(self, dataset):
        data = dataset.samples
        if self.params.center_data:
            data = data - np.mean(data, 0)
        dsm = pdist(data, self.params.pairwise_metric)
        if self.params.comparison_metric == 'spearman':
            dsm = rankdata(dsm)
        rho, p = pearsonr(dsm, self.target_dsm)
        if self.params.corrcoef_only:
            return Dataset([rho], fa={'metrics': ['rho']})
        else:
            return Dataset([[rho,p]], fa={'metrics': ['rho', 'p']})
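For reference, a minimal standalone sketch of the computation this measure performs, using SciPy directly on made-up `data` and `target_dsm` arrays rather than the mvpa2 API itself:

import numpy as np
from scipy.spatial.distance import pdist
from scipy.stats import pearsonr, rankdata

rng = np.random.RandomState(0)
data = rng.randn(10, 50)               # 10 samples x 50 features (illustrative)
target_dsm = rng.rand(10 * 9 // 2)     # flattened target dissimilarity matrix

data = data - data.mean(axis=0)        # center_data=True
dsm = pdist(data, 'correlation')       # pairwise_metric='correlation'
# comparison_metric='spearman' amounts to Pearson correlation of the ranks
rho, p = pearsonr(rankdata(dsm), rankdata(target_dsm))
print(rho, p)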
Example #2
class PDistConsistency(Measure):
    """Calculate the correlations of PDist measures across chunks

    This measures the consistency in similarity structure across runs
    within individuals, or across individuals if the target dataset is made from
    several subjects in some common space and the sample attribute
    specified as the chunks_attr codes for subject identity.

    @author: ACC Aug 2013
    """
    is_trained = True
    """Indicate that this measure is always trained."""

    chunks_attr = Parameter('chunks', constraints='str', doc="""\
          Chunks attribute to use for chunking dataset. Can be any samples
          attribute.""")

    pairwise_metric = Parameter('correlation', constraints='str', doc="""\
          Distance metric to use for calculating dissimilarity matrices from
          the set of samples in each chunk specified. See
          scipy.spatial.distance.pdist for all possible metrics.""")

    consistency_metric = Parameter('pearson',
                                   constraints=EnsureChoice('pearson',
                                                            'spearman'),
                                   doc="""\
          Correlation measure to use for the correlation between dissimilarity
          matrices.""")

    center_data = Parameter(False, constraints='bool', doc="""\
          If True then center each column of the data matrix by subtracting the
          column mean from each element. This is recommended especially when
          using pairwise_metric='correlation'.""")

    square = Parameter(False, constraints='bool', doc="""\
          If True, return the square distance matrix; if False, return the
          flattened upper triangle.""")


    def __init__(self, **kwargs):
        """
        Returns
        -------
        Dataset
          Contains the pairwise correlations between the DSMs
          computed from each chunk of the input dataset. If square is False,
          this is a column vector of length N(N-1)/2 for N chunks. If square
          is True, this is a square matrix of size NxN for N chunks.
        """
        # TODO: Another metric for consistency metric could be the "Rv"
        # coefficient...  (ac)
        # init base classes first
        Measure.__init__(self, **kwargs)

    def _call(self, dataset):
        """Computes the average correlation in similarity structure across chunks."""

        chunks_attr = self.params.chunks_attr
        nchunks = len(dataset.sa[chunks_attr].unique)
        if nchunks < 2:
            raise StandardError("This measure calculates similarity consistency across "
                                "chunks and is not meaningful for datasets with only "
                                "one chunk.")
        dsms = []
        chunks = []
        for chunk in dataset.sa[chunks_attr].unique:
            data = np.atleast_2d(
                    dataset.samples[dataset.sa[chunks_attr].value == chunk, :])
            if self.params.center_data:
                data = data - np.mean(data, 0)
            dsm = pdist(data, self.params.pairwise_metric)
            dsms.append(dsm)
            chunks.append(chunk)
        dsms = np.vstack(dsms)

        if self.params.consistency_metric == 'spearman':
            dsms = np.apply_along_axis(rankdata, 1, dsms)
        corrmat = np.corrcoef(dsms)
        if self.params.square:
            ds = Dataset(corrmat, sa={self.params.chunks_attr: chunks})
        else:
            ds = Dataset(squareform(corrmat, checks=False),
                         sa=dict(pairs=list(combinations(chunks, 2))))
        return ds
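A standalone sketch of the per-chunk consistency computation on a plain NumPy array with a made-up `chunks` vector, assuming the Spearman consistency metric:

import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.stats import rankdata

rng = np.random.RandomState(0)
data = rng.randn(40, 20)                       # 40 samples x 20 features
chunks = np.repeat(np.arange(4), 10)           # 4 chunks of 10 samples each

dsms = np.vstack([pdist(data[chunks == c], 'correlation')
                  for c in np.unique(chunks)])
# Spearman consistency: rank each DSM before correlating
dsms = np.apply_along_axis(rankdata, 1, dsms)
corrmat = np.corrcoef(dsms)                    # chunk x chunk consistency matrix
print(squareform(corrmat, checks=False))       # flattened upper triangle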
Example #3
class GDA(Classifier):
    """Gaussian Discriminant Analysis -- base for LDA and QDA

    """

    __tags__ = ['binary', 'multiclass', 'oneclass']

    prior = Parameter('laplacian_smoothing',
                      constraints=EnsureChoice('laplacian_smoothing',
                                               'uniform', 'ratio'),
                      doc="""How to compute prior distribution.""")

    allow_pinv = Parameter(
        True,
        constraints='bool',
        doc="""Allow pseudo-inverse in case of degenerate covariance(s).""")

    def __init__(self, **kwargs):
        """Initialize a GDA classifier.
        """

        # init base class first
        Classifier.__init__(self, **kwargs)

        # pylint friendly initializations
        self.means = None
        """Means of features per class"""
        self.cov = None
        """Co-variances per class, but "vars" is taken ;)"""
        self.ulabels = None
        """Labels classifier was trained on"""
        self.priors = None
        """Class probabilities"""
        self.nsamples_per_class = None
        """Number of samples per class - used by derived classes"""

        # Define internal state of classifier
        self._norm_weight = None

    def _get_priors(self, nlabels, nsamples, nsamples_per_class):
        """Return prior probabilities given data
        """
        prior = self.params.prior
        if prior == 'uniform':
            priors = np.ones((nlabels, )) / nlabels
        elif prior == 'laplacian_smoothing':
            priors = (1+np.squeeze(nsamples_per_class)) \
                          / (float(nsamples) + nlabels)
        elif prior == 'ratio':
            priors = np.squeeze(nsamples_per_class) / float(nsamples)
        else:
            raise ValueError(
                "No idea on how to handle '%s' way to compute priors"
                % self.params.prior)
        return np.atleast_1d(priors)

    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        params = self.params
        targets_sa_name = self.get_space()
        targets_sa = dataset.sa[targets_sa_name]

        # get the dataset information into easy vars
        X = dataset.samples
        labels = targets_sa.value
        self.ulabels = ulabels = targets_sa.unique
        nlabels = len(ulabels)
        label2index = dict((l, il) for il, l in enumerate(ulabels))

        # set the feature dimensions
        nsamples = len(X)
        nfeatures = dataset.nfeatures

        self.means = means = \
                     np.zeros((nlabels, nfeatures))
        # degenerate dimension are added for easy broadcasting later on
        # XXX might want to remove -- for now taken from GNB as is
        self.nsamples_per_class = nsamples_per_class \
                                  = np.zeros((nlabels, 1))
        self.cov = cov = \
                     np.zeros((nlabels, nfeatures, nfeatures))

        # Estimate cov
        # better loop than repmat! ;)
        for l, il in label2index.iteritems():
            Xl = X[labels == l]
            nsamples_per_class[il] = len(Xl)
            # TODO: degenerate case... no samples for known label for
            #       some reason?
            means[il] = np.mean(Xl, axis=0)
            # since we have means already lets do manually cov here
            Xldm = Xl - means[il]
            cov[il] = np.dot(Xldm.T, Xldm)
            # scaling will be done correspondingly in LDA or QDA

        # Store prior probabilities
        self.priors = self._get_priors(nlabels, nsamples, nsamples_per_class)

        if __debug__ and 'GDA' in debug.active:
            debug(
                'GDA', "training finished on data.shape=%s " % (X.shape, ) +
                "min:max(data)=%f:%f" % (np.min(X), np.max(X)))

    def _untrain(self):
        """Untrain classifier and reset all learnt params
        """
        self.means = None
        self.cov = None
        self.ulabels = None
        self.priors = None
        super(GDA, self)._untrain()

    @accepts_dataset_as_samples
    def _predict(self, data):
        """Predict the output for the provided data.
        """
        params = self.params

        self.ca.estimates = prob_cs_cp = self._g_k(data)

        # Take the class with maximal (log)probability
        # XXX in GNB it is axis=0, i.e. classes were first
        winners = prob_cs_cp.argmax(axis=1)
        predictions = [self.ulabels[c] for c in winners]

        if __debug__ and 'GDA' in debug.active:
            debug(
                'GDA', "predict on data.shape=%s min:max(data)=%f:%f " %
                (data.shape, np.min(data), np.max(data)))

        return predictions

    def _inv(self, cov):
        try:
            return np.linalg.inv(cov)
        except Exception as e:
            if self.params.allow_pinv:
                try:
                    return np.linalg.pinv(cov)
                except Exception as e:
                    pass
            raise DegenerateInputError(
                "Data is probably singular, since inverse fails. Got %s"
                % (e,))
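A sketch of the quantities GDA collects during training (per-class means, unscaled scatter matrices, Laplacian-smoothed priors), reproduced on a toy array rather than through the classifier API:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(30, 5)                          # 30 samples x 5 features
labels = rng.choice(['a', 'b', 'c'], size=30)

ulabels = np.unique(labels)
means, covs, counts = [], [], []
for l in ulabels:
    Xl = X[labels == l]
    counts.append(len(Xl))
    means.append(Xl.mean(axis=0))
    Xldm = Xl - Xl.mean(axis=0)
    covs.append(np.dot(Xldm.T, Xldm))         # unscaled scatter, as in _train above

# Laplacian smoothing: add one pseudo-count per class
priors = (1 + np.asarray(counts, dtype=float)) / (len(X) + len(ulabels))
print(priors, priors.sum())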
Example #4
class ProcrusteanMapper(ProjectionMapper):
    """Mapper to project from one space to another using Procrustean
    transformation (shift + scaling + rotation).

    Training this mapper requires data for both source and target space to be
    present in the training dataset. The source space data is taken from the
    training dataset's ``samples``, while the target space is taken from a
    sample attribute corresponding to the ``space`` setting of the
    ProcrusteanMapper.

    See: http://en.wikipedia.org/wiki/Procrustes_transformation
    """
    scaling = Parameter(
        True,
        constraints='bool',
        doc="""Estimate a global scaling factor for the transformation
                       (no longer rigid body)""")
    reflection = Parameter(
        True,
        constraints='bool',
        doc="""Allow for the data to be reflected (so it might not be
                     a rotation. Effective only for non-oblique transformations.
                     """)
    reduction = Parameter(
        True,
        constraints='bool',
        doc="""If true, it is allowed to map into lower-dimensional
                     space. Forward transformation might be suboptimal then and
                     reverse transformation might not recover all original
                     variance.""")
    oblique = Parameter(
        False,
        constraints='bool',
        doc="""Either to allow non-orthogonal transformation -- might
                     heavily overfit the data if there is less samples than
                     dimensions. Use `oblique_rcond`.""")
    oblique_rcond = Parameter(
        -1,
        constraints='float',
        doc="""Cutoff for 'small' singular values to regularize the
                     inverse. See :func:`~numpy.linalg.lstsq` for more
                     information.""")
    svd = Parameter(
        'numpy',
        constraints=EnsureChoice('numpy', 'scipy', 'dgesvd'),
        doc="""Implementation of SVD to use. dgesvd requires ctypes to
                 be available.""")

    def __init__(self, space='targets', **kwargs):
        ProjectionMapper.__init__(self, space=space, **kwargs)

        self._scale = None
        """Estimated scale"""
        if self.params.svd == 'dgesvd' and not externals.exists(
                'liblapack.so'):
            warning(
                "Reverting choice of svd for ProcrusteanMapper to be default "
                "'numpy' since liblapack.so seems not to be available for "
                "'dgesvd'")
            self.params.svd = 'numpy'

    def _train(self, source):
        params = self.params
        # Since it is unsupervised, we don't care about labels
        datas = ()
        odatas = ()
        means = ()
        shapes = ()

        assess_residuals = __debug__ and 'MAP_' in debug.active

        target = source.sa[self.get_space()].value

        for i, ds in enumerate((source, target)):
            if is_datasetlike(ds):
                data = np.asarray(ds.samples)
            else:
                data = ds
            if assess_residuals:
                odatas += (data, )
            if self._demean:
                if i == 0:
                    mean = self._offset_in
                else:
                    mean = data.mean(axis=0)
                data = data - mean
            else:
                # no demeaning === zero means
                mean = np.zeros(shape=data.shape[1:])
            means += (mean, )
            datas += (data, )
            shapes += (data.shape, )

        # shortcuts for sizes
        sn, sm = shapes[0]
        tn, tm = shapes[1]

        # Check the sizes
        if sn != tn:
            raise ValueError, "Data for both spaces should have the same " \
                  "number of samples. Got %d in source and %d in target space" \
                  % (sn, tn)

        # Sums of squares
        ssqs = [np.sum(d**2, axis=0) for d in datas]

        # XXX check for being invariant?
        #     needs to be tuned up properly and not raise but handle
        for i in xrange(2):
            if np.all(ssqs[i] <= np.abs((np.finfo(datas[i].dtype).eps * sn *
                                         means[i])**2)):
                raise ValueError, "For now do not handle invariant in time datasets"

        norms = [np.sqrt(np.sum(ssq)) for ssq in ssqs]
        normed = [data / norm for (data, norm) in zip(datas, norms)]

        # add new blank dimensions to source space if needed
        if sm < tm:
            normed[0] = np.hstack((normed[0], np.zeros((sn, tm - sm))))

        if sm > tm:
            if params.reduction:
                normed[1] = np.hstack((normed[1], np.zeros((sn, sm - tm))))
            else:
                raise ValueError, "reduction=False, so mapping from " \
                      "higher dimensionality " \
                      "source space is not supported. Source space had %d " \
                      "while target %d dimensions (features)" % (sm, tm)

        source, target = normed
        if params.oblique:
            # Just do silly linear system of equations ;) or naive
            # inverse problem
            if sn == sm and tm == 1:
                T = np.linalg.solve(source, target)
            else:
                T = np.linalg.lstsq(source, target,
                                    rcond=params.oblique_rcond)[0]
            ss = 1.0
        else:
            # Orthogonal transformation
            # figure out optimal rotation
            if params.svd == 'numpy':
                U, s, Vh = np.linalg.svd(np.dot(target.T, source),
                                         full_matrices=False)
            elif params.svd == 'scipy':
                # would raise exception if not present
                externals.exists('scipy', raise_=True)
                import scipy
                U, s, Vh = scipy.linalg.svd(np.dot(target.T, source),
                                            full_matrices=False)
            elif params.svd == 'dgesvd':
                from mvpa2.support.lapack_svd import svd as dgesvd
                U, s, Vh = dgesvd(np.dot(target.T, source),
                                  full_matrices=True,
                                  algo='svd')
            else:
                raise ValueError('Unknown type of svd %r' % (params.svd))
            T = np.dot(Vh.T, U.T)

            if not params.reflection:
                # then we need to assure that it is only rotation
                # "recipe" from
                # http://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem
                # for more info and original references, see
                # http://dx.doi.org/10.1007%2FBF02289451
                s_new = np.ones_like(s)
                s_new[-1] = np.linalg.det(T)
                T = np.dot(Vh.T * s_new, U.T)

            # figure out scale and final translation
            if not params.reflection:
                ss = np.sum(s_new * s)
            else:
                ss = np.sum(s)

        # if we were to collect standardized distance
        # std_d = 1 - sD**2

        # select out only relevant dimensions
        if sm != tm:
            T = T[:sm, :tm]

        self._scale = scale = ss * norms[1] / norms[0]
        # Assign projection
        if self.params.scaling:
            proj = scale * T
        else:
            proj = T
        self._proj = proj

        if self._demean:
            self._offset_out = means[1]

        if __debug__ and 'MAP_' in debug.active:
            # compute the residuals
            res_f = self.forward(odatas[0])
            d_f = np.linalg.norm(odatas[1] - res_f) / np.linalg.norm(odatas[1])
            res_r = self.reverse(odatas[1])
            d_r = np.linalg.norm(odatas[0] - res_r) / np.linalg.norm(odatas[0])
            debug(
                'MAP_', "%s, residuals are forward: %g,"
                " reverse: %g" % (repr(self), d_f, d_r))

    def _compute_recon(self):
        """For Procrustean mapper, inverse is transpose.
        So, let's skip computing inverse in the super class.
        """
        # XXX Change pinv to superclass compute_recon?
        if self.params.oblique:
            #return ProjectionMapper._compute_recon(self)
            return np.linalg.pinv(self._proj)
        else:
            return np.transpose(
                self._proj /
                self._scale**2) if self.params.scaling else np.transpose(
                    self._proj)
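A sketch of the orthogonal Procrustes step at the heart of `_train()`: after demeaning, the optimal rotation is recovered from the SVD of `target.T . source`. Toy data; the scaling, reflection handling, and dimension padding of the full mapper are omitted:

import numpy as np

rng = np.random.RandomState(0)
source = rng.randn(20, 3)
Q, _ = np.linalg.qr(rng.randn(3, 3))          # a random orthogonal matrix
target = np.dot(source, Q)                    # target is a rotated copy of source

src = source - source.mean(axis=0)
tgt = target - target.mean(axis=0)
U, s, Vh = np.linalg.svd(np.dot(tgt.T, src), full_matrices=False)
T = np.dot(Vh.T, U.T)                         # estimated rotation, as in _train()
print(np.allclose(np.dot(src, T), tgt))       # True: rotation recovered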
Example #5
class GroupClusterThreshold_NN3(Learner):
    """Statistical evaluation of group-level average accuracy maps

    This algorithm can be used to perform cluster-thresholding of
    searchlight-based group analyses. It implements a two-stage procedure that
    uses the results of within-subject permutation analyses, estimates a per
    feature cluster forming threshold (via bootstrap), and uses the thresholded
    bootstrap samples to estimate the distribution of cluster sizes in
    group-average accuracy maps under the NULL hypothesis, as described in [1]_.

    Note: this class implements a modified version of that algorithm. The
    present implementation differs in, at least, four aspects from the
    description in that paper.

    1) Cluster p-values refer to the probability of observing a particular
       cluster size or a larger one (original paper: probability to observe a
       larger cluster only).  Consequently, probabilities reported by this
       implementation will have a tendency to be higher in comparison.

    2) Clusters found in the original (unpermuted) accuracy map are always
       included in the NULL distribution estimate of cluster sizes. This
       provides an explicit lower bound for probabilities, as there will
       always be at least one observed cluster for every cluster size found
       in the original accuracy map. Consequently, it is impossible to get a
       probability of zero for clusters of any size (see [2]_ for more
       information).

    3) Bootstrap accuracy maps that contain no clusters are counted in a
       dedicated size-zero bin in the NULL distribution of cluster sizes.
       This change yields reliable cluster probabilities even for very low
       featurewise threshold probabilities, where some portion of the
       bootstrap accuracy maps do not contain any clusters.

    4) The method for FWE-correction used by the original authors is not
       provided. Instead, a range of alternatives implemented by the
       statsmodels package are available.

    Moreover, this implementation minimizes the required memory demands and
    allows for computing large numbers of bootstrap samples without
    significant increase in memory demand (CPU time trade-off).

    Instances of this class must be trained before they can be used to
    threshold accuracy maps. The training dataset must match the following
    criteria:

    1) For every subject in the group, it must contain multiple accuracy maps
       that are the result of a within-subject classification analysis
       based on permuted class labels. One map must correspond to one fixed
       permutation for all features in the map, as described in [1]_. The
       original authors recommend 100 accuracy maps per subject for a typical
       searchlight analysis.

    2) It must contain a sample attribute indicating which sample is
       associated with which subject, because bootstrapping average accuracy
       maps is implemented by randomly drawing one map from each subject.
       The name of the attribute can be configured via the ``chunk_attr``
       parameter.

    After training, an instance can be called with a dataset to perform
    thresholding and statistical evaluation. Unless a single-sample dataset
    is passed, all samples in the input dataset will be averaged prior to
    thresholding.

    Returns
    -------
    Dataset
      This is a shallow copy of the input dataset (after a potential
      averaging), hence contains the same data and attributes. In addition it
      includes the following attributes:

      ``fa.featurewise_thresh``
        Vector with feature-wise cluster-forming thresholds.

      ``fa.clusters_featurewise_thresh``
        Vector with labels for clusters after thresholding the input data
        with the desired feature-wise probability. Each unique non-zero
        element corresponds to an individual super-threshold cluster. Cluster
        values are sorted by cluster size (number of features). The largest
        cluster is always labeled with ``1``.

      ``fa.clusters_fwe_thresh``
        Vector with labels for super-threshold clusters after correction for
        multiple comparisons. The attribute is derived from
        ``fa.clusters_featurewise_thresh`` by removing all clusters that
        do not pass the threshold when controlling for the family-wise error
        rate.

      ``a.clusterstats``
        Record array with information on all detected clusters. The array is
        sorted according to cluster size, starting with the largest cluster
        in terms of number of features. The array contains the fields ``size``
        (number of features comprising the cluster), ``mean``, ``median``,
        ``min``, ``max``, ``std`` (respective descriptive statistics for all
        clusters), and ``prob_raw`` (probability of observing a cluster of
        this size or larger under the NULL hypothesis). If correction for
        multiple comparisons is enabled an additional field ``prob_corrected``
        (probability after correction) is added.

      ``a.clusterlocations``
        Record array with information on the location of all detected clusters.
        The array is sorted according to cluster size (same order as
        ``a.clusterstats``). The array contains the fields ``max``
        (feature coordinate of the maximum score within the cluster), and
        ``center_of_mass`` (coordinate of the center of mass, weighted by
        the feature values within the cluster).

    References
    ----------
    .. [1] Johannes Stelzer, Yi Chen and Robert Turner (2013). Statistical
       inference and multiple testing correction in classification-based
       multi-voxel pattern analysis (MVPA): Random permutations and cluster
       size control. NeuroImage, 65, 69--82.
    .. [2] Smyth, G. K., & Phipson, B. (2010). Permutation P-values Should
       Never Be Zero: Calculating Exact P-values When Permutations Are
       Randomly Drawn. Statistical Applications in Genetics and Molecular
       Biology, 9, 1--12.
    """

    n_bootstrap = Parameter(
        100000, constraints=EnsureInt() & EnsureRange(min=1),
        doc="""Number of bootstrap samples to be generated from the training
            dataset. For each sample, an average map will be computed from a
            set of randomly drawn samples (one from each chunk). Bootstrap
            samples will be used to estimate a featurewise NULL distribution of
            accuracy values for initial thresholding, and to estimate the NULL
            distribution of cluster sizes under the NULL hypothesis. A larger
            number of bootstrap samples reduces the lower bound of
            probabilities, which may be beneficial for multiple comparison
            correction.""")

    feature_thresh_prob = Parameter(
        0.001, constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0),
        doc="""Feature-wise probability threshold. The value corresponding
            to this probability in the NULL distribution of accuracies will
            be used as threshold for cluster forming. Given that the NULL
            distribution is estimated per feature, the actual threshold value
            will vary across features yielding a threshold vector. The number
            of bootstrap samples needs to be adequate for a desired probability.
            A ``ValueError`` is raised otherwise.""")

    chunk_attr = Parameter(
        'chunks',
        doc="""Name of the attribute indicating the individual chunks from
            which a single sample each is drawn for averaging into a bootstrap
            sample.""")

    fwe_rate = Parameter(
        0.05, constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0),
        doc="""Family-wise error rate for multiple comparison correction
            of cluster size probabilities.""")

    multicomp_correction = Parameter(
        'fdr_bh', constraints=EnsureChoice('bonferroni', 'sidak', 'holm-sidak',
                                           'holm', 'simes-hochberg', 'hommel',
                                           'fdr_bh', 'fdr_by', None),
        doc="""Strategy for multiple comparison correction of cluster
            probabilities. All methods supported by statsmodels' ``multitest``
            are available. In addition, ``None`` can be specified to disable
            correction.""")

    n_blocks = Parameter(
        1, constraints=EnsureInt() & EnsureRange(min=1),
        doc="""Number of segments used to compute the feature-wise NULL
            distributions. This parameter determines the peak memory demand.
            In case of a single segment a matrix of size
            (n_bootstrap x nfeatures) will be allocated. Increasing the number
            of segments reduces the peak memory demand by roughly that factor.
            """)

    n_proc = Parameter(
        1, constraints=EnsureInt() & EnsureRange(min=1),
        doc="""Number of parallel processes to use for computation.
            Requires `joblib` external module.""")

    def __init__(self, **kwargs):
        # force disable auto-train: would make no sense
        Learner.__init__(self, auto_train=False, **kwargs)
        if 1. / (self.params.n_bootstrap + 1) > self.params.feature_thresh_prob:
            raise ValueError('number of bootstrap samples is insufficient for'
                             ' the desired threshold probability')
        self.untrain()

    def _untrain(self):
        self._thrmap = None
        self._null_cluster_sizes = None

    @due.dcite(
        Doi("10.1016/j.neuroimage.2012.09.063"),
        description="Statistical assessment of (searchlight) MVPA results",
        tags=['implementation'])
    def _train(self, ds):
        # shortcuts
        chunk_attr = self.params.chunk_attr
        #
        # Step 0: bootstrap maps by drawing one for each chunk and average them
        # (do N iterations)
        # this could take a lot of memory, hence instead of computing the maps
        # we only record which source samples each would be computed from, and
        # then (re)build the matrix of bootstrapped maps either row-wise or
        # column-wise (as needed) to save memory by a factor of (close to)
        # `n_bootstrap`
        # which samples belong to which chunk
        chunk_samples = dict([(c, np.where(ds.sa[chunk_attr].value == c)[0])
                              for c in ds.sa[chunk_attr].unique])
        # pre-build the bootstrap combinations
        bcombos = [[random.sample(v, 1)[0] for v in chunk_samples.values()]
                   for i in xrange(self.params.n_bootstrap)]
        bcombos = np.array(bcombos, dtype=int)
        #
        # Step 1: find the per-feature threshold that corresponds to some p
        # in the NULL
        segwidth = ds.nfeatures / self.params.n_blocks
        # speed things up by operating on an array not a dataset
        ds_samples = ds.samples
        if __debug__:
            debug('GCTHR',
                  'Compute per-feature thresholds in %i blocks of %i features'
                  % (self.params.n_blocks, segwidth))
        # Execution can be done in parallel as the estimation is independent
        # across features

        def featuresegment_producer(ncols):
            for segstart in xrange(0, ds.nfeatures, ncols):
                # one average map for every stored bcombo
                # this also slices the input data into feature subsets
                # for the compute blocks
                yield [np.mean(
                       # get a view to a subset of the features
                       # -- should be somewhat efficient as feature axis is
                       # sliced
                       ds_samples[sidx, segstart:segstart + ncols],
                       axis=0)
                       for sidx in bcombos]
        if self.params.n_proc == 1:
            # Serial execution
            thrmap = np.hstack(  # merge across compute blocks
                [get_thresholding_map(d, self.params.feature_thresh_prob)
                 # compute a partial threshold map for as many features
                 # as fit into a compute block
                 for d in featuresegment_producer(segwidth)])
        else:
            # Parallel execution
            verbose_level_parallel = 50 \
                if (__debug__ and 'GCTHR' in debug.active) else 0
            # local import as only parallel execution needs this
            from joblib import Parallel, delayed
            # same code as above, just in parallel with joblib's Parallel
            thrmap = np.hstack(
                Parallel(n_jobs=self.params.n_proc,
                         pre_dispatch=self.params.n_proc,
                         verbose=verbose_level_parallel)(
                             delayed(get_thresholding_map)
                        (d, self.params.feature_thresh_prob)
                             for d in featuresegment_producer(segwidth)))
        # store for later thresholding of input data
        self._thrmap = thrmap
        #
        # Step 2: threshold all NULL maps and build distribution of NULL cluster
        #         sizes
        #
        cluster_sizes = Counter()
        # recompute the bootstrap average maps to threshold them and determine
        # cluster sizes
        dsa = dict(mapper=ds.a.mapper) if 'mapper' in ds.a else {}
        if __debug__:
            debug('GCTHR', 'Estimating NULL distribution of cluster sizes')
        # this step can be computed in parallel chunks to speed things up
        if self.params.n_proc == 1:
            # Serial execution
            for sidx in bcombos:
                avgmap = np.mean(ds_samples[sidx], axis=0)[None]
                # apply threshold
                clustermap = avgmap > thrmap
                # wrap into a throw-away dataset to get the reverse mapping right
                bds = Dataset(clustermap, a=dsa)
                # this function reverse-maps every sample one-by-one, hence no need
                # to collect chunks of bootstrapped maps
                cluster_sizes = get_cluster_sizes(bds, cluster_sizes)
        else:
            # Parallel execution
            # same code as above, just restructured for joblib's Parallel
            for jobres in Parallel(n_jobs=self.params.n_proc,
                                   pre_dispatch=self.params.n_proc,
                                   verbose=verbose_level_parallel)(
                                       delayed(get_cluster_sizes)
                                  (Dataset(np.mean(ds_samples[sidx],
                                           axis=0)[None] > thrmap,
                                           a=dsa))
                                       for sidx in bcombos):
                # aggregate
                cluster_sizes += jobres
        # store cluster size histogram for later p-value evaluation
        # use a sparse matrix for easy consumption (max dim is the number of
        # features, i.e. biggest possible cluster)
        scl = dok_matrix((1, ds.nfeatures + 1), dtype=int)
        for s in cluster_sizes:
            scl[0, s] = cluster_sizes[s]
        self._null_cluster_sizes = scl

    def _call(self, ds):
        if len(ds) > 1:
            # average all samples into one, assuming we got something like one
            # sample per subject as input
            avgr = mean_sample()
            ds = avgr(ds)
        # threshold input; at this point we only have one sample left
        thrd = ds.samples[0] > self._thrmap
        # mapper default
        mapper = IdentityMapper()
        # overwrite if possible
        if hasattr(ds, 'a') and 'mapper' in ds.a:
            mapper = ds.a.mapper
        # reverse-map input
        othrd = _verified_reverse1(mapper, thrd)
        # TODO: what is your purpose in life osamp? ;-)
        osamp = _verified_reverse1(mapper, ds.samples[0])
        # prep output dataset
        outds = ds.copy(deep=False)
        outds.fa['featurewise_thresh'] = self._thrmap
        # determine clusters
        labels, num = measurements.label(othrd, structure=np.ones([3, 3, 3]))
        area = measurements.sum(othrd,
                                labels,
                                index=np.arange(1, num + 1)).astype(int)
        com = measurements.center_of_mass(
            osamp, labels=labels, index=np.arange(1, num + 1))
        maxpos = measurements.maximum_position(
            osamp, labels=labels, index=np.arange(1, num + 1))
        # for the rest we need the labels flattened
        labels = mapper.forward1(labels)
        # relabel clusters starting with the biggest and increase index with
        # decreasing size
        ordered_labels = np.zeros(labels.shape, dtype=int)
        ordered_area = np.zeros(area.shape, dtype=int)
        ordered_com = np.zeros((num, len(osamp.shape)), dtype=float)
        ordered_maxpos = np.zeros((num, len(osamp.shape)), dtype=float)
        for i, idx in enumerate(np.argsort(area)):
            ordered_labels[labels == idx + 1] = num - i
            # kinda ugly, but we are looping anyway
            ordered_area[i] = area[idx]
            ordered_com[i] = com[idx]
            ordered_maxpos[i] = maxpos[idx]
        labels = ordered_labels
        area = ordered_area[::-1]
        com = ordered_com[::-1]
        maxpos = ordered_maxpos[::-1]
        del ordered_labels  # this one can be big
        # store cluster labels after forward-mapping
        outds.fa['clusters_featurewise_thresh'] = labels.copy()
        # location info
        outds.a['clusterlocations'] = \
            np.rec.fromarrays(
                [com, maxpos], names=('center_of_mass', 'max'))

        # update cluster size histogram with the actual result to get a
        # proper lower bound for p-values
        # this will make a copy, because the original matrix is int
        cluster_probs_raw = _transform_to_pvals(
            area, self._null_cluster_sizes.astype('float'))

        clusterstats = (
            [area, cluster_probs_raw],
            ['size', 'prob_raw']
        )
        # evaluate a bunch of stats for all clusters
        morestats = {}
        for cid in xrange(len(area)):
            # keep clusters on outer loop, because selection is more expensive
            clvals = ds.samples[0, labels == cid + 1]
            for id_, fx in (
                    ('mean', np.mean),
                    ('median', np.median),
                    ('min', np.min),
                    ('max', np.max),
                    ('std', np.std)):
                stats = morestats.get(id_, [])
                stats.append(fx(clvals))
                morestats[id_] = stats

        for k, v in morestats.items():
            clusterstats[0].append(v)
            clusterstats[1].append(k)

        if self.params.multicomp_correction is not None:
            # do a local import as only this tiny portion needs statsmodels
            import statsmodels.stats.multitest as smm
            rej, probs_corr = smm.multipletests(
                cluster_probs_raw,
                alpha=self.params.fwe_rate,
                method=self.params.multicomp_correction)[:2]
            # store corrected per-cluster probabilities
            clusterstats[0].append(probs_corr)
            clusterstats[1].append('prob_corrected')
            # remove cluster labels that did not pass the FWE threshold
            for i, r in enumerate(rej):
                if not r:
                    labels[labels == i + 1] = 0
            outds.fa['clusters_fwe_thresh'] = labels
        outds.a['clusterstats'] = \
            np.rec.fromarrays(clusterstats[0], names=clusterstats[1])
        return outds
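A rough sketch of Step 1 of `_train()` on toy data: bootstrap group-average NULL maps by drawing one permutation map per chunk (subject), then take a per-feature percentile as the cluster-forming threshold. The percentile call stands in for `get_thresholding_map()`, whose exact quantile handling may differ:

import numpy as np

rng = np.random.RandomState(0)
n_subj, n_perms, n_feat = 10, 20, 100
null_maps = rng.rand(n_subj * n_perms, n_feat)       # permutation accuracy maps
chunks = np.repeat(np.arange(n_subj), n_perms)       # subject id per map

n_bootstrap, thresh_prob = 1000, 0.001
subj_idx = [np.where(chunks == s)[0] for s in np.unique(chunks)]
boot = np.empty((n_bootstrap, n_feat))
for b in range(n_bootstrap):
    pick = [rng.choice(idx) for idx in subj_idx]     # one map per subject
    boot[b] = null_maps[pick].mean(axis=0)           # bootstrap group average
# per-feature cluster-forming threshold at p = thresh_prob
thrmap = np.percentile(boot, 100 * (1 - thresh_prob), axis=0)
print(thrmap.shape)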
Example #6
class GNB(Classifier):
    """Gaussian Naive Bayes `Classifier`.

    `GNB` is a probabilistic classifier relying on Bayes' rule to
    estimate posterior probabilities of labels given the data.  Its naive
    assumption is the independence of the features, which allows
    per-feature likelihoods to be combined by a simple product across
    the likelihoods of the "independent" features.
    See http://en.wikipedia.org/wiki/Naive_bayes for more information.

    The implementation provided here is "naive" in its own right -- various
    aspects could be improved, but it has its own advantages:

    - implementation is simple and straightforward
    - no data copying while considering samples of specific class
    - provides alternative ways to assess prior distribution of the
      classes in the case of unbalanced sets of samples (see parameter
      `prior`)
    - makes use of NumPy's broadcasting mechanism, so it should be
      relatively efficient
    - should work for any dimensionality of samples

    `GNB` is listed both as a linear and a non-linear classifier, since the
    specifics of the separating boundary depend on the data and/or
    parameters: linear separation is achieved whenever samples are
    balanced (or ``prior='uniform'``) and features have the same
    variance across different classes (i.e. if
    ``common_variance=True`` is set to enforce this).

    Whenever decisions are made based on log-probabilities (parameter
    ``logprob=True``, which is the default), the conditional
    attribute `values`, if enabled, will also contain
    log-probabilities.  Note that normalization by the
    evidence (P(data)) is disabled by default since it has no impact
    per se on the classification decision.  Set the parameter
    ``normalize`` to True if you want to access properly scaled
    probabilities in the `values` conditional attribute.
    """
    # XXX decide when should we set corresponding internal,
    #     since it depends actually on the data -- no clear way,
    #     so set both linear and non-linear
    __tags__ = [ 'gnb', 'linear', 'non-linear',
                       'binary', 'multiclass' ]

    common_variance = Parameter(False, constraints='bool',
             doc="""Use the same variance across all classes.""")

    prior = Parameter('laplacian_smoothing',
             constraints=EnsureChoice('laplacian_smoothing', 'uniform', 'ratio'),
             doc="""How to compute prior distribution.""")

    logprob = Parameter(True, constraints='bool',
             doc="""Operate on log probabilities.  Preferable to avoid unneeded
             exponentiation and loose precision.
             If set, logprobs are stored in `values`""")

    normalize = Parameter(False, constraints='bool',
             doc="""Normalize (log)prob by P(data).  Requires probabilities thus
             for `logprob` case would require exponentiation of 'logprob's, thus
             disabled by default since does not impact classification output.
             """)

    def __init__(self, **kwargs):
        """Initialize an GNB classifier.
        """

        # init base class first
        Classifier.__init__(self, **kwargs)

        # pylint friendly initializations
        self.means = None
        """Means of features per class"""
        self.variances = None
        """Variances per class, but "vars" is taken ;)"""
        self.ulabels = None
        """Labels classifier was trained on"""
        self.priors = None
        """Class probabilities"""

        # Define internal state of classifier
        self._norm_weight = None

    def _get_priors(self, nlabels, nsamples, nsamples_per_class):
        """Return prior probabilities given data
        """
        # helper function - squash all dimensions but 1
        squash = lambda x: np.atleast_1d(x.squeeze())

        prior = self.params.prior
        if prior == 'uniform':
            priors = np.ones((nlabels,))/nlabels
        elif prior == 'laplacian_smoothing':
            priors = (1+squash(nsamples_per_class)) \
                          / (float(nsamples) + nlabels)
        elif prior == 'ratio':
            priors = squash(nsamples_per_class) / float(nsamples)
        else:
            raise ValueError(
                "No idea on how to handle '%s' way to compute priors"
                % self.params.prior)
        return priors

    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        params = self.params
        targets_sa_name = self.get_space()
        targets_sa = dataset.sa[targets_sa_name]

        # get the dataset information into easy vars
        X = dataset.samples
        labels = targets_sa.value
        self.ulabels = ulabels = targets_sa.unique
        nlabels = len(ulabels)
        label2index = dict((l, il) for il, l in enumerate(ulabels))

        # set the feature dimensions
        nsamples = len(X)
        s_shape = X.shape[1:]           # shape of a single sample

        self.means = means = \
                     np.zeros((nlabels, ) + s_shape)
        self.variances = variances = \
                     np.zeros((nlabels, ) + s_shape)
        # degenerate dimension are added for easy broadcasting later on
        nsamples_per_class = np.zeros((nlabels,) + (1,)*len(s_shape))

        # Estimate means and number of samples per each label
        for s, l in zip(X, labels):
            il = label2index[l]         # index of the label
            nsamples_per_class[il] += 1
            means[il] += s

        # helper function - squash all dimensions but 1
        squash = lambda x: np.atleast_1d(x.squeeze())
        ## Actually compute the means
        non0labels = (squash(nsamples_per_class) != 0)
        means[non0labels] /= nsamples_per_class[non0labels]

        # Store prior probabilities
        self.priors = self._get_priors(nlabels, nsamples, nsamples_per_class)

        # Estimate variances
        # better loop than repmat! ;)
        for s, l in zip(X, labels):
            il = label2index[l]         # index of the label
            variances[il] += (s - means[il])**2

        ## Actually compute the variances
        if params.common_variance:
            # we need to get global std
            cvar = np.sum(variances, axis=0)/nsamples # sum across labels
            # broadcast the same variance across labels
            variances[:] = cvar
        else:
            variances[non0labels] /= nsamples_per_class[non0labels]

        # Precompute and store weighting coefficient for Gaussian
        if params.logprob:
            # it would be added to exponent
            self._norm_weight = -0.5 * np.log(2*np.pi*variances)
        else:
            self._norm_weight = 1.0/np.sqrt(2*np.pi*variances)

        if __debug__ and 'GNB' in debug.active:
            debug('GNB', "training finished on data.shape=%s " % (X.shape, )
                  + "min:max(data)=%f:%f" % (np.min(X), np.max(X)))


    def _untrain(self):
        """Untrain classifier and reset all learnt params
        """
        self.means = None
        self.variances = None
        self.ulabels = None
        self.priors = None
        super(GNB, self)._untrain()


    @accepts_dataset_as_samples
    def _predict(self, data):
        """Predict the output for the provided data.
        """
        params = self.params
        # argument of exponentiation
        scaled_distances = \
            -0.5 * (((data - self.means[:, np.newaxis, ...])**2) \
                          / self.variances[:, np.newaxis, ...])
        if params.logprob:
            # if self.params.common_variance:
            # XXX YOH:
            # For decision there is no need to actually compute
            # properly scaled p, ie 1/sqrt(2pi * sigma_i) could be
            # simply discarded since it is common across features AND
            # classes
            # For completeness -- computing everything now even in logprob
            lprob_csfs = self._norm_weight[:, np.newaxis, ...] \
                         + scaled_distances

            # XXX for now just cut/paste with different operators, but
            #     could just bind them and reuse in the same equations
            # Naive part -- just a product of probabilities across features
            ## First we need to reshape to get class x samples x features
            lprob_csf = lprob_csfs.reshape(
                lprob_csfs.shape[:2] + (-1,))
            ## Now -- sum across features
            lprob_cs = lprob_csf.sum(axis=2)

            # Incorporate class probabilities:
            prob_cs_cp = lprob_cs + np.log(self.priors[:, np.newaxis])

        else:
            # Just a regular Normal distribution with per
            # feature/class mean and variances
            prob_csfs = \
                 self._norm_weight[:, np.newaxis, ...] \
                 * np.exp(scaled_distances)

            # Naive part -- just a product of probabilities across features
            ## First we need to reshape to get class x samples x features
            prob_csf = prob_csfs.reshape(
                prob_csfs.shape[:2] + (-1,))
            ## Now -- product across features
            prob_cs = prob_csf.prod(axis=2)

            # Incorporate class probabilities:
            prob_cs_cp = prob_cs * self.priors[:, np.newaxis]

        # Normalize by evidence P(data)
        if params.normalize:
            if params.logprob:
                prob_cs_cp_real = np.exp(prob_cs_cp)
            else:
                prob_cs_cp_real = prob_cs_cp
            prob_s_cp_marginals = np.sum(prob_cs_cp_real, axis=0)
            if params.logprob:
                prob_cs_cp -= np.log(prob_s_cp_marginals)
            else:
                prob_cs_cp /= prob_s_cp_marginals

        # Take the class with maximal (log)probability
        winners = prob_cs_cp.argmax(axis=0)
        predictions = [self.ulabels[c] for c in winners]

        # set to the probabilities per class
        self.ca.estimates = prob_cs_cp.T

        if __debug__ and 'GNB' in debug.active:
            debug('GNB', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (data.shape, np.min(data), np.max(data)))

        return predictions
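A sketch of the naive-Bayes decision rule used in `_predict()` for the default `logprob=True` case: per-class Gaussian log-likelihoods summed over features, plus log-priors. The means, variances, and priors below are made up for illustration:

import numpy as np

rng = np.random.RandomState(0)
means = np.array([[0.0, 0.0], [1.0, 1.0]])           # class x feature
variances = np.array([[1.0, 1.0], [1.0, 1.0]])
priors = np.array([0.5, 0.5])
data = rng.randn(5, 2)                               # samples x features

norm_weight = -0.5 * np.log(2 * np.pi * variances)
scaled_dist = -0.5 * (data[np.newaxis] - means[:, np.newaxis]) ** 2 \
              / variances[:, np.newaxis]
lprob_csf = norm_weight[:, np.newaxis] + scaled_dist  # class x sample x feature
lprob_cs = lprob_csf.sum(axis=2) + np.log(priors)[:, np.newaxis]
predictions = lprob_cs.argmax(axis=0)                 # winning class per sample
print(predictions)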
Example #7
class Regression(Measure):
    """
    Given a dataset, compute regularized regression (Ridge or Lasso) on the
    computed neural dissimilarity matrix using an arbitrary number of predictors
    (model dissimilarity matrices).

    Requires scikit-learn
    """

    is_trained = True
    """Indicate that this measure is always trained."""

    # copied from PDist class XXX: ok or pass it in kwargs?
    pairwise_metric = Parameter('correlation',
                                constraints='str',
                                doc="""\
          Distance metric to use for calculating pairwise vector distances for
          dissimilarity matrix (DSM).  See scipy.spatial.distance.pdist for
          all possible metrics.""")

    center_data = Parameter(False,
                            constraints='bool',
                            doc="""\
          If True then center each column of the data matrix by subtracting the
          column mean from each element. This is recommended especially when
          using pairwise_metric='correlation'.""")

    method = Parameter('ridge',
                       constraints=EnsureChoice('ridge', 'lasso'),
                       doc='Compute Ridge (l2) or Lasso (l1) regression')

    alpha = Parameter(1.0,
                      constraints='float',
                      doc='alpha parameter for lasso regression')

    fit_intercept = Parameter(True,
                              constraints='bool',
                              doc='whether to fit the intercept')

    rank_data = Parameter(True,
                          constraints='bool',
                          doc='whether to rank the neural dsm and the '
                          'predictor dsms before running the regression model')

    normalize = Parameter(
        False,
        constraints='bool',
        doc='if True the predictors and neural dsm will be '
        'normalized (z-scored) prior to the regression (and after '
        'the data ranking, if rank_data=True)')

    def __init__(self, predictors, keep_pairs=None, **kwargs):
        """
        Parameters
        ----------
        predictors : array (N*(N-1)/2, n_predictors)
            array containing the upper triangular matrix in vector form of the
            predictor Dissimilarity Matrices. Each column is a predictor dsm.

        keep_pairs : None or list or array
            indices in range(N*(N-1)/2) to keep before running the regression.
            All other elements will be removed. If None, the regression is run
            on the entire DSM.

        Returns
        -------
        Dataset
            a dataset with n_predictors samples and one feature. If fit_intercept
            is True, the last sample is the intercept.
        """
        super(Regression, self).__init__(**kwargs)

        if len(predictors.shape) == 1:
            raise ValueError('predictors have shape {0}. Make sure the array '
                             'is at least 2d and transposed correctly'.format(
                                 predictors.shape))
        self.predictors = predictors
        self.keep_pairs = keep_pairs

    def _call(self, dataset):
        externals.exists('skl', raise_=True)
        from sklearn.linear_model import Lasso, Ridge
        from sklearn.preprocessing import scale

        # first run PDist
        compute_dsm = PDist(pairwise_metric=self.params.pairwise_metric,
                            center_data=self.params.center_data)
        dsm = compute_dsm(dataset)
        dsm_samples = dsm.samples

        if self.params.rank_data:
            dsm_samples = rankdata(dsm_samples)
            predictors = np.apply_along_axis(rankdata, 0, self.predictors)
        else:
            predictors = self.predictors

        if self.params.normalize:
            predictors = scale(predictors, axis=0)
            dsm_samples = scale(dsm_samples, axis=0)

        # keep only the item we want
        if self.keep_pairs is not None:
            dsm_samples = dsm_samples[self.keep_pairs]
            predictors = predictors[self.keep_pairs, :]

        # check that predictors and samples have the correct dimensions
        if dsm_samples.shape[0] != predictors.shape[0]:
            raise ValueError('computed dsm has {0} rows, while predictors have'
                             '{1} rows. Check that predictors have the right'
                             'shape'.format(dsm_samples.shape[0],
                                            predictors.shape[0]))

        # now fit the regression
        if self.params.method == 'lasso':
            reg = Lasso
        elif self.params.method == 'ridge':
            reg = Ridge
        else:
            raise ValueError('I do not know method {0}'.format(
                self.params.method))
        reg_ = reg(alpha=self.params.alpha,
                   fit_intercept=self.params.fit_intercept)
        reg_.fit(predictors, dsm_samples)

        coefs = reg_.coef_.reshape(-1, 1)

        sa = ['coef' + str(i) for i in range(len(coefs))]

        if self.params.fit_intercept:
            coefs = np.vstack((coefs, reg_.intercept_))
            sa += ['intercept']

        return Dataset(coefs, sa={'coefs': sa})
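A standalone sketch of the regression this measure runs (rank-transform the neural DSM and the predictor DSMs, then fit a ridge model), on made-up data and requiring scikit-learn:

import numpy as np
from scipy.spatial.distance import pdist
from scipy.stats import rankdata
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
data = rng.randn(12, 30)                        # 12 conditions x 30 features
n_pairs = 12 * 11 // 2
predictors = rng.rand(n_pairs, 2)               # two model DSMs, column-wise

neural_dsm = rankdata(pdist(data, 'correlation'))
predictors = np.apply_along_axis(rankdata, 0, predictors)

reg = Ridge(alpha=1.0, fit_intercept=True)
reg.fit(predictors, neural_dsm)
print(reg.coef_, reg.intercept_)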
Example #8
class IIRFilterMapper(Mapper):
    """Mapper using IIR filters for data transformation.

    This mapper is able to perform any IIR-based low-pass, high-pass, or
    band-pass frequency filtering. This is a front-end for SciPy's filtfilt(),
    hence its usage looks almost exactly identical, and any of SciPy's IIR
    filters can be used with this mapper:

    >>> from scipy import signal
    >>> b, a = signal.butter(8, 0.125)
    >>> mapper = IIRFilterMapper(b, a, padlen=150)

    """

    axis = Parameter(
        0,
        constraints='int',
        doc="""The axis of `x` to which the filter is applied. By default
            the filter is applied to all features along the samples axis""")

    padtype = Parameter(
        'odd',
        constraints=EnsureChoice('odd', 'even', 'constant') | EnsureNone(),
        doc="""Must be 'odd', 'even', 'constant', or None.  This determines
            the type of extension to use for the padded signal to which the
            filter is applied.  If `padtype` is None, no padding is used.  The
            default is 'odd'""")

    padlen = Parameter(
        None,
        constraints=EnsureInt() | EnsureNone(),
        doc="""The number of elements by which to extend `x` at both ends
            of `axis` before applying the filter. This value must be less than
            `x.shape[axis]-1`.  `padlen=0` implies no padding. The default
            value is 3*max(len(a),len(b))""")

    def __init__(self, b, a, **kwargs):
        """
        All constructor parameters are analogs of filtfilt() or are passed
        on to the Mapper base class.

        Parameters
        ----------
        b : (N,) array_like
            The numerator coefficient vector of the filter.
        a : (N,) array_like
            The denominator coefficient vector of the filter.  If a[0]
            is not 1, then both a and b are normalized by a[0].
        """
        Mapper.__init__(self, auto_train=True, **kwargs)
        self.__iir_num = b
        self.__iir_denom = a

    def _forward_data(self, data):
        params = self.params
        try:
            mapped = filtfilt(self.__iir_num,
                              self.__iir_denom,
                              data,
                              axis=params.axis,
                              padtype=params.padtype,
                              padlen=params.padlen)
        except TypeError:
            # we have an ancient scipy, do it manually,
            # but it will only support 2d arrays
            if params.axis == 0:
                data = data.T
            if params.axis > 1:
                raise ValueError("this version of scipy does not "
                                 "support nd-arrays for filtfilt()")
            if not (params['padlen'].is_default
                    and params['padtype'].is_default):
                warning("this version of scipy.signal.filtfilt() does not "
                        "support `padlen` and `padtype` arguments -- ignoring "
                        "them")
            mapped = [
                filtfilt(self.__iir_num, self.__iir_denom, x) for x in data
            ]
            mapped = np.array(mapped)
            if params.axis == 0:
                mapped = mapped.T
        return mapped
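A sketch of the underlying SciPy call for the modern code path: a zero-phase Butterworth low-pass applied along the samples axis (axis=0) of a samples x features array, matching the filter from the docstring example:

import numpy as np
from scipy import signal

rng = np.random.RandomState(0)
data = rng.randn(200, 4)                        # 200 samples x 4 features
b, a = signal.butter(8, 0.125)                  # same filter as the docstring
filtered = signal.filtfilt(b, a, data, axis=0, padtype='odd', padlen=150)
print(filtered.shape)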
Example #9
class PCorrTargetSimilarity(Measure):
    """Calculate the partial correlations of a neural RDM with more than two
    target RDMs. This measure can be used for example when comparing a
    neural RDM with more than one behavioral RDM, and one desires to look
    only at the correlations of the residuals.

    NOTA BENE: this measure computes a distance internally through
    scipy.spatial.distance.pdist, thus you should make sure that the
    predictors are in the correct direction, that is smaller values imply
    higher similarity!
    """

    is_trained = True
    """Indicate that this measure is always trained."""

    pairwise_metric = Parameter('correlation',
                                constraints='str',
                                doc="""\
          Distance metric to use for calculating pairwise vector distances for
          the neural dissimilarity matrix (DSM).  See
          scipy.spatial.distance.pdist for all possible metrics.""")

    correlation_type = Parameter('spearman',
                                 constraints=EnsureChoice(
                                     'spearman', 'pearson'),
                                 doc="""\
          Type of correlation to use between the computed neural RDM and the
          target RDMs. If spearman, the residuals are ranked
          prior to the correlation.""")

    normalize_rdms = Parameter(True,
                               constraints='bool',
                               doc="""\
          If True then center and normalize each column of the neural RDM
          and of the predictor RDMs by subtracting the
          column mean from each element and imposing unit L2 norm.""")

    def __init__(self, target_rdms, **kwargs):
        """
        Parameters
        ----------
        target_rdms : array (length N*(N-1)/2, n_predictors)
          Target dissimilarity matrices
        """
        # init base classes first
        super(PCorrTargetSimilarity, self).__init__(**kwargs)
        self.target_rdms = target_rdms
        self.corrfx = CORRFX[self.params.correlation_type]
        self.normalize_rdms = self.params.normalize_rdms

    def _call(self, dataset):
        """
        Parameters
        ----------
        dataset : input dataset

        Returns
        -------
        Dataset
          Each sample `i` corresponds to the partial correlation between the
          neural RDM and `target_rdms[:, i]`, partialling out
          `target_rdms[:, j]` with `j != i`.
        """
        data = dataset.samples
        dsm = pdist(data, self.params.pairwise_metric)
        rp = pcorr(dsm[:, None],
                   self.target_rdms,
                   corrfx=self.corrfx,
                   normalize=self.normalize_rdms)
        return Dataset(rp[:, None])
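A standalone sketch of the idea behind the partial correlations (the internal `pcorr`/`CORRFX` helpers may differ): the partial correlation of the neural RDM with predictor `i` is the correlation of the residuals after regressing the remaining predictors out of both. Toy data, Spearman flavor via ranking:

import numpy as np
from scipy.stats import pearsonr, rankdata

rng = np.random.RandomState(0)
neural = rankdata(rng.rand(66))                   # ranked neural RDM (flattened)
targets = np.apply_along_axis(rankdata, 0, rng.rand(66, 3))

def residuals(y, X):
    # least-squares residuals of y after regressing on X (plus intercept)
    X = np.column_stack([np.ones(len(X)), X])
    beta, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
    return y - X.dot(beta)

for i in range(targets.shape[1]):
    others = np.delete(targets, i, axis=1)
    r, _ = pearsonr(residuals(neural, others), residuals(targets[:, i], others))
    print(i, r)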