Exemple #1
class PDist(Measure):
    """Compute dissimilarity matrix for samples in a dataset

    This `Measure` returns the upper triangle of the n x n dissimilarity matrix
    defined as the pairwise distances between samples in the dataset, and where
    n is the number of samples.
    """

    is_trained = True # Indicate that this measure is always trained.

    pairwise_metric = Parameter('correlation', constraints='str', doc="""\
          Distance metric to use for calculating pairwise vector distances for
          dissimilarity matrix (DSM).  See scipy.spatial.distance.pdist for
          all possible metrics.""")

    center_data = Parameter(False, constraints='bool', doc="""\
          If True then center each column of the data matrix by subtracing the
          column mean from each element. This is recommended especially when
          using pairwise_metric='correlation'.""")

    square = Parameter(False, constraints='bool', doc="""\
          If True return the square distance matrix, if False, returns the
          flattened upper triangle.""")

    def __init__(self, **kwargs):
        """
        Returns
        -------
        Dataset
          If square is False, contains a column vector of length = n(n-1)/2 of
          pairwise distances between all samples. A sample attribute ``pairs``
          identifies the indices of input samples for each individual pair.
          If square is True, the dataset contains a square dissimilarity matrix
          and the entire sample attributes collection of the input dataset.
        """

        Measure.__init__(self, **kwargs)

    def _call(self, ds):
        """Compute the pairwise distances between all samples of `ds`."""
        data = ds.samples
        # center data if specified (column-wise, i.e. per feature)
        if self.params.center_data:
            data = data - np.mean(data, 0)

        # get dsm (condensed form: flattened upper triangle)
        dsm = pdist(data, metric=self.params.pairwise_metric)

        # if square return value make dsm square
        if self.params.square:
            # re-add the sample attributes -- should still be valid
            out = Dataset(squareform(dsm),
                          sa=ds.sa)
        else:
            # add some attributes: which pair of input samples each
            # distance value belongs to
            out = Dataset(dsm,
                          sa=dict(pairs=list(combinations(range(len(ds)), 2))))
        return out
Exemple #2
class PolyKernel(NumpyKernel):
    """Polynomial kernel: K(a,b) = (gamma*a*b.T+coef0)**degree"""
    gamma = Parameter(1, doc='Gamma scaling coefficient')
    degree = Parameter(2, doc="Polynomial degree")
    coef0 = Parameter(1, doc="Offset added to dot product before exponent")

    def _compute(self, d1, d2):
        """Store the polynomial kernel matrix of `d1` vs `d2` in ``self._k``."""
        # scaled and shifted dot product, raised element-wise to `degree`
        self._k = np.power(
            self.params.gamma * np.dot(d1, d2.T) + self.params.coef0,
            self.params.degree)
Exemple #3
class SigmoidLSKernel(LSKernel):
    """Sigmoid kernel: K(a,b) = tanh(gamma*a*b.T + coef0)"""
    # Kernel type constant taken from the _svmc module
    __kernel_type__ = _svmc.SIGMOID
    # Short textual name of this kernel
    __kernel_name__ = 'sigmoid'
    # Kernel parameters appearing in the formula above; both default to 1
    gamma = Parameter(1, doc='Gamma multiplying parameter for SigmoidKernel')
    coef0 = Parameter(1, doc='Offset inside tanh')

    def __init__(self, **kwargs):
        # Necessary for proper docstring construction
        # (all keyword arguments are passed through to LSKernel unchanged)
        LSKernel.__init__(self, **kwargs)
Exemple #4
class PolyLSKernel(LSKernel):
    """Polynomial kernel: K(a,b) = (gamma*a*b.T + coef0)**degree"""
    # Kernel type constant taken from the _svmc module
    __kernel_type__ = _svmc.POLY
    # Short textual name of this kernel
    __kernel_name__ = 'poly'
    # Kernel parameters appearing in the formula above
    gamma = Parameter(1, doc='Gamma multiplying parameter for Polynomial')
    degree = Parameter(2, doc='Degree of polynomial')
    coef0 = Parameter(1, doc='Offset inside polynomial')  # aka coef0

    def __init__(self, **kwargs):
        # Necessary for proper docstring construction
        # (all keyword arguments are passed through to LSKernel unchanged)
        LSKernel.__init__(self, **kwargs)
Exemple #5
class MeanRemoval(Mapper):
    """Subtract sample mean from features."""

    is_trained = True

    in_place = Parameter(
        False,
        doc="""If False: a copy of the dataset will be made before demeaning.
        If True: demeaning will be performed in-place, i.e. input data is
        modified. This is faster, but can have side-effects when the original
        dataset is used elsewhere again, and implies that floating point data
        types are required to prevent rounding errors in this case.""",
        constraints='bool')

    def __init__(self, in_place=False, **kwargs):
        """
        Parameters
        ----------
        in_place : bool
          Whether to demean the input array itself instead of a copy.
        """
        Mapper.__init__(self, **kwargs)
        self.in_place = in_place

    def _forward_data(self, data):
        """Return `data` with the mean of each row (sample) subtracted."""
        mdata = data
        mean = np.mean(mdata, axis=1)

        if self.in_place:
            # np.floating matches every float width; the builtin ``float``
            # is interpreted as float64 only by recent NumPy versions
            if not np.issubdtype(mdata.dtype, np.floating):
                warning("Integer dtype. Mean removal won't work correctly for "
                        "this implementation. Rounding errors will occur. "
                        "Use in_place=False instead")
            mdata -= mean[:, np.newaxis]
        else:
            # out-of-place: the subtraction allocates a new array and leaves
            # the caller's data untouched
            mdata = mdata - mean[:, np.newaxis]
        return mdata
Exemple #6
 class WithFuncChoices(ClassWithCollections):
     # Parameter whose allowed values mix a string and a function object.
     # NOTE(review): a trailing argument (possibly ``doc=...``) may have been
     # lost from this call -- confirm against the original test.
     C = Parameter('choice1',
                   constraints=EnsureChoice('choice1', np.sum))
     # We need __init__ to get 'custom' docstring
     def __init__(self, **kwargs):
         # name the class explicitly: super(type(self), self) would recurse
         # endlessly as soon as this class gets subclassed
         super(WithFuncChoices, self).__init__(**kwargs)
class RandomClassifier(Classifier):
    """Dummy classifier deciding on labels absolutely randomly
    """

    __tags__ = ['random', 'non-deterministic', 'oneclass-binary']

    same = Parameter(
        False, constraints='bool',
        doc="If a dataset arrives to predict, assign identical (but random) label "
            "to all samples having the same label in original, thus mimiquing the "
            "situation where testing samples are not independent.")

    def __init__(self, **kwargs):
        Classifier.__init__(self, **kwargs)
        # unique labels seen during training; None until _train() was called
        self._ulabels = None

    def _train(self, data):
        """Remember the unique labels of the training data."""
        self._ulabels = data.sa[self.get_space()].unique

    def _predict(self, data):
        """Return one randomly drawn training label per sample of `data`."""
        n_labels = len(self._ulabels)
        # oh those lovely random estimates, for now just an estimate
        # per sample. Since we are random after all -- keep it random
        self.ca.estimates = np.random.normal(size=len(data))
        if is_datasetlike(data) and self.params.same:
            # decide on mapping between original labels
            labels_map = dict(
                zip(self._ulabels,
                    self._ulabels[npr.randint(0, n_labels, size=n_labels)]))
            return [labels_map[t] for t in data.sa[self.get_space()].value]
        # random one per each
        return self._ulabels[npr.randint(0, n_labels, size=len(data))]
Exemple #8
 def test_deprecated_allowedtype(self):
     # The legacy ``allowedtype`` option must trigger a deprecation warning
     # and must not be stored on the parameter nor turned into a constraint.
     with assert_warnings(
             [(DeprecationWarning,
               "allowedtype option was deprecated in favor of constraints. "
               "Adjust your code, provided value 'str' was ignored")]):
         p = Parameter(1.0, allowedtype="str")
         self.assertRaises(AttributeError, lambda p: p.allowedtype, p)
         self.assertEqual(p.constraints, None)
Exemple #9
class RbfSGKernel(_BasicSGKernel):
    """Radial basis function: K(a,b) = exp(-||a-b||**2/sigma)"""
    # Kernel class taken from the sgk module that this wrapper stands for
    __kernel_cls__ = sgk.GaussianKernel
    # Short textual name of this kernel
    __kernel_name__ = 'rbf'
    # The single kernel parameter from the formula above; defaults to 1
    sigma = Parameter(1, doc="Width/division parameter for gaussian kernel")

    def __init__(self, **kwargs):
        # Necessary for proper docstring construction
        # (all keyword arguments are passed through to the base class)
        _BasicSGKernel.__init__(self, **kwargs)
Exemple #10
class BinomialProportionCI(Mapper):
    """Compute binomial proportion confidence intervals

    This is a convenience frontend for binomial_proportion_ci_from_bool()
    and supports all methods implemented in this function.

    The confidence interval is computed independently for each feature column.
    The returned dataset contains two samples.  The first one contains the
    lower CI boundary and the second sample the upper boundary.
    """

    is_trained = True

    width = Parameter(.95,
                      constraints=EnsureFloat() & EnsureRange(min=0, max=1),
                      doc="Confidence interval width")
    meth = Parameter('jeffreys',
                     constraints=EnsureChoice('wald', 'wilson',
                                              'agresti-coull', 'jeffreys',
                                              'clopper-pearson', 'arc-sine',
                                              'logit', 'anscombe'),
                     doc="Interval estimation method")

    def __init__(self, **kwargs):
        Mapper.__init__(self, **kwargs)

    def _train(self, ds):
        # nothing to learn -- the mapper is flagged as always trained
        pass

    def _forward_data(self, data):
        """Return the CI boundaries computed from the boolean array `data`."""
        from mvpa2.misc.stats import binomial_proportion_ci_from_bool
        # the helper expects the error level alpha, i.e. 1 - interval width
        return binomial_proportion_ci_from_bool(data,
                                                alpha=1 - self.params.width,
                                                meth=self.params.meth)

    def _forward_dataset(self, ds):
        """Wrap the CI boundaries of ``ds.samples`` into a two-sample Dataset."""
        msamp = self._forward_data(ds.samples)
        mds = Dataset(msamp, sa=dict(ci_boundary=['lower', 'upper']))
        return mds
Exemple #11
class RbfKernel(NumpyKernel):
    """Radial basis function (aka Gausian, aka ) kernel
    K(a,b) = exp(-||a-b||**2/sigma)
    """
    sigma = Parameter(1.0, allowedtype=float, doc="Width parameter sigma")

    def _compute(self, d1, d2):
        """Store the RBF kernel matrix of `d1` vs `d2` in ``self._k``."""
        # Do the Rbf
        # NOTE(review): the divisor was cut off in the reviewed excerpt; it is
        # restored from the class formula (division by sigma) -- confirm.
        self._k = np.exp(-squared_euclidean_distance(d1, d2) /
                         self.params.sigma)
Exemple #12
class PolySGKernel(_BasicSGKernel):
    """Polynomial kernel: K(a,b) = (a*b.T + c)**degree
    c is 1 if and only if 'inhomogenous' is True
    """
    __kernel_cls__ = sgk.PolyKernel
    __kernel_name__ = 'poly'
    # names of the kernel parameters, in the order listed for the kernel class
    __kp_order__ = ('degree', 'inhomogenous')
    degree = Parameter(2, allowedtype=int, doc="Polynomial order of the kernel")
    inhomogenous = Parameter(True, allowedtype=bool,
                             doc="Whether +1 is added within the expression")

    # an extra parameter is declared (and appended to the parameter order)
    # only when the 'sg ge 0.6.5' dependency check fails
    if not exists('sg ge 0.6.5'):

        use_normalization = Parameter(False, allowedtype=bool,
                                      doc="Optional normalization")
        __kp_order__ = __kp_order__ + ('use_normalization',)

    def __init__(self, **kwargs):
        # Necessary for proper docstring construction
        _BasicSGKernel.__init__(self, **kwargs)
Exemple #13
class RbfLSKernel(LSKernel):
    """Radial Basis Function kernel (aka Gaussian):
    K(a,b) = exp(-gamma*||a-b||**2)
    """
    __kernel_type__ = _svmc.RBF
    __kernel_name__ = 'rbf'
    gamma = Parameter(1, doc='Gamma multiplying paramater for Rbf')

    def __init__(self, **kwargs):
        # Necessary for proper docstring construction
        LSKernel.__init__(self, **kwargs)
Exemple #14
class ConstantKernel(NumpyKernel):
    """The constant kernel class.
    """

    sigma_0 = Parameter(1.0,
                        doc="""
       A simple constant squared value of which is broadcasted across
       kernel. In the case of GPR -- standard deviation of the Gaussian
       prior probability N(0,sigma_0**2) of the intercept of the
       constant regression.""")

    def _compute(self, data1, data2):
        """Compute kernel matrix.

        Parameters
        ----------
        data1 : numpy.ndarray
          lhs data
        data2 : numpy.ndarray
          rhs data
        """
        # every entry equals sigma_0**2; only the row counts of the inputs
        # determine the shape of the result
        self._k = \
            (self.params.sigma_0 ** 2) * np.ones((data1.shape[0], data2.shape[0]))

    ## def set_hyperparameters(self, hyperparameter):
    ##     if hyperparameter < 0:
    ##         raise InvalidHyperparameterError()
    ##     self.sigma_0 = hyperparameter
    ##     return

    def compute_lml_gradient(self, alphaalphaT_Kinv, data):
        """Store the gradient w.r.t. sigma_0 in ``self.lml_gradient``.

        `data` is accepted but not used by this kernel.
        """
        K_grad_sigma_0 = 2 * self.params.sigma_0
        # self.lml_gradient = 0.5*(np.trace(np.dot(alphaalphaT_Kinv,K_grad_sigma_0*np.ones(alphaalphaT_Kinv.shape)))
        # Faster formula: np.trace(np.dot(A,B)) = (A*(B.T)).sum()
        # Fastest when B is a constant: B*A.sum()
        self.lml_gradient = 0.5 * np.array(
            K_grad_sigma_0 * alphaalphaT_Kinv.sum())
        #return self.lml_gradient

    def compute_lml_gradient_logscale(self, alphaalphaT_Kinv, data):
        """Store the gradient w.r.t. log(sigma_0) in ``self.lml_gradient``.

        Same as `compute_lml_gradient` except for the factor
        ``2 * sigma_0**2``.  `data` is accepted but not used.
        """
        K_grad_sigma_0 = 2 * self.params.sigma_0**2
        self.lml_gradient = 0.5 * np.array(
            K_grad_sigma_0 * alphaalphaT_Kinv.sum())
        #return self.lml_gradient

Exemple #15
    def __init__(self, **kwargs):
        # XXX Determine which parameters depend on each other and implement
        # safety/simplifying logic around them
        # already done for: nr_weight
        # thought: weight and weight_label should be a dict
        """Interface class to LIBSVM classifiers and regressions.

        Default implementation (C/nu/epsilon SVM) is chosen depending
        on the given parameters (C/nu/tube_epsilon).
        """

        svm_impl = kwargs.get('svm_impl', None)
        # Depending on given arguments, figure out desired SVM
        # implementation
        if svm_impl is None:
            # no early exit: if several of these arguments are given, the
            # last matching entry of the list determines the implementation
            for arg, impl in [('tube_epsilon', 'EPSILON_SVR'), ('C', 'C_SVC'),
                              ('nu', 'NU_SVC')]:
                if arg in kwargs:
                    svm_impl = impl
                    if __debug__:
                        debug(
                            'SVM', 'No implementation was specified. Since '
                            '%s is given among arguments, assume %s' %
                            (arg, impl))
            if svm_impl is None:
                svm_impl = 'C_SVC'
                if __debug__:
                    debug('SVM', 'Assign C_SVC "by default"')
        kwargs['svm_impl'] = svm_impl

        # init base class
        _SVM.__init__(self, **kwargs)

        self._svm_type = self._KNOWN_IMPLEMENTATIONS[svm_impl][0]

        if 'nu' in self._KNOWN_PARAMS and 'epsilon' in self._KNOWN_PARAMS:
            # overwrite eps param with new default value (information
            # taken from libSVM docs
            # NOTE(review): this statement was missing from the reviewed
            # excerpt and is restored from the comment above -- confirm the
            # value against the libSVM documentation.
            self.params['epsilon']._set_default(0.001)

        self.params['nr_weight'] = Parameter(len(self.params['weight'].value))
        self.__model = None
        """Holds the trained SVM."""
Exemple #16
    def __init__(self, kernel_cls, kernel_params=[], **kwargs):
        """Initialize CustomSGKernel.

        Parameters
        ----------
        kernel_cls : Shogun.Kernel
          Class of a Kernel from Shogun
        kernel_params : list
          Each item in this list should be a tuple of (kernelparamname, value),
          and the order is the explicit order required by the Shogun constructor
        """
        self.__kernel_cls__ = kernel_cls # These are normally static

        _BasicSGKernel.__init__(self, **kwargs)
        order = []
        for k, v in kernel_params:
            self.params[k] = Parameter(default=v)
            # remember the caller's ordering; without this __kp_order__
            # would always end up empty
            order.append(k)
        self.__kp_order__ = tuple(order)
class ConnectivityHyperalignment(SearchlightHyperalignment):
    # NOTE(review): this class body appears truncated in many places (docstring
    # delimiters, closing parentheses and some else-branches are not visible).
    # Compare against the upstream file before changing any code; the comments
    # added below describe only what is visible here.
    Given a list of datasets, provide a list of mappers
    into common space using connectivity based hyperalignment.
    This time on Surface!!!

    - Compute the mean time-series for each connectivity target.
    - Use these mean time-series to align each target region and get `npc`
      PC time-series per region that are aligned across individuals (optional).
    - Compute a connectivity profile for each feature (e.g., vertex) depicting its
      connectivities to the targets. If `npc` is None, the mean time-series of
      each target is used; otherwise, the `npc` PC time-series are used.
    - Use SL HA to align the whole cortex based on connectivity profiles.

    See :ref:`Guntupalli et al., Plos Comp. Bio (2018)` for details.
    mask_ids = Parameter(
        constraints=EnsureListOf(int) | EnsureNone(),
        doc="""You can specify a mask to compute searchlight hyperalignment
            only within this mask..""")

    seed_indices = Parameter(
        constraints=EnsureListOf(int) | EnsureNone(),
        doc="""A list of node indices that correspond to seed centers for
            seed queryengines. If None, all centers of seed_queryengines
            are used.""")

    seed_queryengines = Parameter(
        doc="""A list of queryengines to determine seed searchlights for
            connectomes. If a single queryengine is given in the list, then it
            is assumed that it applies to all datasets.""")

    seed_radius = Parameter(
        constraints=EnsureInt() & EnsureRange(min=1) | EnsureNone(),
        doc=""" Radius in voxels for seed size in volume.""")

    conn_metric = Parameter(
        lambda x, y: np.dot(x.samples.T, y.samples) / x.nsamples,  #
        doc="""How to compute the connectivity metric between features.
            Default is the dot product of samples (which on zscored data becomes
            correlation if you normalize by nsamples.""")

    npcs = Parameter(
        constraints=EnsureInt() & EnsureRange(min=1) | EnsureNone(),
        """Maximum number of PCs to be considered in each surface searchlight.
            If None, use seed mean instead of PCs.

    connectomes = Parameter(
        constraints=EnsureStr() | EnsureNone(),
        doc="""Precomputed connectomes supplied as hdf5 filename (for now).
            It is expected to be a dictionary with key 'hmappers' (for now)."""

    common_model = Parameter(
        constraints=EnsureStr() | EnsureNone(),
        doc="""Precomputed common model supplied as hdf5 filename (for now).
            It is expected to be a dict with feature-targets connectome and
            common models in each target ROI with appropriate pcs (for now).
            Expects 'local_models' and 'connectome_model' keys.""")

    save_model = Parameter(
        constraints=EnsureStr() | EnsureNone(),
        doc="""Precomputed common model supplied as hdf5 filename (for now).
            It is expected to be a tuple with feature-targets connectome and
            common models in each target ROI with appropriate pcs (for now)."""

    def __init__(self, **kwargs):
        SearchlightHyperalignment.__init__(self, **kwargs)

    # Applies a Searchlight built around `measure` to `dataset`, z-scores the
    # result with chunks_attr=None and returns it.
    # NOTE(review): the Searchlight(...) call is not closed in this view, so
    # the remaining arguments cannot be seen.
    def _get_seed_means(self, measure, queryengine, dataset, seed_indices):
        # Computing seed data as mean timeseries in each SL
        seed_data = Searchlight(measure,
        seed_data = seed_data(dataset)
        zscore(seed_data, chunks_attr=None)
        return seed_data

    # For every (seed_mean, queryengine, dataset) triple, selects the features
    # the queryengine returns for `inode` and collects their forward-mapped
    # connectomes in a list.
    # NOTE(review): the signature is cut off; `connectivity_mapper` is used in
    # the body and is presumably the missing last parameter -- confirm.
    def _get_sl_connectomes(self, seed_means, qe_all, datasets, inode,
        # For each SL, computing connectivity of features to seed means
        sl_connectomes = []
        # Looping over each subject
        for seed_mean, qe_, sd in zip(seed_means, qe_all, datasets):
            sl_ids = qe_[inode]
            if is_datasetlike(sl_ids):
                assert (sl_ids.nsamples == 1)
                sl_ids = sl_ids.samples[0, :].tolist()
            sl_connectomes.append(connectivity_mapper.forward(sd[:, sl_ids]))
        return sl_connectomes

    def _get_hypesvs(self, sl_connectomes, local_common_model=None):
        Hyperalign connectomes and return mapppers
        and trained SVDMapper of common space.

        sl_connectomes: a list of connectomes to hyperalign
        local_common_model: a reference common model to be used.

        a tuple (sl_hmappers, svm, local_common_model)
        sl_hmappers: a list of mappers corresponding to input list in that order.
        svm: a svm mapper based on the input data. if given a common model, this is None.
        local_common_model: If local_common_model is provided as input, this will be None.
            Otherwise, local_common_model will be computed here and returned.
        # TODO Should we z-score sl_connectomes?
        return_model = False if self.params.save_model is None else True
        # A reference model was supplied: hyperalign with level2_niter=0 and
        # return only the mappers (no SVD mapper, no new model)
        if local_common_model is not None:
            ha = Hyperalignment(level2_niter=0)
            if not is_datasetlike(local_common_model):
                local_common_model = Dataset(samples=local_common_model)
            sl_hmappers = ha(sl_connectomes)
            return sl_hmappers, None, None
        # No reference model: hyperalign, project each connectome through its
        # mapper, z-score, and average across the list
        ha = Hyperalignment()
        sl_hmappers = ha(sl_connectomes)
        sl_connectomes = [
            slhm.forward(slc) for slhm, slc in zip(sl_hmappers, sl_connectomes)
        _ = [zscore(slc, chunks_attr=None) for slc in sl_connectomes]
        sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
        svm = SVDMapper(force_train=True)
        if return_model:
            local_common_model = svm.forward(sl_connectomes)
            # NOTE(review): as visible here the next line always overwrites the
            # model with None -- an `else:` branch seems to be missing.
            local_common_model = None
        return sl_hmappers, svm, local_common_model

    # Returns one connectome per dataset; the visible code loads them via
    # h5load when params.connectomes is set and saves them via h5save at the end.
    # NOTE(review): several conditions and calls below are cut off mid-expression.
    def _get_connectomes(self, datasets):
        params = self.params
        # If no precomputed connectomes are supplied, compute them.
        if params.connectomes is not None and os.path.exists(
            _chpaldebug("Loading pre-computed connectomes from ",
            connectomes = h5load(params.connectomes)
            return connectomes
        connectivity_mapper = FxyMapper(params.conn_metric)
        # Initializing datasets with original anatomically aligned datasets
        mfm = MeanFeatureMeasure()
        # TODO Handle seed_radius if seed queryengines are not provided
        seed_radius = params.seed_radius
            "Performing surface connectivity hyperalignment with seeds")
        _chpaldebug("Computing connectomes.")
        ndatasets = len(datasets)
        if params.seed_queryengines is None:
            raise NotImplementedError("For now, we need seed queryengines.")
        qe_all = super(ConnectivityHyperalignment,
                           datasets, params.seed_queryengines, seed_radius,
        # If seed_indices are not supplied, use all as centers
        if not params.seed_indices:
            roi_ids = super(ConnectivityHyperalignment,
            # NOTE(review): an `else:` presumably preceded the next line.
            roi_ids = params.seed_indices
        # a single queryengine is replicated so there is one per dataset
        if len(qe_all) == 1:
            qe_all *= ndatasets
        # Computing Seed means to be used for aligning seed features
        seed_means = [
            self._get_seed_means(MeanFeatureMeasure(), qe, ds,
            for qe, ds in zip(qe_all, datasets)
        if params.npcs is None:
            conn_targets = []
            for seed_mean in seed_means:
                zscore(seed_mean, chunks_attr=None)
            # compute all PC-seed connectivity in each subject
            # 1. make common model SVs in each seed SL based on connectivity to seed_means
            # 2. Use these SVs for computing connectomes
            _chpaldebug("Aligning SVs in each searchlight across subjects")
            # Looping over all seeds in which SVD is done
            pc_data = [[] for isub in range(ndatasets)]
            sl_common_models = dict()
            if params.common_model is not None and os.path.exists(
                _chpaldebug("Loading common model from %s" %
                common_model = h5load(params.common_model)
                sl_common_models = common_model['local_models']
            for inode in roi_ids:
                # For each SL, computing connectivity of features to seed means
                # This line below doesn't need common model
                sl_connectomes = self._get_sl_connectomes(
                    seed_means, qe_all, datasets, inode, connectivity_mapper)
                # Hyperalign connectomes in SL
                # XXX TODO Common model input to below function should be updated.
                local_common_model = sl_common_models[inode][:, :params.npcs] \
                                        if params.common_model else None
                sl_hmappers, svm, sl_common_model = self._get_hypesvs(
                    sl_connectomes, local_common_model=local_common_model)
                if sl_common_model is not None:
                    sl_common_models[inode] = sl_common_model
                # make common model SV timeseries data in each subject
                for sd, slhm, qe, pcd in zip(datasets, sl_hmappers, qe_all,
                    sd_svs = slhm.forward(sd[:, qe[inode]])
                    zscore(sd_svs, chunks_attr=None)
                    if svm is not None:
                        sd_svs = svm.forward(sd_svs)
                        sd_svs = sd_svs[:, :params.npcs]
                        zscore(sd_svs, chunks_attr=None)
            if params.save_model is not None:
                # TODO: should use debug
                print('Saving local models to %s' % params.save_model)
                h5save(params.save_model, sl_common_models)
            pc_data = [hstack(pcd) for pcd in pc_data]
            conn_targets = pc_data
            #print pc_data[-1]
        # compute connectomes using connectivity targets (PCs or seed means)
        connectomes = []
        if params.common_model is not None and os.path.exists(
            # TODO: should use debug
            print('Loading from saved common model: %s' % params.common_model)
            connectome_model = common_model['connectome_model']
        for t_, ds in zip(conn_targets, datasets):
            connectome = connectivity_mapper.forward(ds)
            t_ = None
            connectome.fa = ds.fa
            # float64 samples are down-cast to float32 before z-scoring
            if connectome.samples.dtype == 'float64':
                connectome.samples = connectome.samples.astype('float32')
            zscore(connectome, chunks_attr=None)
        if params.connectomes is not None and not os.path.exists(
            _chpaldebug("Saving connectomes to ", params.connectomes)
            h5save(params.connectomes, connectomes)
        return connectomes

               description="Connectivity-based hyperalignment",
    def __call__(self, datasets):
        """Estimate mappers for each dataset

          datasets : list or tuple of datasets

        A list of trained Mappers of the same length as datasets
        connectomes = self._get_connectomes(datasets)
        # TODO Add assertion about nsamples matching across connectomes
            "Performing hyperalignment of %d connectomes with %d samples" %
            (len(connectomes), connectomes[0].nsamples))
        _chpaldebug("Running searchlight hyperalignment")
        conhypmappers = super(ConnectivityHyperalignment,
        _chpaldebug("Finished Connectivity hyperalignment. Returning mappers.")
        return conhypmappers
Exemple #18
class PDistTargetSimilarity(Measure):
    """Calculate the correlations of PDist measures with a target

    Target dissimilarity correlation `Measure`. Computes the correlation between
    the dissimilarity matrix defined over the pairwise distances between the
    samples of dataset and the target dissimilarity matrix.
    """

    is_trained = True
    """Indicate that this measure is always trained."""

    pairwise_metric = Parameter('correlation', constraints='str', doc="""\
          Distance metric to use for calculating pairwise vector distances for
          dissimilarity matrix (DSM).  See scipy.spatial.distance.pdist for
          all possible metrics.""")

    # NOTE(review): further arguments (e.g. a constraint on the allowed
    # values) may have been lost from this call -- confirm.
    comparison_metric = Parameter('pearson', doc="""\
          Similarity measure to be used for comparing dataset DSM with the
          target DSM.""")

    center_data = Parameter(False, constraints='bool', doc="""\
          If True then center each column of the data matrix by subtracing the
          column mean from each element. This is recommended especially when
          using pairwise_metric='correlation'.""")

    corrcoef_only = Parameter(False, constraints='bool', doc="""\
          If True, return only the correlation coefficient (rho), otherwise
          return rho and probability, p.""")

    def __init__(self, target_dsm, **kwargs):
        """
        Parameters
        ----------
        target_dsm : array (length N*(N-1)/2)
          Target dissimilarity matrix

        Returns
        -------
        Dataset
          If ``corrcoef_only`` is True, contains one feature: the correlation
          coefficient (rho); or otherwise two-fetaures: rho plus p.
        """
        # init base classes first
        Measure.__init__(self, **kwargs)
        self.target_dsm = target_dsm
        # for 'spearman' the target is rank-transformed once up front, so
        # _call() can always use pearsonr
        if self.params.comparison_metric == 'spearman':
            self.target_dsm = rankdata(target_dsm)

    def _call(self, dataset):
        """Correlate the DSM of `dataset` with the stored target DSM."""
        data = dataset.samples
        if self.params.center_data:
            data = data - np.mean(data, 0)
        dsm = pdist(data, self.params.pairwise_metric)
        if self.params.comparison_metric == 'spearman':
            # ranks of the dataset DSM, matching the ranked target
            dsm = rankdata(dsm)
        rho, p = pearsonr(dsm, self.target_dsm)
        if self.params.corrcoef_only:
            return Dataset([rho], fa={'metrics': ['rho']})
        else:
            return Dataset([[rho, p]], fa={'metrics': ['rho', 'p']})
Exemple #19
class Classifier(Learner):
    """Abstract classifier class to be inherited by all classifiers
    """

    # Kept separate from doc to don't pollute help(clf), especially if
    # we including help for the parent class
    _DEV__doc__ = """
    Required behavior:

    For every classifier is has to be possible to be instantiated without
    having to specify the training pattern.

    Repeated calls to the train() method with different training data have to
    result in a valid classifier, trained for the particular dataset.

    It must be possible to specify all classifier parameters as keyword
    arguments to the constructor.

    Recommended behavior:

    Derived classifiers should provide access to *estimates* -- i.e. that
    information that is finally used to determine the predicted class label.

    Michael: Maybe it works well if each classifier provides a 'estimates'
             state member. This variable is a list as long as and in same order
             as Dataset.uniquetargets (training data). Each item in the list
             corresponds to the likelyhood of a sample to belong to the
             respective class. However the semantics might differ between
             classifiers, e.g. kNN would probably store distances to class-
             neighbors, where PLR would store the raw function value of the
             logistic function. So in the case of kNN low is predictive and for
             PLR high is predictive. Don't know if there is the need to unify

             As the storage and/or computation of this information might be
             demanding its collection should be switchable and off be default.

     * predictions  : result of the last call to .predict()
     * estimates : might be different from predictions if a classifier's predict()
                   makes a decision based on some internal value such as
                   probability or a distance.
    """
    # Dict that contains the parameters of a classifier.
    # This shall provide an interface to plug generic parameter optimizer
    # on all classifiers (e.g. grid- or line-search optimizer)
    # A dictionary is used because Michael thinks that access by name is nicer.
    # Additionally Michael thinks ATM that additional information might be
    # necessary in some situations (e.g. reasonably predefined parameter range,
    # minimal iteration stepsize, ...), therefore the value to each key should
    # also be a dict or we should use mvpa2.base.param.Parameter'...

    training_stats = ConditionalAttribute(
        enabled=False, doc="Confusion matrix of learning performance")

    predictions = ConditionalAttribute(enabled=True,
                                       doc="Most recent set of predictions")

    estimates = ConditionalAttribute(
        doc="Internal classifier estimates the most recent " +
        "predictions are based on")

    predicting_time = ConditionalAttribute(
        enabled=True, doc="Time (in seconds) which took classifier to predict")

    __tags__ = []
    """Describes some specifics about the classifier -- is that it is
    doing regression for instance...."""

    # TODO: make it available only for actually retrainable classifiers
    # NOTE(review): the default value (False) and the 'bool' constraint are
    # assumed -- retraining is read as an opt-in boolean flag by the methods
    # of this class; confirm against the upstream declaration.
    retrainable = Parameter(
        False, constraints='bool',
        doc="""Either to enable retraining for 'retrainable' classifier.""")

    def __init__(self, space=None, **kwargs):
        """Set up base-class state and the classifier's own bookkeeping.

        Parameters
        ----------
        space : str or None
          Name of the sample attribute used for training/testing; ``None``
          selects ``'targets'``.
        **kwargs
          Passed on to ``Learner.__init__``.
        """
        # classifiers train/test on the 'targets' sample attribute by default
        chosen_space = 'targets' if space is None else space
        Learner.__init__(self, space=chosen_space, **kwargs)

        # XXX
        # the place to map literal to numerical labels (and back)
        # this needs to be in the base class, since some classifiers also
        # have this nasty 'regression' mode, and the code in this class
        # needs to deal with converting the regression output into discrete
        # labels
        # however, preferably the mapping should be kept in the respective
        # low-level implementations that need it
        self._attrmap = AttributeMap()

        # Number of features the classifier was trained on;
        # 0 means it wasn't trained at all
        self.__trainednfeatures = 0

        self._set_retrainable(self.params.retrainable, force=True)

        # deprecate
        #self.__trainedidhash = None
        #"""Stores id of the dataset on which it was trained to signal
        #in trained() if it was trained already on the same dataset"""

    @property
    def __summary_class__(self):
        """Class used to summarize performance for this kind of learner.

        Exposed as a property because `_posttrain` instantiates the result
        directly with ``targets=``/``predictions=`` keyword arguments.

        Returns
        -------
        class
          ``RegressionStatistics`` if 'regression' is among ``__tags__``,
          ``ConfusionMatrix`` otherwise.
        """
        if 'regression' in self.__tags__:
            return RegressionStatistics
        else:
            return ConfusionMatrix

    @property
    def __is_regression__(self):
        """True if 'regression' is among the classifier's ``__tags__``.

        Exposed as a property: code elsewhere in this file tests
        ``clf.__is_regression__`` without calling it, which would always be
        truthy for a plain bound method.
        """
        return 'regression' in self.__tags__

    def __str__(self, *args, **kwargs):
        """Return a string description of the classifier.

        With the 'CLF_' debug target active the ``repr`` is prepended to the
        parent class' string form; otherwise the shared ``_str`` helper is
        used.
        """
        if __debug__ and 'CLF_' in debug.active:
            return "%s / %s" % (repr(self), super(Classifier, self).__str__())
        else:
            return _str(self, *args, **kwargs)

    def _pretrain(self, dataset):
        """Functionality prior to training

        For the retrainable code path this records in ``self._changedData``
        which inputs (training data, parameter collections) differ from the
        previous training.

        NOTE(review): several lines of this method appear to be missing in
        this copy (see the notes below); restore them before relying on it.
        """
        # So we reset all conditional attributes and may be free up some memory
        # explicitly
        params = self.params
        if not params.retrainable:
            # NOTE(review): the body of this branch and an ``else:`` seem to be
            # missing -- the comment below reads like the start of the
            # retrainable branch. Confirm against the upstream file.
            # just reset the ca, do not untrain
            if not self.__changedData_isset:
                _changedData = self._changedData
                __idhashes = self.__idhashes
                __invalidatedChangedData = self.__invalidatedChangedData

                # if we don't know what was changed we need to figure
                # them out
                if __debug__:
                    debug('CLF_', "IDHashes are %s", (__idhashes, ))

                # Look at the data if any was changed
                # NOTE(review): the tuple of (key, data) pairs below is
                # truncated -- at least one more pair and the closing ``):``
                # are missing.
                for key, data_ in (('traindata', dataset.samples),
                    _changedData[key] = self.__was_data_changed(key, data_)
                    # if those idhashes were invalidated by retraining
                    # we need to adjust _changedData accordingly
                    if __invalidatedChangedData.get(key, False):
                        if __debug__ and not _changedData[key]:
                            # NOTE(review): the call opener (presumably
                            # ``debug(``) is missing before these arguments.
                                'CLF_', 'Found that idhash for %s was '
                                'invalidated by retraining', (key, ))
                        _changedData[key] = True

                # Look at the parameters
                for col in self._paramscols:
                    # names of parameters in this collection that were set
                    changedParams = self._collections[col].which_set()
                    if len(changedParams):
                        _changedData[col] = changedParams

                self.__invalidatedChangedData = {}  # reset it on training

                if __debug__:
                    debug('CLF_', "Obtained _changedData is %s",
                          (self._changedData, ))

    def _posttrain(self, dataset):
        """Functionality post training

        For instance -- computing confusion matrix.

        Parameters
        ----------
        dataset : Dataset
          Data which was used for training

        NOTE(review): parts of this method appear to be missing in this copy
        (see the notes below).
        """
        super(Classifier, self)._posttrain(dataset)

        ca = self.ca

        # needs to be assigned first since below we use predict
        self.__trainednfeatures = dataset.nfeatures

        if __debug__ and 'CHECK_TRAINED' in debug.active:
            self.__trainedidhash = dataset.idhash

        # training stats are computed only on request and only once
        if ca.is_enabled('training_stats') and \
               not ca.is_set('training_stats'):
            # we should not store predictions for training data,
            # it is confusing imho (yoh)
            if self.params.retrainable:
                # we would need to recheck if data is the same,
                # XXX think if there is a way to make this all
                # efficient. For now, probably, retrainable
                # classifiers have no chance but not to use
                # training_stats... sad
                self.__changedData_isset = False
            predictions = self.predict(dataset)
            targets = dataset.sa[self.get_space()].value
            if is_datasetlike(predictions) and (self.get_space()
                                                in predictions.fa):
                # e.g. in case of pair-wise uncombined results - provide
                # stats per each of the targets pairs
                prediction_targets = predictions.fa[self.get_space()].value
                # NOTE(review): the element expression of this dict(...)
                # generator is missing, and an ``else:`` presumably preceded
                # the second assignment below -- as written the per-pair
                # result would be overwritten. Confirm against upstream.
                ca.training_stats = dict(
                     ) for i, t in enumerate(prediction_targets))
                ca.training_stats = self.__summary_class__(
                    targets=targets, predictions=predictions)

    def summary(self):
        """Providing summary over the classifier

        Returns
        -------
        str
          Multi-line description: training state (with time, targets, sample,
          chunk and feature counts and training error where those conditional
          attributes are set) followed by the enabled conditional attributes.
        """

        s = "Classifier %s" % self
        ca = self.ca
        ca_enabled = ca.enabled

        if self.trained:
            s += "\n trained"
            if ca.is_set('training_time'):
                s += ' in %.3g sec' % ca.training_time
            s += ' on data with'
            if ca.is_set('trained_targets'):
                s += ' targets:%s' % list(ca.trained_targets)

            nsamples, nchunks = None, None
            if ca.is_set('trained_nsamples'):
                nsamples = ca.trained_nsamples
            if ca.is_set('trained_dataset'):
                # the stored dataset takes precedence and also knows chunks
                td = ca.trained_dataset
                nsamples, nchunks = td.nsamples, len(td.sa['chunks'].unique)
            if nsamples is not None:
                s += ' #samples:%d' % nsamples
            if nchunks is not None:
                s += ' #chunks:%d' % nchunks

            s += " #features:%d" % self.__trainednfeatures
            if ca.is_set('training_stats'):
                s += ", training error:%.3g" % ca.training_stats.error
        else:
            s += "\n not yet trained"

        if len(ca_enabled):
            s += "\n enabled ca:%s" % ', '.join(
                [str(ca[x]) for x in ca_enabled])
        return s

    def clone(self):
        """Create full copy of the classifier.

        It might require classifier to be untrained first due to
        present SWIG bindings.

        TODO: think about proper re-implementation, without enrollment of deepcopy

        Returns
        -------
        Classifier
          Deep copy of this instance.
        """
        if __debug__:
            debug("CLF", "Cloning %s%s", (self, _strid(self)))
        try:
            return deepcopy(self)
        except Exception:
            # NOTE(review): fallback reconstructed from the docstring above --
            # a trained classifier may hold state that cannot be deep-copied,
            # so untrain and retry once. Confirm against upstream.
            self.untrain()
            return deepcopy(self)

    def _train(self, dataset):
        """Function to be actually overridden in derived classes

        Raises
        ------
        NotImplementedError
          Always, in this base class.
        """
        raise NotImplementedError

    def _prepredict(self, dataset):
        """Functionality prior prediction

        Verifies that the classifier can predict on `dataset` and, for
        retrainable classifiers, records whether the test data changed.

        Raises
        ------
        FailedToPredictError
          If training is required but the classifier is not trained.
        ValueError
          If `dataset` has a different number of features than the training
          data had.
        """
        if 'notrain2predict' not in self.__tags__:
            # check if classifier was trained if that is needed
            if not self.trained:
                raise FailedToPredictError(
                    "Classifier %s wasn't yet trained, therefore can't "
                    "predict" % self)
            nfeatures = dataset.nfeatures  #data.shape[1]
            # check if number of features is the same as in the data
            # it was trained on
            if nfeatures != self.__trainednfeatures:
                raise ValueError(
                    "Classifier %s was trained on data with %d features, "
                    "thus can't predict for %d features"
                    % (self, self.__trainednfeatures, nfeatures))

        if self.params.retrainable:
            if not self.__changedData_isset:
                _changedData = self._changedData
                data = np.asanyarray(dataset.samples)
                _changedData['testdata'] = \
                                        self.__was_data_changed('testdata', data)
                if __debug__:
                    debug('CLF_', "prepredict: Obtained _changedData is %s",
                          (_changedData, ))

    def _postpredict(self, dataset, result):
        """Functionality after prediction is computed

        Stores `result` in the ``predictions`` conditional attribute and, for
        retrainable classifiers, clears the changed-data flag.
        """
        self.ca.predictions = result
        if self.params.retrainable:
            self.__changedData_isset = False

    def _predict(self, dataset):
        """Actual prediction

        To be overridden in derived classes.

        Raises
        ------
        NotImplementedError
          Always, in this base class.
        """
        raise NotImplementedError

    def predict(self, dataset):
        """Predict classifier on data

        Shouldn't be overridden in subclasses unless explicitly needed
        to do so. Also subclasses trying to call super class's predict
        should call _predict if within _predict instead of predict()
        since otherwise it would loop

        NOTE(review): several lines of this method appear to be missing in
        this copy (see the notes below).
        """
        ## ??? yoh: changed to asany from as without exhaustive check
        data = np.asanyarray(dataset.samples)
        if __debug__:
            # Verify that we have no NaN/Inf's which we do not "support" ATM
            if not np.all(np.isfinite(data)):
                raise ValueError(
                    "Some input data for predict is not finite (NaN or Inf)")
            debug("CLF", "Predicting classifier %s on ds %s", (self, dataset))

        # remember the time when started computing predictions
        t0 = time.time()

        ca = self.ca
        # to assure that those are reset (could be set due to testing
        # post-training)
        ca.reset(['estimates', 'predictions'])

        # NOTE(review): `_postpredict` is called at the end but no matching
        # `_prepredict` call is visible here -- possibly a dropped line.

        if self.__trainednfeatures > 0 \
               or 'notrain2predict' in self.__tags__:
            result = self._predict(dataset)
            # NOTE(review): an ``else:`` plus the opener of a warning-style
            # call seem to be missing before the next line, and the debug
            # call below lacks its opener and the end of its message.
                "Trying to predict using classifier trained on no features")
            if __debug__:
                      "No features were present for training, prediction is " \
            result = [None] * data.shape[0]

        # wall-clock duration of the prediction step
        ca.predicting_time = time.time() - t0

        # with labels mapping in-place, we also need to go back to the
        # literal labels
        if self._attrmap:
            # NOTE(review): the ``try:`` matching the ``except`` below is
            # missing.
                result = self._attrmap.to_literal(result)
            except KeyError, e:
                raise FailedToPredictError, \
                      "Failed to convert predictions from numeric into " \
                      "literals: %s" % e

        self._postpredict(dataset, result)
        return result
Exemple #20
class ProcrusteanMapper(ProjectionMapper):
    """Mapper to project from one space to another using Procrustean
    transformation (shift + scaling + rotation).

    Training this mapper requires data for both source and target space to be
    present in the training dataset. The source space data is taken from the
    training dataset's ``samples``, while the target space is taken from a
    sample attribute corresponding to the ``space`` setting of the
    mapper.

    See: http://en.wikipedia.org/wiki/Procrustes_transformation
    """
    # NOTE(review): the default values and the 'bool'/'float' constraints of
    # the parameters below were absent and are assumed; only the 'numpy'
    # default of `svd` is backed by this class (its __init__ message calls
    # it the default). Confirm all of them against the upstream declaration.
    scaling = Parameter(
        True, constraints='bool',
        doc="""Estimate a global scaling factor for the transformation
                       (no longer rigid body)""")
    reflection = Parameter(
        True, constraints='bool',
        doc="""Allow for the data to be reflected (so it might not be
                     a rotation. Effective only for non-oblique transformations.
                     """)
    reduction = Parameter(
        True, constraints='bool',
        doc="""If true, it is allowed to map into lower-dimensional
                     space. Forward transformation might be suboptimal then and
                     reverse transformation might not recover all original
                     variance.""")
    oblique = Parameter(
        False, constraints='bool',
        doc="""Either to allow non-orthogonal transformation -- might
                     heavily overfit the data if there is less samples than
                     dimensions. Use `oblique_rcond`.""")
    oblique_rcond = Parameter(
        -1, constraints='float',
        doc="""Cutoff for 'small' singular values to regularize the
                     inverse. See :class:`~numpy.linalg.lstsq` for more
                     information.""")
    svd = Parameter(
        'numpy',
        constraints=EnsureChoice('numpy', 'scipy', 'dgesvd'),
        doc="""Implementation of SVD to use. dgesvd requires ctypes to
                 be available.""")

    def __init__(self, space='targets', **kwargs):
        # Initializes the parent mapper with `space`, clears the estimated
        # scale, and falls back to the 'numpy' SVD when 'dgesvd' was requested
        # but its external dependency is not available.
        ProjectionMapper.__init__(self, space=space, **kwargs)

        self._scale = None
        """Estimated scale"""
        # NOTE(review): the statement below is truncated -- the argument of
        # ``externals.exists(`` with the closing ``):``, the opener of the
        # call emitting the message, and the end of the message are missing.
        if self.params.svd == 'dgesvd' and not externals.exists(
                "Reverting choice of svd for ProcrusteanMapper to be default "
                "'numpy' since liblapack.so seems not to be available for "
            self.params.svd = 'numpy'

    def _train(self, source):
        """Estimate the projection from `source` samples onto the target data.

        The target is read from the sample attribute of `source` named by
        ``self.get_space()``. Results are stored in ``self._proj``,
        ``self._scale`` and (when demeaning) ``self._offset_out``.

        NOTE(review): many ``else:`` lines and the tails of several calls
        appear to be missing in this copy (see the notes below); the code is
        not runnable as it stands.
        """
        params = self.params
        # Since it is unsupervised, we don't care about labels
        datas = ()
        odatas = ()
        means = ()
        shapes = ()

        # keep the untouched data only when residuals are to be reported
        assess_residuals = __debug__ and 'MAP_' in debug.active

        target = source.sa[self.get_space()].value

        for i, ds in enumerate((source, target)):
            if is_datasetlike(ds):
                data = np.asarray(ds.samples)
                # NOTE(review): an ``else:`` is presumably missing here --
                # otherwise the next line overrides the one above.
                data = ds
            if assess_residuals:
                odatas += (data, )
            if self._demean:
                if i == 0:
                    mean = self._offset_in
                    # NOTE(review): ``else:`` presumably missing here.
                    mean = data.mean(axis=0)
                data = data - mean
                # NOTE(review): ``else:`` (of ``if self._demean``) presumably
                # missing before the next comment.
                # no demeaning === zero means
                mean = np.zeros(shape=data.shape[1:])
            means += (mean, )
            datas += (data, )
            shapes += (data.shape, )

        # shortcuts for sizes
        sn, sm = shapes[0]
        tn, tm = shapes[1]

        # Check the sizes
        if sn != tn:
            raise ValueError, "Data for both spaces should have the same " \
                  "number of samples. Got %d in source and %d in target space" \
                  % (sn, tn)

        # Sums of squares
        ssqs = [np.sum(d**2, axis=0) for d in datas]

        # XXX check for being invariant?
        #     needs to be tuned up properly and not raise but handle
        for i in xrange(2):
            # NOTE(review): the condition below is truncated (rest of the
            # threshold expression and the closing ``):`` are missing).
            if np.all(ssqs[i] <= np.abs((np.finfo(datas[i].dtype).eps * sn *
                raise ValueError, "For now do not handle invariant in time datasets"

        # scale both datasets to unit Frobenius norm
        norms = [np.sqrt(np.sum(ssq)) for ssq in ssqs]
        normed = [data / norm for (data, norm) in zip(datas, norms)]

        # add new blank dimensions to source space if needed
        if sm < tm:
            normed[0] = np.hstack((normed[0], np.zeros((sn, tm - sm))))

        if sm > tm:
            if params.reduction:
                normed[1] = np.hstack((normed[1], np.zeros((sn, sm - tm))))
                # NOTE(review): ``else:`` presumably missing before the raise.
                raise ValueError, "reduction=False, so mapping from " \
                      "higher dimensionality " \
                      "source space is not supported. Source space had %d " \
                      "while target %d dimensions (features)" % (sm, tm)

        source, target = normed
        if params.oblique:
            # Just do silly linear system of equations ;) or naive
            # inverse problem
            if sn == sm and tm == 1:
                T = np.linalg.solve(source, target)
                # NOTE(review): ``else:`` presumably missing, and the lstsq
                # call below is truncated (remaining arguments / closing
                # parenthesis are missing).
                T = np.linalg.lstsq(source, target,
            ss = 1.0
            # NOTE(review): ``else:`` (of ``if params.oblique``) presumably
            # missing before the orthogonal branch below.
            # Orthogonal transformation
            # figure out optimal rotation
            # NOTE(review): all three svd calls below are truncated after
            # their first argument.
            if params.svd == 'numpy':
                U, s, Vh = np.linalg.svd(np.dot(target.T, source),
            elif params.svd == 'scipy':
                # would raise exception if not present
                externals.exists('scipy', raise_=True)
                import scipy
                U, s, Vh = scipy.linalg.svd(np.dot(target.T, source),
            elif params.svd == 'dgesvd':
                from mvpa2.support.lapack_svd import svd as dgesvd
                U, s, Vh = dgesvd(np.dot(target.T, source),
                # NOTE(review): ``else:`` presumably missing before the raise.
                raise ValueError('Unknown type of svd %r' % (params.svd))
            T = np.dot(Vh.T, U.T)

            if not params.reflection:
                # then we need to assure that it is only rotation
                # "recipe" from
                # http://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem
                # for more and info and original references, see
                # http://dx.doi.org/10.1007%2FBF02289451
                s_new = np.ones_like(s)
                s_new[-1] = np.linalg.det(T)
                T = np.dot(Vh.T * s_new, U.T)

            # figure out scale and final translation
            if not params.reflection:
                ss = np.sum(s_new * s)
                # NOTE(review): ``else:`` presumably missing here.
                ss = np.sum(s)

        # if we were to collect standardized distance
        # std_d = 1 - sD**2

        # select out only relevant dimensions
        if sm != tm:
            T = T[:sm, :tm]

        self._scale = scale = ss * norms[1] / norms[0]
        # Assign projection
        if self.params.scaling:
            proj = scale * T
            # NOTE(review): ``else:`` presumably missing -- as written the
            # scaled projection is always overwritten.
            proj = T
        self._proj = proj

        if self._demean:
            self._offset_out = means[1]

        if __debug__ and 'MAP_' in debug.active:
            # compute the residuals
            res_f = self.forward(odatas[0])
            d_f = np.linalg.norm(odatas[1] - res_f) / np.linalg.norm(odatas[1])
            res_r = self.reverse(odatas[1])
            d_r = np.linalg.norm(odatas[0] - res_r) / np.linalg.norm(odatas[0])
            # NOTE(review): the call opener (presumably ``debug(``) is missing
            # before these arguments.
                'MAP_', "%s, residuals are forward: %g,"
                " reverse: %g" % (repr(self), d_f, d_r))

    def _compute_recon(self):
        """For Procrustean mapper, inverse is transpose.

        So, let's skip computing inverse in the super class.

        Returns
        -------
        array
          Pseudo-inverse of the projection for oblique transformations;
          otherwise its transpose, with the projection first divided by the
          squared scale when ``scaling`` is enabled.
        """
        # XXX Change pinv to superclass compute_recon?
        if self.params.oblique:
            #return ProjectionMapper._compute_recon(self)
            return np.linalg.pinv(self._proj)
        if self.params.scaling:
            # proj is scale * T with orthogonal T, so dividing by scale**2
            # before transposing yields T.T / scale
            return np.transpose(self._proj / self._scale ** 2)
        return np.transpose(self._proj)
Exemple #21
class GDA(Classifier):
    """Gaussian Discriminant Analysis -- base for LDA and QDA

    """

    __tags__ = ['binary', 'multiclass', 'oneclass']

    # The three choices are exactly those handled by `_get_priors`.
    prior = Parameter('laplacian_smoothing',
                      constraints=EnsureChoice('laplacian_smoothing',
                                               'uniform', 'ratio'),
                      doc="""How to compute prior distribution.""")

    # NOTE(review): the default (True) and the 'bool' constraint were absent
    # and are assumed; confirm against the upstream declaration.
    allow_pinv = Parameter(
        True, constraints='bool',
        doc="""Allow pseudo-inverse in case of degenerate covariance(s).""")

    def __init__(self, **kwargs):
        """Initialize a GDA classifier.

        Parameters
        ----------
        **kwargs
          Passed on to ``Classifier.__init__``.
        """

        # init base class first
        Classifier.__init__(self, **kwargs)

        # pylint friendly initializations
        self.means = None
        """Means of features per class"""
        self.cov = None
        """Co-variances per class, but "vars" is taken ;)"""
        self.ulabels = None
        """Labels classifier was trained on"""
        self.priors = None
        """Class probabilities"""
        self.nsamples_per_class = None
        """Number of samples per class - used by derived classes"""

        # Define internal state of classifier
        self._norm_weight = None

    def _get_priors(self, nlabels, nsamples, nsamples_per_class):
        """Return prior probabilities given data

        Parameters
        ----------
        nlabels : int
          Number of classes.
        nsamples : int
          Total number of samples.
        nsamples_per_class : array
          Number of samples in each class; squeezed before use.

        Returns
        -------
        array
          At least 1-d array of priors, computed according to
          ``self.params.prior``.

        Raises
        ------
        ValueError
          If ``self.params.prior`` is not one of the known schemes.
        """
        prior = self.params.prior
        if prior == 'uniform':
            priors = np.ones((nlabels, )) / nlabels
        elif prior == 'laplacian_smoothing':
            # add-one smoothing: (count + 1) / (total + number of classes)
            priors = (1 + np.squeeze(nsamples_per_class)) \
                          / (float(nsamples) + nlabels)
        elif prior == 'ratio':
            priors = np.squeeze(nsamples_per_class) / float(nsamples)
        else:
            raise ValueError(
                "No idea on how to handle '%s' way to compute priors"
                % self.params.prior)
        return np.atleast_1d(priors)

    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).

        Stores per-class feature means, per-class *unscaled* scatter matrices
        (in ``self.cov``), per-class sample counts, the unique labels and the
        class priors on the instance.
        """
        targets_sa_name = self.get_space()
        targets_sa = dataset.sa[targets_sa_name]

        # get the dataset information into easy vars
        X = dataset.samples
        labels = targets_sa.value
        self.ulabels = ulabels = targets_sa.unique
        nlabels = len(ulabels)
        label2index = dict((l, il) for il, l in enumerate(ulabels))

        # set the feature dimensions
        nsamples = len(X)
        nfeatures = dataset.nfeatures

        self.means = means = \
                     np.zeros((nlabels, nfeatures))
        # degenerate dimension are added for easy broadcasting later on
        # XXX might want to remove -- for now taken from GNB as is
        self.nsamples_per_class = nsamples_per_class \
                                  = np.zeros((nlabels, 1))
        self.cov = cov = \
                     np.zeros((nlabels, nfeatures, nfeatures))

        # Estimate cov
        # better loop than repmat! ;)
        for l, il in label2index.items():
            Xl = X[labels == l]
            nsamples_per_class[il] = len(Xl)
            # TODO: degenerate case... no samples for known label for
            #       some reason?
            means[il] = np.mean(Xl, axis=0)
            # since we have means already lets do manually cov here
            Xldm = Xl - means[il]
            cov[il] = np.dot(Xldm.T, Xldm)
            # scaling will be done correspondingly in LDA or QDA

        # Store prior probabilities
        self.priors = self._get_priors(nlabels, nsamples, nsamples_per_class)

        if __debug__ and 'GDA' in debug.active:
            debug(
                'GDA', "training finished on data.shape=%s " % (X.shape, ) +
                "min:max(data)=%f:%f" % (np.min(X), np.max(X)))

    def _untrain(self):
        """Untrain classifier and reset all learnt params
        """
        self.means = None
        self.cov = None
        self.ulabels = None
        self.priors = None
        # also learnt in _train, so drop it together with the rest
        self.nsamples_per_class = None
        super(GDA, self)._untrain()

    def _predict(self, data):
        """Predict the output for the provided data.

        The per-class values returned by ``self._g_k(data)`` are stored in the
        ``estimates`` conditional attribute; the label with the maximal value
        in each row is predicted.

        Returns
        -------
        list
          One label from ``self.ulabels`` per row of the estimates.
        """
        self.ca.estimates = prob_cs_cp = self._g_k(data)

        # Take the class with maximal (log)probability
        # XXX in GNB it is axis=0, i.e. classes were first
        winners = prob_cs_cp.argmax(axis=1)
        predictions = [self.ulabels[c] for c in winners]

        if __debug__ and 'GDA' in debug.active:
            debug(
                'GDA', "predict on data.shape=%s min:max(data)=%f:%f " %
                (data.shape, np.min(data), np.max(data)))

        return predictions

    def _inv(self, cov):
        """Invert `cov`, optionally falling back to the pseudo-inverse.

        Parameters
        ----------
        cov : array
          Square matrix to invert.

        Returns
        -------
        array
          ``np.linalg.inv(cov)``, or ``np.linalg.pinv(cov)`` if plain
          inversion fails and ``allow_pinv`` is enabled.

        Raises
        ------
        DegenerateInputError
          If no inverse could be computed.
        """
        try:
            return np.linalg.inv(cov)
        except Exception as e:
            # keep a reference: the ``as`` name is unbound after the handler
            # on Python 3
            error = e
        if self.params.allow_pinv:
            try:
                return np.linalg.pinv(cov)
            except Exception as e:
                # report the most recent failure, as before
                error = e
        raise DegenerateInputError(
            "Data is probably singular, since inverse fails. Got %s"
            % (error,))
Exemple #22
class LinearSVMWeights(Sensitivity):
    """`SensitivityAnalyzer` for the LIBSVM implementation of a linear SVM.
    """

    # NOTE(review): the default (False) and the 'bool' constraint were absent
    # and are assumed; confirm against the upstream declaration.
    split_weights = Parameter(
        False, constraints='bool',
        doc="If binary classification either to sum SVs per each "
        "class separately.  Note: be careful with interpretation"
        " of the values")

    def __init__(self, clf, **kwargs):
        """Initialize the analyzer with the classifier it shall use.

        Parameters
        ----------
        clf : LinearSVM
          classifier to use. Only classifiers sub-classed from
          `LinearSVM` may be used.
        **kwargs
          Passed on to ``Sensitivity.__init__``.
        """
        # init base classes first
        Sensitivity.__init__(self, clf, **kwargs)

    def _call(self, dataset, callables=[]):
        """Extract feature weights from the trained SVM model of ``self.clf``.

        Returns a Dataset of weights with a ``biases`` sample attribute (the
        model's rho values) and, for classification, a sample attribute named
        after the classifier's space that labels each row.

        NOTE(review): `callables` is not used in the visible body, and its
        default is a mutable list shared between calls. Several ``else:``
        lines and call fragments appear to be missing in this copy (see the
        notes below); the code is not runnable as it stands.
        """
        # local bindings
        clf = self.clf
        model = clf.model

        # Labels for sensitivities to be returned
        sens_labels = None

        if clf.__is_regression__:
            nr_class = None
            svm_labels = None  # shouldn't bother to provide "targets" for regressions
            # NOTE(review): ``else:`` presumably missing -- as written the two
            # assignments above are always overwritten.
            nr_class = model.nr_class
            svm_labels = model.labels

        # No need to warn since now we by default we do not do
        # anything evil and provide labels -- so it is up for a user
        # to decide either he wants to do something silly
        #if nr_class != 2:
        #    warning("You are estimating sensitivity for SVM %s trained on %d" %
        #            (str(clf), nr_class) +
        #            " classes. Make sure that it is what you intended to do" )

        svcoef = np.matrix(model.get_sv_coef())
        svs = np.matrix(model.get_sv())
        rhos = np.asarray(model.get_rho())

        if self.params.split_weights:
            if nr_class != 2:
                raise NotImplementedError, \
                      "Cannot compute per-class weights for" \
                      " non-binary classification task"
            # libsvm might have different idea on the ordering
            # of labels, so we would need to map them back explicitely
            ds_labels = list(
                dataset.sa[clf.get_space()].unique)  # labels in the dataset
            senses = [None for i in ds_labels]
            # first label is given positive value
            for i, (c, l) in enumerate([(svcoef > 0, lambda x: x),
                                        (svcoef < 0, lambda x: x * -1)]):
                # convert to array, and just take the meaningful dimension
                c_ = c.A[0]
                # NOTE svm_labels are numerical; ds_labels are literal
                # NOTE(review): the start of the assignment target below is
                # missing (presumably an index into `senses` looked up via
                # `ds_labels`).
                            clf._attrmap.to_literal(svm_labels[i]))] = \
                                (l(svcoef[:, c_] * svs[c_, :])).A[0]
            weights = np.array(senses)
            sens_labels = svm_labels
            # NOTE(review): ``else:`` (of ``if self.params.split_weights``)
            # presumably missing before the block below.
            # XXX yoh: .mean() is effectively
            # averages across "sensitivities" of all paired classifiers (I
            # think). See more info on this topic in svm.py on how sv_coefs
            # are stored
            # First multiply SV coefficients with the actual SVs to get
            # weighted impact of SVs on decision, then for each feature
            # take mean across SVs to get a single weight value
            # per feature
            if nr_class is None or nr_class <= 2:
                # as simple as this
                weights = (svcoef * svs).A
                # and only in case of classification
                if nr_class:
                    # ??? First label seems corresponds to positive
                    sens_labels = [tuple(svm_labels[::-1])]
                # NOTE(review): ``else:`` presumably missing before the
                # multiclass handling below.
                # we need to compose correctly per each pair of classifiers.
                # See docstring for get_sv_coef for more details on internal
                # structure of bloody storage

                # total # of pairs
                npairs = nr_class * (nr_class - 1) / 2
                # # of SVs in each class
                NSVs_perclass = model.get_n_sv()
                # indices where each class starts in each row of SVs
                # name is after similar variable in libsvm internals
                nz_start = np.cumsum([0] + NSVs_perclass[:-1])
                nz_end = nz_start + NSVs_perclass
                # reserve storage
                weights = np.zeros((npairs, svs.shape[1]))
                ipair = 0  # index of the pair
                # NOTE(review): the three ``//`` lines below are not Python;
                # they look like text quoted from libsvm whose enclosing
                # string delimiters were lost.
                // classifier (i,j): coefficients with
				// i are in sv_coef[j-1][nz_start[i]...],
				// j are in sv_coef[i][nz_start[j]...]
                sens_labels = []
                for i in xrange(nr_class):
                    for j in xrange(i + 1, nr_class):
                        # NOTE(review): the expression below is truncated --
                        # the second factor of the last product and the
                        # closing parenthesis are missing.
                        weights[ipair, :] = np.asarray(
                            svcoef[j - 1, nz_start[i]:nz_end[i]] *
                            svs[nz_start[i]:nz_end[i]] +
                            svcoef[i, nz_start[j]:nz_end[j]] *
                        # ??? First label corresponds to positive
                        # that is why [j], [i]
                        sens_labels += [(svm_labels[j], svm_labels[i])]
                        ipair += 1  # go to the next pair
                assert (ipair == npairs)

        if __debug__ and 'SVM' in debug.active:
            if nr_class:
                nsvs = model.get_n_sv()
                # NOTE(review): ``else:`` presumably missing here.
                nsvs = model.get_total_n_sv()
            if clf.__is_regression__:
                svm_type = clf._svm_impl  # type of regression
                # NOTE(review): ``else:`` presumably missing here.
                svm_type = '%d-class SVM(%s)' % (nr_class, clf._svm_impl)
            # NOTE(review): the call opener (presumably ``debug(`` with its
            # target id) is missing before the message below.
                  "Extracting weights for %s: #SVs=%s, " % \
                  (svm_type, nsvs) + \
                  " SVcoefshape=%s SVs.shape=%s Rhos=%s." % \
                  (svcoef.shape, svs.shape, rhos) + \
                  " Result: min=%f max=%f" % (np.min(weights), np.max(weights)))

        ds_kwargs = {}
        if nr_class:  # for classification only
            # and we should have prepared the labels
            assert (sens_labels is not None)

            if len(clf._attrmap):
                if isinstance(sens_labels[0], tuple):
                    sens_labels = asobjarray(sens_labels)
                # NOTE(review): the call below is truncated (remaining
                # arguments / closing parenthesis are missing).
                sens_labels = clf._attrmap.to_literal(sens_labels,

            # NOTE: `weights` is already and always 2D
            ds_kwargs = dict(sa={clf.get_space(): sens_labels})

        weights_ds = Dataset(weights, **ds_kwargs)
        weights_ds.sa['biases'] = rhos
        return weights_ds

    _customizeDocInherit = True
Exemple #23
class SVM(_SVM):
    """Support Vector Machine Classifier(s) based on Shogun

    This is a simple base interface
    """

    __default_kernel_class__ = _default_kernel_class_
    num_threads = Parameter(1,
                            doc='Number of threads to utilize')

    _KNOWN_PARAMS = ['epsilon']

    __tags__ = _SVM.__tags__ + ['sg', 'retrainable']

    # Some words of wisdom from shogun author:
    # XXX remove after proper comments added to implementations
    #
    #   If you'd like to train linear SVMs use SGD or OCAS. These are (I am
    #   serious) the fastest linear SVM-solvers to date. (OCAS cannot do SVMs
    #   with standard additive bias, but will L2 regularize it - though it
    #   should not matter much in practice (although it will give slightly
    #   different solutions)). Note that SGD has no stopping criterion (you
    #   simply have to specify the number of iterations) and that OCAS has a
    #   different stopping condition than svmlight for example which may be
    #   more tight and more loose depending on the problem - I suggest 1e-2
    #   or 1e-3 for epsilon.
    #
    #   If you would like to train kernel SVMs use libsvm/gpdt/svmlight -
    #   depending on the problem one is faster than the other (hard to say
    #   when, I *think* when your dataset is very unbalanced chunking methods
    #   like svmlight/gpdt are better), for smaller problems definitely
    #   libsvm.
    #
    #   If you use string kernels then gpdt/svmlight have a special 'linadd'
    #   speedup for this (requires sg 0.6.2 - there was some inefficiency in
    #   the code for python-modular before that). This is effective for big
    #   datasets (I trained on 10 million strings based on this).
    #
    #   And yes currently we only implemented parallel training for svmlight,
    #   however all SVMs can be evaluated in parallel.

    # Known implementations, keyed by their `svm_impl` name.  Each value is a
    # tuple whose first item is the shogun class; the remaining items look
    # like (parameter names, capability tags, description) -- TODO confirm
    # against the base class which consumes them.
    if externals.exists('shogun', raise_=True):
        _KNOWN_IMPLEMENTATIONS = {
            "libsvm" : (shogun.Classifier.LibSVM, ('C',),
                       ('multiclass', 'binary'),
                        "LIBSVM's C-SVM (L2 soft-margin SVM)"),
            "gmnp" : (shogun.Classifier.GMNPSVM, ('C',),
                     ('multiclass', 'binary'),
                      "Generalized Nearest Point Problem SVM"),
            # XXX should have been GPDT, shogun has it fixed since some version
            "gpbt" : (shogun.Classifier.GPBTSVM, ('C',), ('binary',),
                      "Gradient Projection Decomposition Technique for " \
                      "large-scale SVM problems"),
            "gnpp" : (shogun.Classifier.GNPPSVM, ('C',), ('binary',),
                      "Generalized Nearest Point Problem SVM"),

            ## TODO: Needs sparse features...
            # "svmlin" : (shogun.Classifier.SVMLin, ''),
            # "liblinear" : (shogun.Classifier.LibLinear, ''),
            # "subgradient" : (shogun.Classifier.SubGradientSVM, ''),
            ## good 2-class linear SVMs
            # "ocas" : (shogun.Classifier.SVMOcas, ''),
            # "sgd" : ( shogun.Classifier.SVMSGD, ''),

            # regressions
            "libsvr": (shogun.Regression.LibSVR, ('C', 'tube_epsilon',),
                       ('regression',),
                       "LIBSVM's epsilon-SVR"),
            }

    def __init__(self, **kwargs):
        """Interface class to Shogun's classifiers and regressions.

        Default implementation is 'libsvm'.

        Parameters
        ----------
        **kwargs
          Passed on to ``_SVM.__init__``.  ``svm_impl`` (default 'libsvm') is
          lower-cased before it is handed over.

        Raises
        ------
        ValueError
          If an inherently linear implementation is combined with a
          non-linear kernel.
        """
        svm_impl = kwargs.get('svm_impl', 'libsvm').lower()
        kwargs['svm_impl'] = svm_impl

        # init base class
        _SVM.__init__(self, **kwargs)

        # Holds the trained svm.
        self.__svm = None
        # Compatibility convenience to bind to the classify/apply method
        # of __svm
        self.__svm_apply = None

        # Need to store original data...
        # TODO: keep 1 of them -- just __traindata or __traindataset
        # For now it is needed for computing sensitivities
        self.__traindataset = None

        # internal SG swig proxies
        self.__traindata = None
        self.__kernel = None
        self.__kernel_test = None
        self.__testdata = None

        # remove kernel-based for some
        # TODO RF: provide separate handling for non-kernel machines
        if svm_impl in ['svmocas']:
            # NOTE(review): self.__kernel was set to None just above, so this
            # guard cannot trigger here -- confirm which kernel object was
            # meant to be inspected.
            if not (self.__kernel is None
                    or self.__kernel.__kernel_name__ == 'linear'):
                raise ValueError(
                    "%s is inherently linear, thus provided kernel %s "
                    "is of no effect" % (svm_impl, self.__kernel))

    # TODO: integrate with kernel framework
    #def __condition_kernel(self, kernel):
        ## XXX I thought that it is needed only for retrainable classifier,
        ##     but then krr gets confused, and svrlight needs it to provide
        ##     meaningful results even without 'retraining'
        #if self._svm_impl in ['svrlight', 'lightsvm']:
                #kernel.set_precompute_matrix(True, True)
            #except Exception, e:
                ## N/A in shogun 0.9.1... TODO: RF
                #if __debug__:
                    #debug('SG_', "Failed call to set_precompute_matrix for %s: %s"
                          #% (self, e))

    def _train(self, dataset):
        """Train SVM

        Builds the shogun labels from the targets sample attribute of
        `dataset`, (re)creates the kernel and the SVM instance where needed
        and records training-related conditional attributes.

        NOTE(review): several statements of this method appear to be missing
        in this copy of the file (see the notes in the body); the code is
        left untouched -- restore it from the upstream file before use.
        """

        # XXX watchout
        # self.untrain()
        newkernel, newsvm = False, False
        # local bindings for faster lookup
        params = self.params
        retrainable = self.params.retrainable

        targets_sa_name = self.get_space()    # name of targets sa
        targets_sa = dataset.sa[targets_sa_name] # actual targets sa

        if retrainable:
            _changedData = self._changedData

        # LABELS
        ul = None
        self.__traindataset = dataset

        # OK -- we have to map labels since
        #  binary ones expect -1/+1
        #  Multiclass expect labels starting with 0, otherwise they puke
        #   when ran from ipython... yikes
        if __debug__:
            debug("SG_", "Creating labels instance")

        if self.__is_regression__:
            labels_ = np.asarray(targets_sa.value, dtype='double')
            # NOTE(review): an `else:` appears to be missing here -- as
            # written the label mapping below also runs for regressions and
            # overwrites `labels_`.
            ul = targets_sa.unique
            # ul.sort()

            if len(ul) == 2:
                # assure that we have -1/+1
                _labels_dict = {ul[0]:-1.0, ul[1]:+1.0}
            elif len(ul) < 2:
                raise FailedToTrainError, \
                      "We do not have 1-class SVM brought into SG yet"
                # NOTE(review): an `else:` appears to be missing here -- the
                # assignment below is unreachable after the raise.
                # can't use plain enumerate since we need them swapped
                _labels_dict = dict([ (ul[i], i) for i in range(len(ul))])

            # Create SG-customized attrmap to assure -1 / +1 if necessary
            self._attrmap = AttributeMap(_labels_dict, mapnumeric=True)

            if __debug__:
                debug("SG__", "Mapping labels using dict %s" % _labels_dict)
            labels_ = self._attrmap.to_numeric(targets_sa.value).astype(float)

        labels = shogun.Features.Labels(labels_)
        _setdebug(labels, 'Labels')

        # KERNEL

        # XXX cruel fix for now... whole retraining business needs to
        # be rethought
        if retrainable:
            _changedData['kernel_params'] = _changedData.get('kernel_params', False)

        # TODO: big RF to move non-kernel classifiers away
        if 'kernel-based' in self.__tags__ and (not retrainable
               or _changedData['traindata'] or _changedData['kernel_params']):
            # If needed compute or just collect arguments for SVM and for
            # the kernel

            if retrainable and __debug__:
                # NOTE(review): the opening of both debug(...) calls below
                # (and the format argument of the second) is missing.
                if _changedData['traindata']:
                          "Re-Creating kernel since training data has changed")

                if _changedData['kernel_params']:
                          "Re-Creating kernel since params %s has changed" %

            k = self.params.kernel
            self.__kernel = kernel = k.as_raw_sg()

            newkernel = True
            self.kernel_params.reset()  # mark them as not-changed
            #_setdebug(kernel, 'Kernels')

            if retrainable:
                if __debug__:
                    debug("SG_", "Resetting test kernel for retrainable SVM")
                self.__kernel_test = None

        # TODO -- handle _changedData['params'] correctly, ie without recreating
        # whole SVM
        Cs = None
        if not retrainable or self.__svm is None or _changedData['params']:
            # SVM
            if self.params.has_key('C'):
                Cs = self._get_cvec(dataset)

                # XXX do not jump over the head and leave it up to the user
                #     ie do not rescale automagically by the number of samples
                #if len(Cs) == 2 and not ('regression' in self.__tags__) and len(ul) == 2:
                #    # we were given two Cs
                #    if np.max(C) < 0 and np.min(C) < 0:
                #        # and both are requested to be 'scaled' TODO :
                #        # provide proper 'features' to the parameters,
                #        # so we could specify explicitely if to scale
                #        # them by the number of samples here
                #        nl = [np.sum(labels_ == _labels_dict[l]) for l in ul]
                #        ratio = np.sqrt(float(nl[1]) / nl[0])
                #        #ratio = (float(nl[1]) / nl[0])
                #        Cs[0] *= ratio
                #        Cs[1] /= ratio
                #        if __debug__:
                #            debug("SG_", "Rescaled Cs to %s to accomodate the "
                #                  "difference in number of training samples" %
                #                  Cs)

            # Choose appropriate implementation
            svm_impl_class = self.__get_implementation(ul)

            if __debug__:
                debug("SG", "Creating SVM instance of %s" % `svm_impl_class`)

            if self._svm_impl in ['libsvr', 'svrlight']:
                # for regressions constructor a bit different
                self.__svm = svm_impl_class(Cs[0], self.params.tube_epsilon, self.__kernel, labels)
                # we need to set epsilon explicitly
                # NOTE(review): no statement follows the comment above -- the
                # call that sets epsilon is not present in this copy.
            elif self._svm_impl in ['krr']:
                self.__svm = svm_impl_class(self.params.tau, self.__kernel, labels)
            elif 'kernel-based' in self.__tags__:
                self.__svm = svm_impl_class(Cs[0], self.__kernel, labels)
                # NOTE(review): an `else:` appears to be missing here -- as
                # written the kernel-based instance is replaced right away by
                # the feature-based one created below.
                traindata_sg = _tosg(dataset.samples)
                self.__svm = svm_impl_class(Cs[0], traindata_sg, labels)

            # To stay compatible with versions across API changes in sg 1.0.0
            self.__svm_apply = externals.versions['shogun'] >= '1' \
                               and self.__svm.apply \
                               or  self.__svm.classify # the last one for old API

            # Set shrinking
            if 'shrinking' in params:
                shrinking = params.shrinking
                if __debug__:
                    debug("SG_", "Setting shrinking to %s" % shrinking)
                # NOTE(review): `shrinking` is only reported, never applied
                # to the SVM in this copy.

            if Cs is not None and len(Cs) == 2:
                if __debug__:
                    debug("SG_", "Since multiple Cs are provided: %s, assign them" % Cs)
                self.__svm.set_C(Cs[0], Cs[1])

            self.params.reset()  # mark them as not-changed
            newsvm = True
            _setdebug(self.__svm, 'SVM')
            # Set optimization parameters
            # NOTE(review): the `if` below has no body, and the `else:` that
            # should introduce the "not re-created" branch is missing.
            if self.params.has_key('tube_epsilon') and \
                   hasattr(self.__svm, 'set_tube_epsilon'):
            if __debug__:
                debug("SG_", "SVM instance is not re-created")
            # NOTE(review): the two branches below only emit debug output;
            # the actual re-assignment of labels/kernel is not present.
            if _changedData['targets']:          # labels were changed
                if __debug__: debug("SG__", "Assigning new labels")
            if newkernel:               # kernel was replaced
                if __debug__: debug("SG__", "Assigning new kernel")
            assert(_changedData['params'] is False)  # we should never get here

        if retrainable:
            # we must assign it only if it is retrainable
            self.ca.retrained = not newsvm or not newkernel

        # Train
        if __debug__ and 'SG' in debug.active:
            if not self.__is_regression__:
                lstr = " with labels %s" % targets_sa.unique
                # NOTE(review): an `else:` appears to be missing here --
                # `lstr` is always reset to "".
                lstr = ""
            debug("SG", "%sTraining %s on data%s" %
                  (("","Re-")[retrainable and self.ca.retrained],
                   self, lstr))

        # NOTE(review): no call that actually trains self.__svm is visible
        # between the debug messages above and below.

        if __debug__:
            debug("SG_", "Done training SG_SVM %s" % self)

        # Report on training
        # NOTE(review): the continuation of the condition below and the
        # `else:` before `trained_targets = None` are missing.
        if (__debug__ and 'SG__' in debug.active) or \
            if __debug__:
                debug("SG_", "Assessing predictions on training data")
            trained_targets = self.__svm_apply().get_labels()

            trained_targets = None

        if __debug__ and "SG__" in debug.active:
            debug("SG__", "Original labels: %s, Trained labels: %s" %
                  (targets_sa.value, trained_targets))

        # Assign training confusion right away here since we are ready
        # to do so.
        # XXX TODO use some other conditional attribute like 'trained_targets' and
        #     use it within base Classifier._posttrain to assign predictions
        #     instead of duplicating code here
        # XXX For now it can be done only for regressions since labels need to
        #     be remapped and that becomes even worse if we use regression
        #     as a classifier so mapping happens upstairs
        # NOTE(review): the arguments and closing parenthesis of the call
        # below are missing.
        if self.__is_regression__ and self.ca.is_enabled('training_stats'):
            self.ca.training_stats = self.__summary_class__(

    # XXX actually this is the beast which started this evil conversion
    #     so -- make use of dataset here! ;)
    def _predict(self, dataset):
        """Predict values for the data

        Computes the test kernel between the training dataset and `dataset`,
        applies the trained SVM, stores the raw values in ``ca.estimates``
        and returns the predictions.

        NOTE(review): several statements of this method appear to be missing
        in this copy of the file (see the notes in the body); the code is
        left untouched -- restore it from the upstream file before use.
        """

        retrainable = self.params.retrainable

        if retrainable:
            changed_testdata = self._changedData['testdata'] or \
                               self.__kernel_test is None

        if not retrainable:
            if __debug__:
                # NOTE(review): the opening of this debug(...) call is missing.
                      "Initializing SVMs kernel of %s with training/testing samples"
                      % self)
            self.params.kernel.compute(self.__traindataset, dataset)
            self.__kernel_test = self.params.kernel.as_sg()._k
            # We can just reuse kernel used for training
            # NOTE(review): an `else:` appears to be missing here -- as
            # written `changed_testdata` is read although it is only defined
            # for retrainable classifiers.

            if changed_testdata:
                #if __debug__:
                          #"Re-creating testing kernel of %s giving "
                          #"arguments %s" %
                          #(`self._kernel_type`, self.__kernel_args))
                self.params.kernel.compute(self.__traindataset, dataset)

                #_setdebug(kernel_test, 'Kernels')

                #_setdebug(kernel_test_custom, 'Kernels')
                self.__kernel_test = self.params.kernel.as_raw_sg()

            elif __debug__:
                debug("SG__", "Re-using testing kernel")

        assert(self.__kernel_test is not None)

        if 'kernel-based' in self.__tags__:
            # doesn't do any good imho although on unittests helps tiny bit... hm
            values_ = self.__svm_apply()
            # NOTE(review): an `else:` appears to be missing here, and
            # neither the test kernel nor `testdata_sg` is handed to the SVM
            # before it is applied.
            testdata_sg = _tosg(dataset.samples)
            values_ = self.__svm_apply()

        if __debug__:
            debug("SG_", "Classifying testing data")

        if values_ is None:
            raise RuntimeError, "We got empty list of values from %s" % self

        values = values_.get_labels()

        if retrainable:
            # we must assign it only if it is retrainable
            self.ca.repredicted = repredicted = not changed_testdata
            if __debug__:
                debug("SG__", "Re-assigning learing kernel. Repredicted is %s"
                      % repredicted)
            # return back original kernel
            # NOTE(review): the `if` below has no body in this copy.
            if 'kernel-based' in self.__tags__:

        if __debug__:
            debug("SG__", "Got values %s" % values)

        if (self.__is_regression__):
            predictions = values
            # NOTE(review): two `else:` lines appear to be missing in this
            # section -- as written `predictions` always ends up being
            # `values`, and it is undefined for non-regressions.
            if len(self._attrmap.keys()) == 2:
                predictions = np.sign(values)
                # since np.sign(0) == 0
                predictions[predictions==0] = 1
                predictions = values

            # remap labels back adjusting their type
            # XXX YOH: This is done by topclass now (needs RF)
            #predictions = self._attrmap.to_literal(predictions)

            if __debug__:
                debug("SG__", "Tuned predictions %s" % predictions)

        # store conditional attribute
        # TODO: extract values properly for multiclass SVMs --
        #       ie 1 value per label or pairs for all 1-vs-1 classifications
        self.ca.estimates = values

        ## to avoid leaks with not yet properly fixed shogun
        # NOTE(review): the `if` below has no body in this copy.
        if not retrainable:

        return predictions

    def _untrain(self):
        """Release the trained state held by this classifier.

        The kernel is always cleaned up.  Unless the classifier is
        retrainable, all references to shogun objects (kernels, SVM, training
        data) and to the training dataset are dropped as well.
        """
        super(SVM, self)._untrain()
        # untrain/clean the kernel -- we might not allow to drag SWIG
        # instance around BUT XXX -- make it work fine with
        # CachedKernel -- we might not want to fully "untrain" in such
        # case
        self.params.kernel.cleanup()    # XXX unify naming

        if self.params.retrainable:
            if __debug__:
                debug("SG__",
                      "Not untraining %s since it is retrainable" % self)
            return

        if __debug__:
            debug("SG__", "Untraining %s and destroying sg's SVM" % self)

        # to avoid leaks with not yet properly fixed shogun
        # XXX make it nice... now it is just stable ;-)
        # Each proxy is deleted explicitly before the attribute is reset so
        # the SWIG object is released right away.
        if self.__kernel is not None:
            del self.__kernel
            self.__kernel = None

        if self.__kernel_test is not None:
            del self.__kernel_test
            self.__kernel_test = None

        if self.__svm is not None:
            del self.__svm
            self.__svm = None
            self.__svm_apply = None

        if self.__traindata is not None:
            # Let in for easy demonstration of the memory leak in shogun
            #for i in xrange(10):
            #    debug("SG__", "cachesize pre free features %s" %
            #          (self.__svm.get_kernel().get_cache_size()))
            del self.__traindata
            self.__traindata = None

        self.__traindataset = None

        if __debug__:
            debug("SG__",
                  "Done untraining %s and destroying sg's SVM" % self)

    def __get_implementation(self, ul):
        """Select the shogun class implementing the configured SVM.

        Parameters
        ----------
        ul : sequence or None
          Unique training labels.  Not consulted for regressions.

        Returns
        -------
        The class registered for ``self._svm_impl`` in case of a regression
        or of two labels, otherwise a dedicated multiclass implementation.

        Raises
        ------
        RuntimeError
          If the configured implementation cannot handle multiclass data.
        """
        if self.__is_regression__ or len(ul) == 2:
            return SVM._KNOWN_IMPLEMENTATIONS[self._svm_impl][0]

        # anything else is treated as multiclass data, which only some
        # implementations support -- and via a different class
        if self._svm_impl == 'libsvm':
            svm_impl_class = shogun.Classifier.LibSVMMultiClass
        elif self._svm_impl == 'gmnp':
            svm_impl_class = shogun.Classifier.GMNPSVM
        else:
            raise RuntimeError(
                "Shogun: Implementation %s doesn't handle multiclass "
                "data. Got labels %s. Use some other classifier" %
                (self._svm_impl, ul))
        if __debug__:
            debug("SG_", "Using %s for multiclass data of %s" %
                  (svm_impl_class, self._svm_impl))

        return svm_impl_class

    # Read-only accessors; the descriptions are passed as `doc` so that they
    # end up attached to the properties themselves.
    svm = property(fget=lambda self: self.__svm,
                   doc="Access to the SVM model.")

    traindataset = property(fget=lambda self: self.__traindataset,
                            doc="Dataset which was used for training")
class GroupClusterThreshold_NN3(Learner):
    """Statistical evaluation of group-level average accuracy maps

    This algorithm can be used to perform cluster-thresholding of
    searchlight-based group analyses. It implements a two-stage procedure that
    uses the results of within-subject permutation analyses, estimates a per
    feature cluster forming threshold (via bootstrap), and uses the thresholded
    bootstrap samples to estimate the distribution of cluster sizes in
    group-average accuracy maps under the NULL hypothesis, as described in [1]_.

    Note: this class implements a modified version of that algorithm. The
    present implementation differs in, at least, four aspects from the
    description in that paper.

    1) Cluster p-values refer to the probability of observing a particular
       cluster size or a larger one (original paper: probability to observe a
       larger cluster only).  Consequently, probabilities reported by this
       implementation will have a tendency to be higher in comparison.

    2) Clusters found in the original (unpermuted) accuracy map are always
       included in the NULL distribution estimate of cluster sizes. This
       provides an explicit lower bound for probabilities, as there will
       always be at least one observed cluster for every cluster size found
       in the original accuracy map. Consequently, it is impossible to get a
       probability of zero for clusters of any size (see [2]_ for details).

    3) Bootstrap accuracy maps that contain no clusters are counted in a
       dedicated size-zero bin in the NULL distribution of cluster sizes.
       This change yields reliable cluster-probabilities even for very low
       featurewise threshold probabilities, where (some portion) of the
       bootstrap accuracy maps do not contain any clusters.

    4) The method for FWE-correction used by the original authors is not
       provided. Instead, a range of alternatives implemented by the
       statsmodels package are available.

    Moreover, this implementation minimizes the required memory demands and
    allows for computing large numbers of bootstrap samples without
    significant increase in memory demand (CPU time trade-off).

    Instances of this class must be trained before they can be used to
    threshold accuracy maps. The training dataset must match the following
    criteria:

    1) For every subject in the group, it must contain multiple accuracy maps
       that are the result of a within-subject classification analysis
       based on permuted class labels. One map must correspond to one fixed
       permutation for all features in the map, as described in [1]_. The
       original authors recommend 100 accuracy maps per subject for a typical
       searchlight analysis.

    2) It must contain a sample attribute indicating which sample is
       associated with which subject, because bootstrapping average accuracy
       maps is implemented by randomly drawing one map from each subject.
       The name of the attribute can be configured via the ``chunk_attr``

    After training, an instance can be called with a dataset to perform
    threshold and statistical evaluation. Unless a single-sample dataset
    is passed, all samples in the input dataset will be averaged prior

      This is a shallow copy of the input dataset (after a potential
      averaging), hence contains the same data and attributes. In addition it
      includes the following attributes:

        Vector with feature-wise cluster-forming thresholds.

        Vector with labels for clusters after thresholding the input data
        with the desired feature-wise probability. Each unique non-zero
        element corresponds to an individual super-threshold cluster. Cluster
        values are sorted by cluster size (number of features). The largest
        cluster is always labeled with ``1``.

        Vector with labels for super-threshold clusters after correction for
        multiple comparisons. The attribute is derived from
        ``fa.clusters_featurewise_thresh`` by removing all clusters that
        do not pass the threshold when controlling for the family-wise error

        Record array with information on all detected clusters. The array is
        sorted according to cluster size, starting with the largest cluster
        in terms of number of features. The array contains the fields ``size``
        (number of features comprising the cluster), ``mean``, ``median``,
        ``min``, ``max``, ``std`` (respective descriptive statistics for all
        clusters), and ``prob_raw`` (probability of observing the cluster of a
        this size or larger under the NULL hypothesis). If correction for
        multiple comparisons is enabled an additional field ``prob_corrected``
        (probability after correction) is added.

        Record array with information on the location of all detected clusters.
        The array is sorted according to cluster size (same order as
        ``a.clusterstats``). The array contains the fields ``max``
        (feature coordinate of the maximum score within the cluster) and
        ``center_of_mass`` (coordinate of the center of mass; weighted by
        the feature values within the cluster).

    .. [1] Johannes Stelzer, Yi Chen and Robert Turner (2013). Statistical
       inference and multiple testing correction in classification-based
       multi-voxel pattern analysis (MVPA): Random permutations and cluster
       size control. NeuroImage, 65, 69--82.
    .. [2] Smyth, G. K., & Phipson, B. (2010). Permutation P-values Should
       Never Be Zero: Calculating Exact P-values When Permutations Are
       Randomly Drawn. Statistical Applications in Genetics and Molecular
       Biology, 9, 1--12.

    n_bootstrap = Parameter(
        100000, constraints=EnsureInt() & EnsureRange(min=1),
        doc="""Number of bootstrap samples to be generated from the training
            dataset. For each sample, an average map will be computed from a
            set of randomly drawn samples (one from each chunk). Bootstrap
            samples will be used to estimate a featurewise NULL distribution of
            accuracy values for initial thresholding, and to estimate the NULL
            distribution of cluster sizes under the NULL hypothesis. A larger
            number of bootstrap samples reduces the lower bound of
            probabilities, which may be beneficial for multiple comparison

    feature_thresh_prob = Parameter(
        0.001, constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0),
        doc="""Feature-wise probability threshold. The value corresponding
            to this probability in the NULL distribution of accuracies will
            be used as threshold for cluster forming. Given that the NULL
            distribution is estimated per feature, the actual threshold value
            will vary across features yielding a threshold vector. The number
            of bootstrap samples need to be adequate for a desired probability.
            A ``ValueError`` is raised otherwise.""")

    chunk_attr = Parameter(
        doc="""Name of the attribute indicating the individual chunks from
            which a single sample each is drawn for averaging into a bootstrap

    fwe_rate = Parameter(
        0.05, constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0),
        doc="""Family-wise error rate for multiple comparison correction
            of cluster size probabilities.""")

    multicomp_correction = Parameter(
        'fdr_bh', constraints=EnsureChoice('bonferroni', 'sidak', 'holm-sidak',
                                           'holm', 'simes-hochberg', 'hommel',
                                           'fdr_bh', 'fdr_by', None),
        doc="""Strategy for multiple comparison correction of cluster
            probabilities. All methods supported by statsmodels' ``multitest``
            are available. In addition, ``None`` can be specified to disable

    n_blocks = Parameter(
        1, constraints=EnsureInt() & EnsureRange(min=1),
        doc="""Number of segments used to compute the feature-wise NULL
            distributions. This parameter determines the peak memory demand.
            In case of a single segment a matrix of size
            (n_bootstrap x nfeatures) will be allocated. Increasing the number
            of segments reduces the peak memory demand roughly by that factor.

    n_proc = Parameter(
        1, constraints=EnsureInt() & EnsureRange(min=1),
        doc="""Number of parallel processes to use for computation.
            Requires `joblib` external module.""")

    def __init__(self, **kwargs):
        """Set up the learner and validate bootstrap/threshold settings.

        Raises
        ------
        ValueError
          If ``1 / (n_bootstrap + 1)`` exceeds ``feature_thresh_prob``, i.e.
          the configured number of bootstrap samples is too small for the
          requested threshold probability.
        """
        # auto-training would make no sense here, hence always disabled
        Learner.__init__(self, auto_train=False, **kwargs)
        params = self.params
        min_prob = 1. / (params.n_bootstrap + 1)
        if min_prob > params.feature_thresh_prob:
            raise ValueError('number of bootstrap samples is insufficient for'
                             ' the desired threshold probability')

    def _untrain(self):
        """Forget the threshold map and the NULL cluster size distribution."""
        self._thrmap = self._null_cluster_sizes = None

        description="Statistical assessment of (searchlight) MVPA results",
    def _train(self, ds):
        """Estimate the threshold map and the NULL cluster size distribution.

        Parameters
        ----------
        ds : Dataset
          Training dataset.  The sample attribute named by the ``chunk_attr``
          parameter assigns every sample to a chunk; for each bootstrap
          average map one sample is drawn from every chunk.

        The results are stored in ``self._thrmap`` (per-feature thresholds)
        and ``self._null_cluster_sizes`` (sparse histogram of cluster sizes).
        """
        # shortcuts
        params = self.params
        chunk_attr = params.chunk_attr
        n_proc = params.n_proc
        feature_thresh_prob = params.feature_thresh_prob
        # Step 0: bootstrap maps by drawing one for each chunk and average them
        # (do N iterations)
        # this could take a lot of memory, hence instead of computing the maps
        # we compute the source maps they can be computed from and then (re)build
        # the matrix of bootstrapped maps either row-wise or column-wise (as
        # needed) to save memory by a factor of (close to) `n_bootstrap`
        # which samples belong to which chunk
        chunk_values = ds.sa[chunk_attr].value
        chunk_samples = dict([(c, np.where(chunk_values == c)[0])
                              for c in ds.sa[chunk_attr].unique])
        # pre-built the bootstrap combinations
        bcombos = [[random.sample(v, 1)[0] for v in chunk_samples.values()]
                   for _ in xrange(params.n_bootstrap)]
        bcombos = np.array(bcombos, dtype=int)
        # Step 1: find the per-feature threshold that corresponds to some p
        # in the NULL
        # floor division keeps the width an integer; at least one feature per
        # block so the step of the segment loop below is never zero
        segwidth = max(1, ds.nfeatures // params.n_blocks)
        # speed things up by operating on an array not a dataset
        ds_samples = ds.samples
        if __debug__:
            debug('GCTHR',
                  'Compute per-feature thresholds in %i blocks of %i features'
                  % (params.n_blocks, segwidth))

        parallel = n_proc != 1
        if parallel:
            # local import as only parallel execution needs this
            from joblib import Parallel, delayed
            verbose_level_parallel = 50 \
                if (__debug__ and 'GCTHR' in debug.active) else 0

        # Execution can be done in parallel as the estimation is independent
        # across features

        def featuresegment_producer(ncols):
            for segstart in xrange(0, ds.nfeatures, ncols):
                # one average map for every stored bcombo
                # this also slices the input data into feature subsets
                # for the compute blocks
                yield [np.mean(
                       # get a view to a subset of the features
                       # -- should be somewhat efficient as feature axis is
                       # sliced
                       ds_samples[sidx, segstart:segstart + ncols],
                       axis=0)
                       for sidx in bcombos]

        if not parallel:
            # Serial execution
            thrmap = np.hstack(  # merge across compute blocks
                [get_thresholding_map(d, feature_thresh_prob)
                 # compute a partial threshold map for as many features
                 # as fit into a compute block
                 for d in featuresegment_producer(segwidth)])
        else:
            # Parallel execution
            # same code as above, just in parallel with joblib's Parallel;
            # pre_dispatch limits the number of queued segments, as each of
            # them holds n_bootstrap partial maps
            thrmap = np.hstack(
                Parallel(n_jobs=n_proc,
                         pre_dispatch=n_proc,
                         verbose=verbose_level_parallel)(
                    delayed(get_thresholding_map)(d, feature_thresh_prob)
                    for d in featuresegment_producer(segwidth)))
        # store for later thresholding of input data
        self._thrmap = thrmap
        # Step 2: threshold all NULL maps and build distribution of NULL cluster
        #         sizes
        cluster_sizes = Counter()
        # recompute the bootstrap average maps to threshold them and determine
        # cluster sizes
        dsa = dict(mapper=ds.a.mapper) if 'mapper' in ds.a else {}
        if __debug__:
            debug('GCTHR', 'Estimating NULL distribution of cluster sizes')
        # this step can be computed in parallel chunks to speed things up
        if not parallel:
            # Serial execution
            for sidx in bcombos:
                avgmap = np.mean(ds_samples[sidx], axis=0)[None]
                # apply threshold
                clustermap = avgmap > thrmap
                # wrap into a throw-away dataset to get the reverse mapping right
                bds = Dataset(clustermap, a=dsa)
                # this function reverse-maps every sample one-by-one, hence no need
                # to collect chunks of bootstrapped maps
                cluster_sizes = get_cluster_sizes(bds, cluster_sizes)
        else:
            # Parallel execution
            # same code as above, just restructured for joblib's Parallel
            for jobres in Parallel(n_jobs=n_proc,
                                   pre_dispatch=n_proc,
                                   verbose=verbose_level_parallel)(
                    delayed(get_cluster_sizes)(
                        Dataset(np.mean(ds_samples[sidx],
                                        axis=0)[None] > thrmap,
                                a=dsa))
                    for sidx in bcombos):
                # aggregate
                cluster_sizes += jobres
        # store cluster size histogram for later p-value evaluation
        # use a sparse matrix for easy consumption (max dim is the number of
        # features, i.e. biggest possible cluster)
        scl = dok_matrix((1, ds.nfeatures + 1), dtype=int)
        for s in cluster_sizes:
            scl[0, s] = cluster_sizes[s]
        self._null_cluster_sizes = scl

    def _call(self, ds):
        """Threshold a map with the trained feature-wise threshold and
        describe the resulting clusters.

        Parameters
        ----------
        ds : Dataset
          Input dataset.  If it has more than one sample, all samples are
          averaged into a single one first.

        Returns
        -------
        Dataset
          Shallow copy of the (averaged) input with additional attributes:

          ``fa.featurewise_thresh``
            the stored per-feature threshold map
          ``fa.clusters_featurewise_thresh``
            cluster labels, 1 being the largest cluster, 0 meaning "no cluster"
          ``fa.clusters_fwe_thresh``
            same labels with rejected clusters set to 0 (only when
            ``multicomp_correction`` is not None)
          ``a.clusterlocations``
            record array with fields 'center_of_mass' and 'max'
          ``a.clusterstats``
            record array with per-cluster 'size', 'prob_raw', descriptive
            statistics and, if corrected, the corrected probabilities
        """
        if len(ds) > 1:
            # average all samples into one, assuming we got something like one
            # sample per subject as input
            avgr = mean_sample()
            ds = avgr(ds)
        # threshold input; at this point we only have one sample left
        thrd = ds.samples[0] > self._thrmap
        # mapper default
        mapper = IdentityMapper()
        # overwrite if possible
        if hasattr(ds, 'a') and 'mapper' in ds.a:
            mapper = ds.a.mapper
        # reverse-map the binary (thresholded) map
        othrd = _verified_reverse1(mapper, thrd)
        # reverse-map the raw values too: they are needed to locate the
        # center of mass and the maximum of each cluster
        osamp = _verified_reverse1(mapper, ds.samples[0])
        # prep output dataset
        outds = ds.copy(deep=False)
        outds.fa['featurewise_thresh'] = self._thrmap
        # determine clusters; use full connectivity (a block of ones with
        # three elements along every axis) matching the rank of the input
        # instead of a hard-coded 3D structuring element
        structure = np.ones((3,) * np.ndim(othrd))
        labels, num = measurements.label(othrd, structure=structure)
        cluster_ids = np.arange(1, num + 1)
        # `labels` must be passed, otherwise `index` cannot select clusters
        area = measurements.sum(othrd, labels, index=cluster_ids).astype(int)
        com = measurements.center_of_mass(
            osamp, labels=labels, index=cluster_ids)
        maxpos = measurements.maximum_position(
            osamp, labels=labels, index=cluster_ids)
        # for the rest we need the labels flattened
        labels = mapper.forward1(labels)
        # relabel clusters starting with the biggest and increase index with
        # decreasing size
        ordered_labels = np.zeros(labels.shape, dtype=int)
        ordered_area = np.zeros(area.shape, dtype=int)
        ordered_com = np.zeros((num, len(osamp.shape)), dtype=float)
        ordered_maxpos = np.zeros((num, len(osamp.shape)), dtype=float)
        for i, idx in enumerate(np.argsort(area)):
            ordered_labels[labels == idx + 1] = num - i
            # kinda ugly, but we are looping anyway
            ordered_area[i] = area[idx]
            ordered_com[i] = com[idx]
            ordered_maxpos[i] = maxpos[idx]
        labels = ordered_labels
        # the loop above filled in ascending size order -> flip to descending
        area = ordered_area[::-1]
        com = ordered_com[::-1]
        maxpos = ordered_maxpos[::-1]
        del ordered_labels  # this one can be big
        # store cluster labels after forward-mapping
        outds.fa['clusters_featurewise_thresh'] = labels.copy()
        # location info
        outds.a['clusterlocations'] = \
            np.rec.fromarrays(
                [com, maxpos], names=('center_of_mass', 'max'))

        # update cluster size histogram with the actual result to get a
        # proper lower bound for p-values
        # this will make a copy, because the original matrix is int
        cluster_probs_raw = _transform_to_pvals(
            area, self._null_cluster_sizes.astype('float'))

        # parallel lists: column data and column names of the output record
        clusterstats = (
            [area, cluster_probs_raw],
            ['size', 'prob_raw']
        )
        # evaluate a bunch of stats for all clusters
        stat_functions = (
            ('mean', np.mean),
            ('median', np.median),
            ('min', np.min),
            ('max', np.max),
            ('std', np.std))
        morestats = {}
        for cid in range(len(area)):
            # keep clusters on outer loop, because selection is more expensive
            clvals = ds.samples[0, labels == cid + 1]
            for id_, fx in stat_functions:
                morestats.setdefault(id_, []).append(fx(clvals))

        # add the collected statistics as further columns; iterate in the
        # declared order so the column order does not depend on dict ordering
        for id_, _ in stat_functions:
            if id_ in morestats:
                clusterstats[0].append(morestats[id_])
                clusterstats[1].append(id_)

        if self.params.multicomp_correction is not None:
            # do a local import as only this tiny portion needs statsmodels
            import statsmodels.stats.multitest as smm
            # NOTE(review): the arguments of this call were truncated in the
            # file; `fwe_rate` as the alpha source is an assumption -- confirm
            # against the Parameter declarations of this class.
            rej, probs_corr = smm.multipletests(
                cluster_probs_raw,
                alpha=self.params.fwe_rate,
                method=self.params.multicomp_correction)[:2]
            # store corrected per-cluster probabilities
            # NOTE(review): column name reconstructed -- confirm with consumers
            clusterstats[0].append(probs_corr)
            clusterstats[1].append('prob_corrected')
            # remove cluster labels that did not pass the FWE threshold
            for i, r in enumerate(rej):
                if not r:
                    labels[labels == i + 1] = 0
            outds.fa['clusters_fwe_thresh'] = labels
        outds.a['clusterstats'] = \
            np.rec.fromarrays(clusterstats[0], names=clusterstats[1])
        return outds
class _SVM(Classifier):
    """Support Vector Machine Classifier.

    Base class for all external SVM implementations.

    Derived classes should define:

    * _KERNELS: map(dict) should define assignment to a tuple containing
      implementation kernel type, list of parameters adherent to the
      kernel, and sensitivity analyzer e.g.::

        _KERNELS = {
             'linear': (shogun.Kernel.LinearKernel, (), LinearSVMWeights),
             'rbf' :   (shogun.Kernel.GaussianKernel, ('gamma',), None),
             }

    * _KNOWN_IMPLEMENTATIONS: map(dict) should define assignment to a
      tuple containing implementation of the SVM, list of parameters
      adherent to the implementation, additional internals, and
      description e.g.::

        _KNOWN_IMPLEMENTATIONS = {
          'C_SVC' : (svm.svmc.C_SVC, ('C',),
                   ('binary', 'multiclass'), 'C-SVM classification'),
          }

    """

    _ATTRIBUTE_COLLECTIONS = ['params'] # enforce presence of params collections

    # Placeholder: map kernel names to sensitivity classes, ie
    # 'linear':LinearSVMWeights, for each backend
    # NOTE(review): the assignment this comment refers to is not present in
    # this file section -- confirm whether a class attribute went missing.
    kernel = Parameter(None,
                       # XXX: Currently, can't be ensured using constraints
                       # allowedtype=Kernel,
                       doc='Kernel object', index=-1)

    # Templates for all parameters any SVM backend may expose; __init__ copies
    # the ones listed in _KNOWN_PARAMS into the instance's `params` collection
    _SVM_PARAMS = {
        'C' : Parameter(-1.0,
                  doc='Trade-off parameter between width of the '
                      'margin and number of support vectors. Higher C -- '
                      'more rigid margin SVM. In linear kernel, negative '
                      'values provide automatic scaling of their value '
                      'according to the norm of the data'),
        'nu' : Parameter(0.5, min=0.0, max=1.0,
                  doc='Fraction of datapoints within the margin'),
        'cache_size': Parameter(100,
                  doc='Size of the kernel cache, specified in megabytes'),
        'tube_epsilon': Parameter(0.01,
                  doc='Epsilon in epsilon-insensitive loss function of '
                      'epsilon-SVM regression (SVR)'),
        'tau': Parameter(1e-6, doc='TAU parameter of KRR regression in shogun'),
        'probability': Parameter(0,
                  doc='Flag to signal either probability estimate is obtained '
                      'within LIBSVM'),
        'shrinking': Parameter(1, doc='Either shrinking is to be conducted'),
        'weight_label': Parameter([], constraints=EnsureListOf(int),
                  doc='To be used in conjunction with weight for custom '
                      'per-label weight'),
        # TODO : merge them into a single dictionary
        'weight': Parameter([], constraints=EnsureListOf(float),
                  doc='Custom weights per label'),
        # For some reason setting up epsilon to 1e-5 slowed things down a bit
        # in comparison to how it was before (in yoh/master) by up to 20%... not clear why
        # may be related to 1e-3 default within _svm.py?
        'epsilon': Parameter(5e-5, min=1e-10,
                  doc='Tolerance of termination criteria. (For nu-SVM default is 0.001)')
        }

    _KNOWN_PARAMS = ()                  # just a placeholder to please lintian
    """Parameters which are specific to a given instantiation of SVM
    """

    __tags__ = [ 'svm', 'kernel-based', 'swig' ]

    def __init__(self, **kwargs):
        """Init base class of SVMs. *Not to be publicly used*

        Recognized keyword arguments are ``svm_impl`` (mandatory, must be a
        key of ``_KNOWN_IMPLEMENTATIONS``), ``kernel`` (defaults to an
        instance of ``__default_kernel_class__``) and every name listed in
        ``_KNOWN_PARAMS``; anything else is passed on to `Classifier`.

        Raises
        ------
        ValueError
          If ``svm_impl`` is unknown, if a known parameter has no template in
          ``_SVM_PARAMS``, or if 'weight' and 'weight_label' differ in length.
        TypeError
          Re-raised from `Classifier` for unexpected keyword arguments, with
          the list of known parameters appended to the message.

        TODO: handling of parameters might migrate to be generic for
        all classifiers. SVMs are chosen to be testbase for that
        functionality to see how well it would fit.
        """
        # Check if requested implementation is known
        svm_impl = kwargs.get('svm_impl', None)
        if svm_impl not in self._KNOWN_IMPLEMENTATIONS:
            raise ValueError(
                "Unknown SVM implementation '%s' is requested for %s."
                "Known are: %s" % (svm_impl, self.__class__,
                                   list(self._KNOWN_IMPLEMENTATIONS.keys())))
        self._svm_impl = svm_impl

        # entry layout: (implementation, parameters, internals, description);
        # only parameters and internals are needed here
        _impl, add_params, add_internals, _descr = \
            self._KNOWN_IMPLEMENTATIONS[svm_impl]

        # Add corresponding parameters to 'known' depending on the
        # implementation chosen.  Convert to list first: the class-level
        # placeholder is a tuple, which cannot be concatenated with a list
        if add_params is not None:
            self._KNOWN_PARAMS = \
                list(self._KNOWN_PARAMS) + list(add_params)

        # Assign per-instance __tags__ (copy, so the class list stays intact)
        self.__tags__ = self.__tags__[:] + [svm_impl]

        # Add corresponding internals
        if add_internals is not None:
            self.__tags__ += list(add_internals)

        if kwargs.get('kernel', None) is None:
            kwargs['kernel'] = self.__default_kernel_class__()
        if 'linear' in ('%s' % kwargs['kernel']).lower(): # XXX not necessarily best
            self.__tags__ += [ 'linear', 'has_sensitivity' ]
        else:
            self.__tags__ += [ 'non-linear' ]

        # pop out all args from **kwargs which are known to be SVM parameters
        _args = {}
        for param in list(self._KNOWN_PARAMS) + ['svm_impl']: # Update to remove kp's?
            if param in kwargs:
                _args[param] = kwargs.pop(param)

        try:
            Classifier.__init__(self, **kwargs)
        except TypeError as e:
            if e.args and \
               "__init__() got an unexpected keyword argument " in e.args[0]:
                # TODO: make it even more specific -- if that argument is listed
                # within _SVM_PARAMS
                # NOTE(review): the tail of this expression was truncated in
                # the file; it is rebuilt so that any further args are kept.
                e.args = tuple(
                    [e.args[0] +
                     "\n Given SVM instance of class %s knows following parameters: %s" %
                     (self.__class__, self._KNOWN_PARAMS)]
                    + list(e.args[1:]))
            # bare raise keeps the original traceback
            raise

        # populate collections and add values from arguments
        for paramfamily, paramset in ( (self._KNOWN_PARAMS, self.params),):
            for paramname in paramfamily:
                if paramname not in self._SVM_PARAMS:
                    raise ValueError(
                        "Unknown parameter %s" % paramname +
                        ". Known SVM params are: %s"
                        % list(self._SVM_PARAMS.keys()))
                # copy, so per-instance values do not leak into the template
                param = deepcopy(self._SVM_PARAMS[paramname])
                if paramname in _args:
                    param.value = _args[paramname]
                    # XXX might want to set default to it -- not just value

                paramset[paramname] = param

        # TODO: Below commented out because kernel_type has been removed.
        # Find way to set default C as necessary
        # tune up C if it has one and non-linear classifier is used
        #if self.params.has_key('C') and kernel_type != "linear" \
               #and self.params['C'].is_default:
            #if __debug__:
                #debug("SVM_", "Assigning default C value to be 1.0 for SVM "
                      #"%s with non-linear kernel" % self)
            #self.params['C'].default = 1.0

        # Some postchecks
        if 'weight' in self.params and 'weight_label' in self.params:
            if len(self.params.weight_label) != len(self.params.weight):
                raise ValueError(
                    "Lenghts of 'weight' and 'weight_label' lists "
                    "must be equal.")

        if __debug__:
            debug("SVM", "Initialized %s with kernel %s" %
                  (self, self.params.kernel))
class GNB(Classifier):
    """Gaussian Naive Bayes `Classifier`.

    `GNB` is a probabilistic classifier relying on Bayes rule to
    estimate posterior probabilities of labels given the data.  Naive
    assumption in it is an independence of the features, which allows
    to combine per-feature likelihoods by a simple product across
    likelihoods of "independent" features.
    See http://en.wikipedia.org/wiki/Naive_bayes for more information.

    Provided here implementation is "naive" on its own -- various
    aspects could be improved, but it has its own advantages:

    - implementation is simple and straightforward
    - no data copying while considering samples of specific class
    - provides alternative ways to assess prior distribution of the
      classes in the case of unbalanced sets of samples (see parameter
      `prior`)
    - makes use of NumPy broadcasting mechanism, so should be
      relatively efficient
    - should work for any dimensionality of samples

    `GNB` is listed both as linear and non-linear classifier, since
    specifics of separating boundary depends on the data and/or
    parameters: linear separation is achieved whenever samples are
    balanced (or ``prior='uniform'``) and features have the same
    variance across different classes (i.e. if
    ``common_variance=True`` to enforce this).

    Whenever decisions are made based on log-probabilities (parameter
    ``logprob=True``, which is the default), then conditional
    attribute `values`, if enabled, would also contain
    log-probabilities.  Also mention that normalization by the
    evidence (P(data)) is disabled by default since it has no impact
    per se on classification decision.  You might like to set
    parameter normalize to True if you want to access properly scaled
    probabilities in `values` conditional attribute.
    """
    # XXX decide when should we set corresponding internal,
    #     since it depends actually on the data -- no clear way,
    #     so set both linear and non-linear
    __tags__ = [ 'gnb', 'linear', 'non-linear',
                       'binary', 'multiclass' ]

    common_variance = Parameter(False, constraints='bool',
             doc="""Use the same variance across all classes.""")

    prior = Parameter('laplacian_smoothing',
             constraints=EnsureChoice('laplacian_smoothing', 'uniform', 'ratio'),
             doc="""How to compute prior distribution.""")

    logprob = Parameter(True, constraints='bool',
             doc="""Operate on log probabilities.  Preferable to avoid unneeded
             exponentiation and loose precision.
             If set, logprobs are stored in `values`""")

    normalize = Parameter(False, constraints='bool',
             doc="""Normalize (log)prob by P(data).  Requires probabilities thus
             for `logprob` case would require exponentiation of 'logprob's, thus
             disabled by default since does not impact classification output.
             """)

    def __init__(self, **kwargs):
        """Initialize an GNB classifier.

        All keyword arguments are passed on to `Classifier`.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # pylint friendly initializations
        self.means = None
        """Means of features per class"""
        self.variances = None
        """Variances per class, but "vars" is taken ;)"""
        self.ulabels = None
        """Labels classifier was trained on"""
        self.priors = None
        """Class probabilities"""

        # Define internal state of classifier
        # (Gaussian normalization term, precomputed in _train)
        self._norm_weight = None

    def _get_priors(self, nlabels, nsamples, nsamples_per_class):
        """Return prior probabilities given data

        Parameters
        ----------
        nlabels : int
          Number of unique labels.
        nsamples : int
          Total number of samples.
        nsamples_per_class : array
          Number of samples per label; degenerate dimensions are squeezed.

        Raises
        ------
        ValueError
          If the `prior` parameter holds an unsupported value.
        """
        # helper function - squash all dimensions but 1
        squash = lambda x: np.atleast_1d(x.squeeze())

        prior = self.params.prior
        if prior == 'uniform':
            priors = np.ones((nlabels,))/nlabels
        elif prior == 'laplacian_smoothing':
            priors = (1+squash(nsamples_per_class)) \
                          / (float(nsamples) + nlabels)
        elif prior == 'ratio':
            priors = squash(nsamples_per_class) / float(nsamples)
        else:
            raise ValueError(
                "No idea on how to handle '%s' way to compute priors"
                % self.params.prior)
        return priors

    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).

        Estimates per-class means, variances and priors and precomputes the
        Gaussian normalization term for the chosen (log)probability mode.
        """
        params = self.params
        targets_sa_name = self.get_space()
        targets_sa = dataset.sa[targets_sa_name]

        # get the dataset information into easy vars
        X = dataset.samples
        labels = targets_sa.value
        self.ulabels = ulabels = targets_sa.unique
        nlabels = len(ulabels)
        label2index = dict((l, il) for il, l in enumerate(ulabels))

        # set the feature dimensions
        nsamples = len(X)
        s_shape = X.shape[1:]           # shape of a single sample

        self.means = means = \
                     np.zeros((nlabels, ) + s_shape)
        self.variances = variances = \
                     np.zeros((nlabels, ) + s_shape)
        # degenerate dimension are added for easy broadcasting later on
        nsamples_per_class = np.zeros((nlabels,) + (1,)*len(s_shape))

        # Estimate means and number of samples per each label
        for s, l in zip(X, labels):
            il = label2index[l]         # index of the label
            nsamples_per_class[il] += 1
            means[il] += s

        # helper function - squash all dimensions but 1
        squash = lambda x: np.atleast_1d(x.squeeze())
        ## Actually compute the means
        # (only for labels that have samples, to avoid division by zero)
        non0labels = (squash(nsamples_per_class) != 0)
        means[non0labels] /= nsamples_per_class[non0labels]

        # Store prior probabilities
        self.priors = self._get_priors(nlabels, nsamples, nsamples_per_class)

        # Estimate variances
        # better loop than repmat! ;)
        for s, l in zip(X, labels):
            il = label2index[l]         # index of the label
            variances[il] += (s - means[il])**2

        ## Actually compute the variances
        if params.common_variance:
            # we need to get global std
            cvar = np.sum(variances, axis=0)/nsamples # sum across labels
            # broadcast the same variance across labels
            variances[:] = cvar
        else:
            variances[non0labels] /= nsamples_per_class[non0labels]

        # Precompute and store weighting coefficient for Gaussian
        if params.logprob:
            # it would be added to exponent
            self._norm_weight = -0.5 * np.log(2*np.pi*variances)
        else:
            self._norm_weight = 1.0/np.sqrt(2*np.pi*variances)

        if __debug__ and 'GNB' in debug.active:
            debug('GNB', "training finished on data.shape=%s " % (X.shape, )
                  + "min:max(data)=%f:%f" % (np.min(X), np.max(X)))

    def _untrain(self):
        """Untrain classifier and reset all learnt params
        """
        self.means = None
        self.variances = None
        self.ulabels = None
        self.priors = None
        # derived from the variances in _train, so stale once untrained
        self._norm_weight = None
        super(GNB, self)._untrain()

    def _predict(self, data):
        """Predict the output for the provided data.

        Returns a list with one label per sample; the per-class
        (log)probabilities are stored in ``self.ca.estimates`` with samples
        along the first axis.
        """
        params = self.params
        # argument of exponentiation
        scaled_distances = \
            -0.5 * (((data - self.means[:, np.newaxis, ...])**2) \
                          / self.variances[:, np.newaxis, ...])
        if params.logprob:
            # if self.params.common_variance:
            # XXX YOH:
            # For decision there is no need to actually compute
            # properly scaled p, ie 1/sqrt(2pi * sigma_i) could be
            # simply discarded since it is common across features AND
            # classes
            # For completeness -- computing everything now even in logprob
            lprob_csfs = self._norm_weight[:, np.newaxis, ...] \
                         + scaled_distances

            # XXX for now just cut/paste with different operators, but
            #     could just bind them and reuse in the same equations
            # Naive part -- just a product of probabilities across features
            ## First we need to reshape to get class x samples x features
            lprob_csf = lprob_csfs.reshape(
                lprob_csfs.shape[:2] + (-1,))
            ## Now -- sum across features
            lprob_cs = lprob_csf.sum(axis=2)

            # Incorporate class probabilities:
            prob_cs_cp = lprob_cs + np.log(self.priors[:, np.newaxis])

        else:
            # Just a regular Normal distribution with per
            # feature/class mean and variances
            prob_csfs = \
                 self._norm_weight[:, np.newaxis, ...] \
                 * np.exp(scaled_distances)

            # Naive part -- just a product of probabilities across features
            ## First we need to reshape to get class x samples x features
            prob_csf = prob_csfs.reshape(
                prob_csfs.shape[:2] + (-1,))
            ## Now -- product across features
            prob_cs = prob_csf.prod(axis=2)

            # Incorporate class probabilities:
            prob_cs_cp = prob_cs * self.priors[:, np.newaxis]

        # Normalize by evidence P(data)
        if params.normalize:
            # the marginal has to be computed on real probabilities
            if params.logprob:
                prob_cs_cp_real = np.exp(prob_cs_cp)
            else:
                prob_cs_cp_real = prob_cs_cp
            prob_s_cp_marginals = np.sum(prob_cs_cp_real, axis=0)
            if params.logprob:
                prob_cs_cp -= np.log(prob_s_cp_marginals)
            else:
                prob_cs_cp /= prob_s_cp_marginals

        # Take the class with maximal (log)probability
        winners = prob_cs_cp.argmax(axis=0)
        predictions = [self.ulabels[c] for c in winners]

        # set to the probabilities per class
        self.ca.estimates = prob_cs_cp.T

        if __debug__ and 'GNB' in debug.active:
            debug('GNB', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (data.shape, np.min(data), np.max(data)))

        return predictions
class SearchlightHyperalignment(ClassWithCollections):
    Given a list of datasets, provide a list of mappers
    into common space using searchlight based hyperalignment.
    :ref:`Guntupalli et al., Cerebral Cortex (2016)`

    1) Input datasets should all be of the same size in terms of
    nsamples and nfeatures, and be coarsely aligned (using anatomy).
    2) All features in all datasets should be zscored.
    3) Datasets should have feature attribute `voxel_indices`
    containing spatial coordinates of all features

    # TODO: add {training_,}residual_errors .ca ?

    ## Parameters common with Hyperalignment but overriden

    ref_ds = Parameter(
        constraints=EnsureInt() & EnsureRange(min=0),
        doc="""Index of a dataset to use as a reference. First dataset is used
            as default. If you supply exclude_from_model list, you should supply
            the ref_ds index as index before you remove those excluded datasets.
            Note that unlike regular Hyperalignment, there is no automagic
            choosing of the "best" ref_ds by default.""")

    ## Parameters specific to SearchlightHyperalignment

    queryengine = Parameter(
        doc="""A single (or a list of query engines, one per each dataset) to be
        used.  If not provided, volumetric searchlight, with spherical
        neighborhood as instructed by radius parameter will be used.""")

    radius = Parameter(
        constraints=EnsureInt() & EnsureRange(min=1),
        doc="""Radius of a searchlight sphere in number of voxels to be used if
         no `queryengine` argument was provided.""")

    nproc = Parameter(1,
                      constraints=EnsureInt() & EnsureRange(min=1)
                      | EnsureNone(),
                      doc="""Number of cores to use.""")

    nblocks = Parameter(
        constraints=EnsureInt() & EnsureRange(min=1) | EnsureNone(),
        doc="""Number of blocks to divide to process. Higher number results in
            smaller memory consumption.""")

    sparse_radius = Parameter(
        constraints=(EnsureRange(min=1) & EnsureInt() | EnsureNone()),
        doc="""Radius supplied to scatter_neighborhoods in units of voxels.
            This is effectively the distance between the centers where
            hyperalignment is performed in searchlights.  ATM applicable only
            if no custom queryengine was provided.
            If None, hyperalignment is performed at every voxel (default).""")

    hyperalignment = Parameter(
        doc="""Hyperalignment instance to be used in each searchlight sphere.
            Default is just the Hyperalignment instance with default
            parameters. Its `ref_ds` parameter would be overridden by the
            `ref_ds` parameter of this SearchlightHyperalignment instance
            because we want to be consistent and only need one `ref_ds`.""")

    combine_neighbormappers = Parameter(
        doc="""This param determines whether to combine mappers for each voxel
            from its neighborhood searchlights or just use the mapper for which
            it is the center voxel. This will not be applicable for certain
            queryengines whose ids and neighborhoods are from different spaces,
            such as for SurfaceVerticesQueryEngine""")

    compute_recon = Parameter(
        doc="""This param determines whether to compute reverse mappers for each
            subject from common-space to subject space. These will be stored in
            the StaticProjectionMapper() and used when reverse() is called.
            Enabling it will double the size of the mappers returned.""")

    featsel = Parameter(
        constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0)
        | EnsureInt() & EnsureRange(min=2),
        """Determines if feature selection will be performed in each searchlight.
            1.0: Use all features. < 1.0 is understood as selecting that
            proportion of features in each searchlight of ref_ds using feature scores;
            > 1.0 is understood as selecting at most that many features in each

    # TODO: Should we get rid of this feature?
    use_same_features = Parameter(
        doc="""Select the same (best) features when doing feature selection for
            all datasets.""")

    exclude_from_model = Parameter(
        doc="""List of dataset indices that will not participate in building
            common model.  These will still get mappers back but they don't
            influence the model or voxel selection.""")

    mask_node_ids = Parameter(
        constraints=EnsureListOf(int) | EnsureNone(),
        doc="""You can specify a mask to compute searchlight hyperalignment only
            within this mask.  These would be a list of voxel indices.""")

    dtype = Parameter(
        doc="""dtype of elements transformation matrices to save on memory for
            big datasets""")

    results_backend = Parameter(
        constraints=EnsureChoice('hdf5', 'native'),
        doc="""'hdf5' or 'native'. See Searchlight documentation.""")

    tmp_prefix = Parameter(
        doc="""Prefix for temporary files. See Searchlight documentation.""")

    def __init__(self, **kwargs):
        """Set up state and verify that required externals are available.

        All keyword arguments are passed on to `ClassWithCollections`.

        Raises
        ------
        RuntimeError
          If `nproc` > 1 but 'pprocess' is unavailable, if 'scipy' is
          unavailable, or if `results_backend` is 'hdf5' but 'h5py' is
          unavailable.
        NotImplementedError
          If `results_backend` is 'native'.
        """
        ClassWithCollections.__init__(self, **kwargs)
        self.ndatasets = 0
        self.nfeatures = 0
        self.projections = None
        # This option makes the roi_seed in each SL to be selected during feature selection
        self.force_roi_seed = True
        if self.params.nproc is not None and self.params.nproc > 1 \
                and not externals.exists('pprocess'):
            raise RuntimeError("The 'pprocess' module is required for "
                               "multiprocess searchlights. Please either "
                               "install python-pprocess, or reduce `nproc` "
                               "to 1 (got nproc=%i) or set to default None" %
                               self.params.nproc)
        if not externals.exists('scipy'):
            raise RuntimeError("The 'scipy' module is required for "
                               "searchlight hyperalignment.")
        if self.params.results_backend == 'native':
            raise NotImplementedError(
                "'native' mode to handle results is still a "
                "work in progress.")
            #warning("results_backend is set to 'native'. This has been known"
            #        "to result in longer run time when working with big datasets.")
        if self.params.results_backend == 'hdf5' and \
                not externals.exists('h5py'):
            # name the module that is actually checked for
            raise RuntimeError("The 'h5py' module is required "
                               "when results_backend is set to 'hdf5'")

    # Compute hyperalignment projections for one block of searchlight centers.
    #
    # For every node id in `block` the ROI feature ids are looked up in each
    # query engine, the matching feature subsets of `datasets` are handed to
    # `featselhyper`, and the returned per-dataset mappers are scattered into
    # sparse matrices that are accumulated per dataset in `projections`.
    # With results_backend 'native' the list of matrices is returned; with
    # 'hdf5' it is saved via `h5save` and the file name is returned instead.
    #
    # NOTE(review): this method is truncated in several places in this file
    # (parameter list, bodies of some `if` statements, closing brackets and
    # what look like `else:` lines).  The code is left untouched below; the
    # notes only mark where something appears to be missing.
    def _proc_block(self,
        # NOTE(review): the rest of the parameter list is missing; the body
        # uses block, datasets, featselhyper, queryengines, seed and iblock --
        # confirm names, order and defaults against the callers.
        if seed is not None:
        # NOTE(review): body of the `if seed` statement is missing (presumably
        # seeds the random number generator -- TODO confirm)
        if __debug__:
                  # NOTE(review): head of this call is missing (presumably a
                  # `debug(...)` call like the ones further down)
                  'Starting computing block for %i elements' % len(block))
        bar = ProgressBar()
        # one (nfeatures x nfeatures) sparse accumulator per dataset
        projections = [
            csc_matrix((self.nfeatures, self.nfeatures),
            # NOTE(review): remaining arguments and closing `)` are missing
            for isub in range(self.ndatasets)
        # NOTE(review): closing `]` of the list comprehension is missing
        for i, node_id in enumerate(block):
            # retrieve the feature ids of all features in the ROI from the query
            # engine

            # Find the neighborhood for that selected nearest node
            roi_feature_ids_all = [qe[node_id] for qe in queryengines]
            # handling queryengines that return AttrDatasets
            for isub in range(len(roi_feature_ids_all)):
                if is_datasetlike(roi_feature_ids_all[isub]):
                    # making sure queryengine returned proper shaped output
                    assert (roi_feature_ids_all[isub].nsamples == 1)
                    # keep only the plain list of ids from the single sample
                    roi_feature_ids_all[isub] = roi_feature_ids_all[
                        isub].samples[0, :].tolist()
            if len(roi_feature_ids_all) == 1:
                # just one was provided to be "broadcasted"
                roi_feature_ids_all *= len(datasets)
            # if qe returns zero-sized ROI for any subject, pass...
            if any(len(x) == 0 for x in roi_feature_ids_all):
            # NOTE(review): body of the `if` above is missing (presumably
            # skips this node -- TODO confirm)
            # selecting neighborhood for all subject for hyperalignment
            ds_temp = [
                sd[:, ids] for sd, ids in zip(datasets, roi_feature_ids_all)
            # NOTE(review): closing `]` of the list comprehension is missing
            if self.force_roi_seed:
                # boolean mask marking the center node among the ROI features
                # of the reference dataset
                roi_seed = np.array(
                    roi_feature_ids_all[self.params.ref_ds]) == node_id
                ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
            if __debug__:
                msg = 'ROI (%i/%i), %i features' % (
                    i + 1, len(block), ds_temp[self.params.ref_ds].nfeatures)
                debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)
            hmappers = featselhyper(ds_temp)
            assert (len(hmappers) == len(datasets))
            roi_feature_ids_ref_ds = roi_feature_ids_all[self.params.ref_ds]
            for isub, roi_feature_ids in enumerate(roi_feature_ids_all):
                # I, J, V: row indices, column indices and values of the
                # sparse projection built below
                if not self.params.combine_neighbormappers:
                    I = roi_feature_ids
                    #J = [roi_feature_ids[node_id]] * len(roi_feature_ids)
                    J = [node_id] * len(roi_feature_ids)
                    V = hmappers[isub].tolist()
                    if np.isscalar(V):
                        V = [V]
                    # NOTE(review): an `else:` appears to be missing here; as
                    # written the next line would discard the values just set
                    I, J, V = [], [], []
                    for f2, roi_feature_id_ref_ds in enumerate(
                        # NOTE(review): argument of `enumerate(` is missing
                        # (presumably roi_feature_ids_ref_ds -- TODO confirm)
                        I += roi_feature_ids
                        J += [roi_feature_id_ref_ds] * len(roi_feature_ids)
                        V += hmappers[isub][:, f2].tolist()
                proj = coo_matrix(
                    (V, (I, J)),
                               # NOTE(review): start of what looks like the
                               # shape argument is missing on this line
                               max(I) + 1), max(self.nfeatures,
                                                max(J) + 1)),
                # NOTE(review): remaining arguments/closing `)` are missing
                proj = proj.tocsc()
                # Cleaning up the current subject's projections to free up memory
                hmappers[isub] = [[] for _ in hmappers]
                projections[isub] = projections[isub] + proj

        if self.params.results_backend == 'native':
            return projections
        elif self.params.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            results_file = mktemp(prefix=self.params.tmp_prefix,
                                  suffix='-%s.hdf5' % iblock)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, projections)
            if __debug__:
                debug('SLC_', "Results stored")
            return results_file
            # NOTE(review): an `else:` appears to be missing before the raise;
            # as written it sits unreachable behind the `return` above
            raise RuntimeError("Must not reach this point")

    def __handle_results(self, results):
        if self.params.results_backend == 'hdf5':
            # 'results' must be just a filename
            assert (isinstance(results, str))
            if __debug__:
                debug('SLC', "Loading results from %s" % results)
            results_data = h5load(results)
            if __debug__:
                      "Loaded results of len=%d from" % len(results_data))
            for isub, res in enumerate(results_data):
                self.projections[isub] = self.projections[isub] + res
            if __debug__:
                debug('SLC_', "Finished adding results")

    def __handle_all_results(self, results):
        """Helper generator to decorate passing the results out to
        for r in results:
            yield self.__handle_results(r)

        description="Full cortex hyperalignment of data to a common space",
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets
        """

        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        if len(datasets) <= 1:
            # NOTE(review): ``self.ndatasets`` is first assigned after these
            # checks, so formatting this message may raise AttributeError on a
            # fresh instance; the local ``ndatasets`` looks intended.
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % self.ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            # NOTE(review): same premature use of ``self.ndatasets`` as above.
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided" %
                             (params.ref_ds, self.ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets

        _shpaldebug("SearchlightHyperalignment %s for %i datasets" %
                    (self, self.ndatasets))

        # Indices of the datasets that are not excluded from the model.
        # NOTE(review): the closing bracket of this comprehension is missing.
        selected = [
            _ for _ in range(ndatasets) if _ not in params.exclude_from_model
        # position of the reference dataset among the non-excluded ones
        ref_ds_train = selected.index(params.ref_ds)
        params.hyperalignment.params.ref_ds = ref_ds_train
        warning('Using %dth dataset as the reference dataset (%dth after '
                'excluding datasets)' % (params.ref_ds, ref_ds_train))
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior the alignment since it is
            # assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                # NOTE(review): this tuple is cut off after the 'stds' entry.
                for f, fname, tval in ((np.mean, 'means', 0), (np.std, 'stds',
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        # NOTE(review): the call emitting this message is
                        # missing (only its arguments remain).
                            '%d %s are too different (max diff=%g) from %d in '
                            'dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct operation '
                            '(unless if was intentional)' %
                            (np.sum(vals_comp), fname, np.max(
                                np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features where comprising the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        # NOTE(review): the constructor arguments and closing parenthesis are
        # missing here.
        hmeasure = FeatureSelectionHyperalignment(

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            # NOTE(review): the ``try:`` matching the ``except`` below is
            # missing.
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                # NOTE(review): the value for ``%s`` is missing.
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" %
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        # NOTE(review): the remaining arguments of this call are missing.
        queryengines = self._get_trained_queryengines(datasets,
        # For surface nodes to voxels queryengines, roi_seed hardly makes sense
        qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
        if isinstance(qe, SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                # NOTE(review): closing parenthesis of this raise is missing.
                raise NotImplementedError(
                    "Mapping from voxels to surface nodes is not "
                    "implmented yet. Try setting combine_neighbormappers to True."
        # feature count of the reference dataset sizes the projection matrices
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            # all query-engine ids, unless an explicit node mask was given
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
            # NOTE(review): an ``else:`` appears to be missing before the
            # sparse-radius handling that follows.
            if params.queryengine is not None:
                raise NotImplementedError(
                    "using sparse_radius whenever custom queryengine is "
                    "provided is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            from mvpa2.misc.neighborhood import scatter_neighborhoods
            if params.mask_node_ids is None:
                # NOTE(review): arguments of both ``scatter_neighborhoods``
                # calls and the ``else:`` between them are missing.
                scoords, sidx = scatter_neighborhoods(
                roi_ids = sidx
                scoords, sidx = scatter_neighborhoods(
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        # one empty sparse nfeatures x nfeatures matrix per dataset
        # NOTE(review): closing bracket of this comprehension is missing.
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                # NOTE(review): the opening of this debug call is missing.
                    'SLC', "Starting off %s child processes for nblocks=%i" %
                    (nproc_needed, params.nblocks))
            compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            # NOTE(review): the body of this loop (presumably the call to
            # ``compute``) and the ``else:`` of the serial branch are missing.
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
            # otherwise collect the results in an 1-item list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # NOTE(review): closing bracket of this comprehension is missing.
            p_results = [
                self._proc_block(block, datasets, hmeasure, queryengines)
                for block in node_blocks
        # NOTE(review): ``__handle_all_results`` is a generator; no statement
        # consuming ``results_ds`` is visible, so as written the results would
        # never be folded into ``self.projections`` -- confirm.
        results_ds = self.__handle_all_results(p_results)
        # Dummy iterator for, you know, iteration

        # NOTE(review): the opening of this debug call is missing.
            'Wrapping projection matrices into StaticProjectionMappers')
        # with ``compute_recon`` the transpose is supplied as reconstruction
        # NOTE(review): closing bracket of this comprehension is missing.
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T)
            if params.compute_recon else StaticProjectionMapper(proj=proj)
            for proj in self.projections
        return self.projections

    def _get_verified_ids(self, queryengines):
        """Helper to return ids of queryengines, verifying that they are the same"""
        qe0 = queryengines[0]
        roi_ids = qe0.ids
        for qe in queryengines:
            if qe is not qe0:
                # if a different query engine (so wasn't just replicated)
                if np.any(qe.ids != qe0.ids):
                    raise RuntimeError(
                        "Query engine %s provided different ids than %s. Not supported"
                        % (qe0, qe))
        return roi_ids

    def _get_trained_queryengines(self, datasets, queryengine, radius, ref_ds):
        """Helper to return trained query engine(s), either list of one or one per each dataset

        if queryengine is None then IndexQueryEngine based on radius is created
        ndatasets = len(datasets)
        if queryengine:
            if isinstance(queryengine, (list, tuple)):
                queryengines = queryengine
                if len(queryengines) != ndatasets:
                    raise ValueError(
                        "%d query engines were specified although %d datasets "
                        "provided" % (len(queryengines), ndatasets))
                _shpaldebug("Training provided query engines")
                for qe, ds in zip(queryengines, datasets):
                queryengines = [queryengine]
                'No custom query engines were provided. Setting up the '
                'volumetric query engine on voxel_indices.')
            queryengine = IndexQueryEngine(voxel_indices=Sphere(radius))
            queryengines = [queryengine]
        return queryengines
Exemple #28
class PDistConsistency(Measure):
    """Calculate the correlations of PDist measures across chunks

    This measures the consistency in similarity structure across runs
    within individuals, or across individuals if the target dataset is made from
    several subjects in some common space and where the sample attribute
    specified as the chunks_attr codes for subject identity.

    @author: ACC Aug 2013
    """

    is_trained = True  # Indicate that this measure is always trained.

    chunks_attr = Parameter('chunks', constraints='str', doc="""\
          Chunks attribute to use for chunking dataset. Can be any samples
          attribute.""")

    pairwise_metric = Parameter('correlation', constraints='str', doc="""\
          Distance metric to use for calculating dissimilarity matrices from
          the set of samples in each chunk specified. See
          spatial.distance.pdist for all possible metrics.""")

    # NOTE(review): ``_call`` only special-cases 'spearman'; any other value
    # results in plain Pearson correlations.  A constraint restricting the
    # choices may be intended here -- confirm.
    consistency_metric = Parameter('pearson', doc="""\
          Correlation measure to use for the correlation between dissimilarity
          matrices.""")

    center_data = Parameter(False, constraints='bool', doc="""\
          If True then center each column of the data matrix by subtracing the
          column mean from each element. This is recommended especially when
          using pairwise_metric='correlation'.""")

    square = Parameter(False, constraints='bool', doc="""\
          If True return the square distance matrix, if False, returns the
          flattened upper triangle.""")

    def __init__(self, **kwargs):
        """Initialize the measure; all keyword arguments go to ``Measure``."""
        # TODO: Another metric for consistency metric could be the "Rv"
        # coefficient...  (ac)
        # init base classes first
        Measure.__init__(self, **kwargs)

    def _call(self, dataset):
        """Correlate the dissimilarity structure of all chunks with each other.

        Parameters
        ----------
        dataset : Dataset
          Must carry the sample attribute named by ``chunks_attr`` with at
          least two unique values.

        Returns
        -------
        Dataset
          Contains the pairwise correlations between the DSMs
          computed from each chunk of the input dataset. If square is False,
          this is a column vector of length N(N-1)/2 for N chunks. If square
          is True, this is a square matrix of size NxN for N chunks.

        Raises
        ------
        ValueError
          If the dataset contains fewer than two chunks.
        """
        chunks_attr = self.params.chunks_attr
        unique_chunks = dataset.sa[chunks_attr].unique
        if len(unique_chunks) < 2:
            raise ValueError("This measure calculates similarity consistency across "
                             "chunks and is not meaningful for datasets with only "
                             "one chunk:")
        dsms = []
        chunks = []
        for chunk in unique_chunks:
            data = np.atleast_2d(
                dataset.samples[dataset.sa[chunks_attr].value == chunk, :])
            if self.params.center_data:
                data = data - np.mean(data, 0)
            # one flattened dissimilarity matrix per chunk
            dsms.append(pdist(data, self.params.pairwise_metric))
            chunks.append(chunk)
        dsms = np.vstack(dsms)

        if self.params.consistency_metric == 'spearman':
            # correlating the ranks gives Spearman's coefficient
            dsms = np.apply_along_axis(rankdata, 1, dsms)
        corrmat = np.corrcoef(dsms)
        if self.params.square:
            ds = Dataset(corrmat, sa={self.params.chunks_attr: chunks})
        else:
            # checks=False: the diagonal of a correlation matrix is not zero
            ds = Dataset(squareform(corrmat, checks=False),
                         sa=dict(pairs=list(combinations(chunks, 2))))
        return ds
class Hyperalignment(ClassWithCollections):
    """Align the features across multiple datasets into a common feature space.

    This is a three-level algorithm. In the first level, a series of input
    datasets is projected into a common feature space using a configurable
    mapper. The common space is initially defined by a chosen exemplar from the
    list of input datasets, but is subsequently refined by iteratively combining
    the common space with the projected input datasets.

    In the second (optional) level, the original input datasets are again
    aligned with (or projected into) the intermediate first-level common
    space. Through a configurable number of iterations the common space is
    further refined by repeated projections of the input datasets and
    combination/aggregation of these projections into an updated common space.

    In the third level, the input datasets are again aligned with the, now
    final, common feature space. The output of this algorithm are trained
    mappers (one for each input dataset) that transform the individual features
    spaces into the common space.

    Level 1 and 2 are performed by the ``train()`` method, and level 3 is
    performed when the trained Hyperalignment instance is called with a list of
    datasets. This dataset list may or may not be identical to the training
    datasets.

    The default values for the parameters of the algorithm (e.g. projection via
    Procrustean transformation, common space aggregation by averaging) resemble
    the setup reported in :ref:`Haxby et al., Neuron (2011) <HGC+11>` *A common,
    high-dimensional model of the representational space in human ventral
    temporal cortex.*

    >>> # get some example data
    >>> from mvpa2.testing.datasets import datasets
    >>> from mvpa2.misc.data_generators import random_affine_transformation
    >>> ds4l = datasets['uni4large']
    >>> # generate a number of distorted variants of this data
    >>> dss = [random_affine_transformation(ds4l) for i in xrange(4)]
    >>> ha = Hyperalignment()
    >>> ha.train(dss)
    >>> mappers = ha(dss)
    >>> len(mappers)
    4
    """

    training_residual_errors = ConditionalAttribute(
        doc="""Residual error (norm of the difference between common space
                and projected data) per each training dataset at each level. The
                residuals are stored in a dataset with one row per level, and
                one column per input dataset. The first row corresponds to the
                error 1st-level of hyperalignment the remaining rows store the
                residual errors for each 2nd-level iteration.""")

    residual_errors = ConditionalAttribute(
        doc="""Residual error (norm of the difference between common space
                and projected data) per each dataset. The residuals are stored
                in a single-row dataset with one column per input dataset.""")

    # XXX Who cares whether it was chosen, or specified? This should be just
    # 'ref_ds'
    chosen_ref_ds = ConditionalAttribute(
        doc="""Index of the input dataset used as 1st-level reference

    # Lets use built-in facilities to specify parameters which
    # constructor should accept
    # the ``space`` of the mapper determines where the algorithm places the
    # common space definition in the datasets
    alignment = Parameter(
        # might provide allowedtype
        # XXX Currently, there's no way to handle this with constraints
        doc="""The multidimensional transformation mapper. If
            `None` (default) an instance of
            :class:`~mvpa2.mappers.procrustean.ProcrusteanMapper` is
    output_dim = Parameter(
        constraints=(EnsureInt() & EnsureRange(min=1) | EnsureNone()),
        doc="""Output common space dimensionality. If None, datasets are aligned
             to the features of the `ref_ds`. Otherwise, dimensionality reduction is
             performed using SVD and only the top SVs are kept. To get all features in
             SVD-aligned space, give output_dim>=nfeatures.

    alpha = Parameter(
        constraints=EnsureFloat() & EnsureRange(min=0, max=1),
        doc="""Regularization parameter to traverse between (Shrinkage)-CCA
                (canonical correlation analysis) and regular hyperalignment.
                Setting alpha to 1 makes the algorithm identical to
                hyperalignment and alpha of 0 makes it CCA. By default,
                it is 1, therefore hyperalignment. """)

    level2_niter = Parameter(1,
                             constraints=EnsureInt() & EnsureRange(min=0),
                             doc="Number of 2nd-level iterations.")

    ref_ds = Parameter(
        constraints=(EnsureRange(min=0) & EnsureInt()
                     | EnsureNone()),
        doc="""Index of a dataset to use as 1st-level common space
                reference.  If `None`, then the dataset with the maximum
                number of features is used.""")

    nproc = Parameter(
        doc="""Number of processes to use to parallelize the last step of
                alignment. If different from 1, it passes it as n_jobs to
                `joblib.Parallel`. Requires joblib package.""")

    zscore_all = Parameter(
        doc="""Flag to Z-score all datasets prior hyperalignment.
            Turn it off if Z-scoring is not desired or was already performed.
            If True, returned mappers are ChainMappers with the Z-scoring
            prepended to the actual projection.""")

    zscore_common = Parameter(
        doc="""Flag to Z-score the common space after each adjustment.
                This should be left enabled in most cases.""")

    combiner1 = Parameter(
        mean_xy,  #
        doc="""How to update common space in the 1st-level loop. This must
                be a callable that takes two arguments. The first argument is
                one of the input datasets after projection onto the 1st-level
                common space. The second argument is the current 1st-level
                common space. The 1st-level combiner is called iteratively for
                each projected input dataset, except for the reference dataset.
                By default the new common space is the average of the current
                common space and the recently projected dataset.""")

    level1_equal_weight = Parameter(
        doc="""Flag to force all datasets to have the same weight in the
            level 1 iteration. False (default) means each time the new common
            space is the average of the current common space and the newly
            aligned dataset, and therefore earlier datasets have less weight."""

    combiner2 = Parameter(
        doc="""How to combine all individual spaces to common space. This
            must be a callable that take a sequence of datasets as an argument.
            The callable must return a single array. This combiner is called
            once with all datasets after 1st-level projection to create an
            updated common space, and is subsequently called again after each
            2nd-level iteration.""")

    joblib_backend = Parameter(
        constraints=EnsureChoice('multiprocessing', 'threading')
        | EnsureNone(),
        doc="""Backend to use for joblib when using nproc>1.
            Options are 'multiprocessing' and 'threading'. Default is to use
            'multiprocessing' unless run on OSX which have known issues with
            joblib v0.10.3. If it is set to specific value here, then that will
            be used at the risk of failure.""")

    def __init__(self, **kwargs):
        """Set up the base class and reset the learned state.

        All keyword arguments are handed to ``ClassWithCollections``.
        """
        ClassWithCollections.__init__(self, **kwargs)
        # Nothing has been learned yet.  Both attributes are declared here so
        # that they always exist on the instance: the SVD-derived mapper to a
        # low-dimensional subspace is, like the common space, needed for
        # mapping new subjects and can so be accessed by a call without being
        # passed around after training.
        self._svd_mapper = None
        self.commonspace = None

               description="Hyperalignment of data to a common space",
    def train(self, datasets):
        """Derive a common feature space from a series of datasets.

        Parameters
        ----------
        datasets : sequence of datasets

        Returns
        -------
        A list of trained Mappers matching the number of input datasets.
        (NOTE(review): no ``return`` statement is visible in this method --
        confirm what is actually returned.)
        """
        params = self.params  # for quicker access ;)
        ca = self.ca
        # Check to make sure we get a list of datasets as input.
        if not isinstance(datasets, (list, tuple, np.ndarray)):
            raise TypeError("Input datasets should be a sequence "
                            "(of type list, tuple, or ndarray) of datasets.")

        ndatasets = len(datasets)
        nfeatures = [ds.nfeatures for ds in datasets]
        alpha = params.alpha

        residuals = None
        if ca['training_residual_errors'].enabled:
            # one row for level 1 plus one row per level-2 iteration
            residuals = np.zeros((1 + params.level2_niter, ndatasets))
            # NOTE(review): the arguments of this ``Dataset(`` call are
            # truncated; only the list of row labels remains.
            ca.training_residual_errors = Dataset(
                    ['1'] + ['2:%i' % i for i in xrange(params.level2_niter)]

        if __debug__:
            # NOTE(review): the opening of this debug call is missing.
                  "Hyperalignment %s for %i datasets" % (self, ndatasets))

        if params.ref_ds is None:
            # default reference: the dataset with the most features
            ref_ds = np.argmax(nfeatures)
            # NOTE(review): an ``else:`` appears to be missing here; as written
            # the automatic choice is immediately overwritten.
            ref_ds = params.ref_ds
            # Making sure that ref_ds is within range.
            #Parameter() already checks for it being a non-negative integer
            if ref_ds >= ndatasets:
                # NOTE(review): Python-2-only ``raise`` syntax.
                raise ValueError, "Requested reference dataset %i is out of " \
                      "bounds. We have only %i datasets provided" \
                      % (ref_ds, ndatasets)
        ca.chosen_ref_ds = ref_ds
        # zscore all data sets
        # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

        # TODO since we are doing in-place zscoring create deep copies
        # of the datasets with pruned targets and shallow copies of
        # the collections (if they would come needed in the transformation)
        # TODO: handle floats and non-floats differently to prevent
        #       waste of memory if there is no need (e.g. no z-scoring)
        #otargets = [ds.sa.targets for ds in datasets]
        datasets = [ds.copy(deep=False) for ds in datasets]
        #datasets = [Dataset(ds.samples.astype(float), sa={'targets': [None] * len(ds)})
        #datasets = [Dataset(ds.samples, sa={'targets': [None] * len(ds)})
        #            for ds in datasets]

        if params.zscore_all:
            if __debug__:
                debug('HPAL', "Z-scoring all datasets")
            for ids in xrange(len(datasets)):
                zmapper = ZScoreMapper(chunks_attr=None)
                # NOTE(review): no training of ``zmapper`` is visible before
                # it is applied -- confirm whether a line is missing.
                datasets[ids] = zmapper.forward(datasets[ids])

        if alpha < 1:
            # regularized variant: transform datasets first (see _regularize)
            datasets, wmappers = self._regularize(datasets, alpha)

        # initial common space is the reference dataset
        commonspace = datasets[ref_ds].samples
        # the reference dataset might have been zscored already, don't do it
        # twice
        if params.zscore_common and not params.zscore_all:
            if __debug__:
                # NOTE(review): the opening of this debug call is missing.
                    'HPAL_', "Creating copy of a commonspace and assuring "
                    "it is of a floating type")
            commonspace = commonspace.astype(float)
            zscore(commonspace, chunks_attr=None)
        # If there is only one dataset in training phase, there is nothing to be done
        # just use that data as the common space
        if len(datasets) < 2:
            self.commonspace = commonspace
            # NOTE(review): an ``else:`` appears to be missing before the
            # multi-dataset alignment that follows.
            # create a mapper per dataset
            # might prefer some other way to initialize... later
            mappers = [deepcopy(params.alignment) for ds in datasets]

            # Level 1 -- initial projection
            lvl1_projdata = self._level1(datasets, commonspace, ref_ds,
                                         mappers, residuals)
            # Level 2 -- might iterate multiple times
            # this is the final common space
            # NOTE(review): the end of this call is truncated.
            self.commonspace = self._level2(datasets, lvl1_projdata, mappers,
        if params.output_dim is not None:
            # derive the SVD projection from the mapped-and-averaged datasets
            # and keep only the first ``output_dim`` columns
            mappers = self._level3(datasets)
            self._svd_mapper = SVDMapper()
            self._svd_mapper.train(self._map_and_mean(datasets, mappers))
            self._svd_mapper = StaticProjectionMapper(
                proj=self._svd_mapper.proj[:, :params.output_dim])

    def __call__(self, datasets):
        """Align datasets to the common space and return their mappers.

        Parameters
        ----------
        datasets : sequence of datasets

        Returns
        -------
        A list of trained Mappers matching the number of input datasets.
        """
        if self.commonspace is None:
            # NOTE(review): as written the type check below only runs when no
            # common space exists yet; lines may be missing here (e.g. a
            # training step and an ``else:``) -- confirm.
            # Check to make sure we get a list of datasets as input.
            if not isinstance(datasets, (list, tuple, np.ndarray)):
                raise TypeError(
                    "Input datasets should be a sequence "
                    "(of type list, tuple, or ndarray) of datasets.")

        # place datasets into a copy of the list since items
        # will be reassigned
        datasets = list(datasets)

        params = self.params  # for quicker access ;)
        alpha = params.alpha  # for letting me be lazy ;)
        if params.zscore_all:
            if __debug__:
                debug('HPAL', "Z-scoring all datasets")
            # zscore them once while storing corresponding ZScoreMapper's
            # so we can assemble a comprehensive mapper at the end
            # (together with procrustes)
            zmappers = []
            for ids in xrange(len(datasets)):
                zmapper = ZScoreMapper(chunks_attr=None)
                # NOTE(review): nothing is ever appended to ``zmappers`` and
                # no training of ``zmapper`` is visible, so the ``zip`` calls
                # further down would yield no mappers -- lines appear to be
                # missing here.
                datasets[ids] = zmapper.forward(datasets[ids])

        if alpha < 1:
            datasets, wmappers = self._regularize(datasets, alpha)

        # Level 3 -- final, from-scratch, alignment to final common space
        mappers = self._level3(datasets)
        # return trained mappers for projection from all datasets into the
        # common space
        if params.zscore_all:
            # We need to construct new mappers which would chain
            # zscore and then final transformation
            if params.alpha < 1:
                # NOTE(review): the closing brackets of the comprehensions in
                # this section and the ``else:`` between the first two are
                # missing.
                mappers = [
                    ChainMapper([zm, wm, m])
                    for zm, wm, m in zip(zmappers, wmappers, mappers)
                mappers = [
                    ChainMapper([zm, m]) for zm, m in zip(zmappers, mappers)
        elif params.alpha < 1:
            mappers = [
                ChainMapper([wm, m]) for wm, m in zip(wmappers, mappers)
        if params.output_dim is not None:
            # append the SVD-based projection stored in ``self._svd_mapper``
            mappers = [ChainMapper([m, self._svd_mapper]) for m in mappers]
        return mappers

    def _regularize(self, datasets, alpha):
        """Transform every dataset with an SVD-derived regularization mapper.

        For each dataset the singular values ``S`` and right singular vectors
        ``Vh`` are computed and the projection
        ``W = Vh.T * diag(1 / sqrt((1 - alpha) * S**2 + alpha)) * Vh`` is
        wrapped into a ``StaticProjectionMapper`` and applied.

        Parameters
        ----------
        datasets : list of datasets
          Modified in place: each item is replaced by its transformed version.
        alpha : float
          Regularization parameter.

        Returns
        -------
        tuple
          ``(datasets, wmappers)`` with one mapper per dataset, in order.
        """
        if __debug__:
            # %g rather than %d: alpha is a float below 1 whenever this runs,
            # so an integer format would always report 0
            debug('HPAL',
                  "Using regularized hyperalignment with alpha of %g" % alpha)
        wmappers = []
        for ids, ds in enumerate(datasets):
            # Economy-size SVD keeps ``S`` and ``Vh`` conformable also when
            # there are fewer samples than features; for nsamples >= nfeatures
            # the result is the same as with the full decomposition.
            U, S, Vh = np.linalg.svd(ds, full_matrices=False)
            S = 1 / np.sqrt((1 - alpha) * np.square(S) + alpha)
            S = np.matrix(np.diag(S))
            W = np.matrix(Vh.T) * S * np.matrix(Vh)
            # NOTE(review): the mapper is applied right after construction
            # without an explicit train() -- confirm that this is sufficient
            # for StaticProjectionMapper with auto_train=False.
            wmapper = StaticProjectionMapper(proj=W, auto_train=False)
            # keep the mapper so callers can chain it with the final mappers
            wmappers.append(wmapper)
            datasets[ids] = wmapper.forward(ds)
        return datasets, wmappers

    def _level1(self, datasets, commonspace, ref_ds, mappers, residuals):
        """Run the 1st-level pass: align each dataset and grow the common space.

        Every dataset except the reference one is aligned to the current
        common space with its mapper, projected, and then merged into the
        common space via ``params.combiner1``.

        Parameters
        ----------
        datasets : list of datasets
        commonspace : array
          Initial common space; updated (rebound locally) after each dataset.
        ref_ds : int
          Index of the reference dataset, which is skipped.
        mappers : list
          One mapper per dataset.
        residuals : array or None
          If not None, ``residuals[0, i]`` receives the norm of the difference
          between the projected dataset ``i`` and the common space it was
          aligned to.

        Returns
        -------
        list
          Projected samples per dataset; the entry of the reference dataset
          stays its original samples.
        """
        params = self.params  # for quicker access ;)
        data_mapped = [ds.samples for ds in datasets]
        counts = 1  # number of datasets used so far for generating commonspace
        for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 1: ds #%i" % i)
            if i == ref_ds:
                # the reference dataset defines the initial common space and
                # is left unchanged
                continue
            # assign common space to ``space`` of the mapper, because this is
            # where it will be looking for it
            ds_new.sa[m.get_space()] = commonspace
            # find transformation of this dataset into the current common space
            # NOTE(review): this training call was missing and is inferred
            # from the comment above -- confirm.
            m.train(ds_new)
            # remove common space attribute again to save on memory when the
            # common space is updated for the next iteration
            del ds_new.sa[m.get_space()]
            # project this dataset into the current common space
            ds_ = m.forward(ds_new.samples)
            if params.zscore_common:
                zscore(ds_, chunks_attr=None)
            # replace original dataset with mapped one -- only the reference
            # dataset will remain unchanged
            data_mapped[i] = ds_

            # compute first-level residuals wrt to the initial common space
            if residuals is not None:
                residuals[0, i] = np.linalg.norm(ds_ - commonspace)

            # Update the common space. This is an incremental update after
            # processing each 1st-level dataset. Maybe there should be a flag
            # to make a batch update after processing all 1st-level datasets
            # to an identical 1st-level common space
            # TODO: make just a function so we dont' waste space
            if params.level1_equal_weight:
                # NOTE(review): argument order and weights are kept as found;
                # confirm that the weights line up with (ds_, commonspace) as
                # intended by the combiner.
                commonspace = params.combiner1(ds_, commonspace,
                                               weights=(float(counts), 1.0))
            else:
                commonspace = params.combiner1(ds_, commonspace)
            counts += 1
            if params.zscore_common:
                zscore(commonspace, chunks_attr=None)
        return data_mapped

    def _level2(self, datasets, lvl1_data, mappers, residuals):
        """Refine the common space by repeatedly re-aligning every dataset.

        Parameters
        ----------
        datasets : list
          Original (unprojected) datasets. Each one temporarily receives a
          sample attribute named after its mapper's space while the mapper
          is retrained.
        lvl1_data : list
          Projections produced by level 1. The list is updated in place with
          the 2nd-level projections.
        mappers : list
          One mapper per dataset; retrained in place.
        residuals : array or None
          If not None, row ``1 + loop`` receives, per dataset, the norm of
          the difference between its projection and the common space used
          in that iteration.

        Returns
        -------
        The final common space as produced by ``params.combiner2``.
        """
        params = self.params  # for quicker access ;)
        # NB: alias, not a copy -- the caller's list is modified below
        data_mapped = lvl1_data
        # aggregate all processed 1st-level datasets into a new 2nd-level
        # common space
        commonspace = params.combiner2(data_mapped)

        # XXX Why is this commented out? Who knows what combiner2 is doing and
        # whether it changes the distribution of the data
        #if params.zscore_common:
        #zscore(commonspace, chunks_attr=None)

        ndatasets = len(datasets)
        for loop in xrange(params.level2_niter):
            # 2nd-level alignment starts from the original/unprojected datasets
            # again
            for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
                if __debug__:
                    debug('HPAL_',
                          "Level 2 (%i-th iteration): ds #%i" % (loop, i))

                # Optimization speed up heuristic
                # Slightly modify the common space towards other feature
                # spaces and reduce influence of this feature space for the
                # to-be-computed projection
                temp_commonspace = (commonspace * ndatasets - data_mapped[i]) \
                                    / (ndatasets - 1)

                if params.zscore_common:
                    zscore(temp_commonspace, chunks_attr=None)
                # assign current common space
                ds_new.sa[m.get_space()] = temp_commonspace
                # retrain the mapper for this dataset
                m.train(ds_new)
                # remove common space attribute again to save on memory when the
                # common space is updated for the next iteration
                del ds_new.sa[m.get_space()]
                # obtain the 2nd-level projection
                ds_ = m.forward(ds_new.samples)
                if params.zscore_common:
                    zscore(ds_, chunks_attr=None)
                # store for 2nd-level combiner
                data_mapped[i] = ds_
                # compute residuals
                if residuals is not None:
                    residuals[1 + loop, i] = np.linalg.norm(ds_ - commonspace)

            # the common space is only refreshed once per full pass
            commonspace = params.combiner2(data_mapped)

        # and again
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)

        # return the final common space
        return commonspace

    def _level3(self, datasets):
        """Train one fresh mapper per dataset against ``self.commonspace``.

        Depending on ``params.nproc`` the mappers are trained serially or via
        joblib. ``params.nproc`` and ``params.joblib_backend`` may be
        overwritten here. If the ``residual_errors`` conditional attribute is
        enabled, the collected residuals are stored as a single-row Dataset.

        NOTE(review): this block is incomplete as it stands -- several
        physical lines appear to have been lost (see the notes below), so it
        does not parse. Restore them from the upstream source before use.

        Returns
        -------
        list of mappers, one per input dataset
        """
        params = self.params  # for quicker access ;)
        # create a mapper per dataset
        mappers = [deepcopy(params.alignment) for ds in datasets]

        # key different from level-2; the common space is uniform
        #temp_commonspace = commonspace
        # Fixing nproc=0
        if params.nproc == 0:
            from mvpa2.base import warning
            warning("nproc of 0 doesn't make sense. Setting nproc to 1.")
            params.nproc = 1
        # Checking for joblib, if not, set nproc to 1
        if params.nproc != 1:
            from mvpa2.base import externals, warning
            if not externals.exists('joblib'):
                # NOTE(review): the opening of this call (presumably
                # ``warning(``) is missing
                    "Setting nproc different from 1 requires joblib package, which "
                    "does not seem to exist. Setting nproc to 1.")
                params.nproc = 1

        # start from original input datasets again
        if params.nproc == 1:
            # serial path: train the mappers one after another
            residuals = []
            for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
                if __debug__:
                    debug('HPAL_', "Level 3: ds #%i" % i)
                m, residual = get_trained_mapper(
                    ds_new, self.commonspace, m,
                # NOTE(review): the rest of the argument list and the closing
                # parenthesis of the call above are missing
                if self.ca['residual_errors'].enabled:
                # NOTE(review): the body of this ``if`` (presumably collecting
                # ``residual`` into ``residuals``) and an ``else:`` introducing
                # the joblib branch below appear to be missing
            if __debug__:
                # NOTE(review): the opening of this debug call is missing
                      "Level 3: Using joblib with nproc = %d " % params.nproc)
            # make joblib talkative only if the 'HPAL' debug target is active
            verbose_level_parallel = 20 \
                if (__debug__ and 'HPAL' in debug.active) else 0
            from joblib import Parallel, delayed
            import sys
            # joblib's 'multiprocessing' backend has known issues of failure on OSX
            # Tested with MacOS 10.12.13, python 2.7.13, joblib v0.10.3
            if params.joblib_backend is None:
                params.joblib_backend = 'threading' if sys.platform == 'darwin' \
                                        else 'multiprocessing'
            # NOTE(review): the remaining ``Parallel`` keyword arguments and
            # the ``delayed(...)`` wrapper are missing from the call below;
            # ``verbose_level_parallel`` and ``params.joblib_backend`` are
            # computed above but never used in the visible code
            res = Parallel(n_jobs=params.nproc,
                                   ds, self.commonspace, mapper,
                               for ds, mapper in zip(datasets, mappers))
            # each result is a (mapper, residual) pair
            mappers = [m for m, r in res]
            if self.ca['residual_errors'].enabled:
                residuals = [r for m, r in res]

        if self.ca['residual_errors'].enabled:
            # [None, :] turns the flat sequence into a single row
            self.ca.residual_errors = Dataset(
                samples=np.array(residuals)[None, :])

        return mappers

    def _map_and_mean(self, datasets, mappers):
        """Project every dataset with its mapper and combine the projections.

        Parameters
        ----------
        datasets : sequence of datasets
        mappers : sequence of mappers, paired with ``datasets`` by position

        Returns
        -------
        Whatever ``params.combiner2`` returns for the list of projections.
        """
        settings = self.params
        # one slot per dataset; slots without a matching mapper stay empty
        projections = [[] for _ in datasets]
        for idx, (mapper, dataset) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Mapping training data for SVD: ds #%i" % idx)
            # XXX should we zscore data before averaging and running SVD?
            projections[idx] = mapper.forward(dataset.samples)
        return settings.combiner2(projections)
Exemple #30
class Hyperalignment(ClassWithCollections):
    """Align the features across multiple datasets into a common feature space.

    This is a three-level algorithm. In the first level, a series of input
    datasets is projected into a common feature space using a configurable
    mapper. The common space is initially defined by a chosen exemplar from the
    list of input datasets, but is subsequently refined by iteratively combining
    the common space with the projected input datasets.

    In the second (optional) level, the original input datasets are again
    aligned with (or projected into) the intermediate first-level common
    space. Through a configurable number of iterations the common space is
    further refined by repeated projections of the input datasets and
    combination/aggregation of these projections into an updated common space.

    In the third level, the input datasets are again aligned with the, now
    final, common feature space. The output of this algorithm are trained
    mappers (one for each input dataset) that transform the individual features
    spaces into the common space.

    Level 1 and 2 are performed by the ``train()`` method, and level 3 is
    performed when the trained Hyperalignment instance is called with a list of
    datasets. This dataset list may or may not be identical to the training
    datasets.

    The default values for the parameters of the algorithm (e.g. projection via
    Procrustean transformation, common space aggregation by averaging) resemble
    the setup reported in :ref:`Haxby et al., Neuron (2011) <HGC+11>` *A common,
    high-dimensional model of the representational space in human ventral
    temporal cortex.*

    Examples
    --------
    >>> # get some example data
    >>> from mvpa2.testing.datasets import datasets
    >>> from mvpa2.misc.data_generators import random_affine_transformation
    >>> ds4l = datasets['uni4large']
    >>> # generate a number of distorted variants of this data
    >>> dss = [random_affine_transformation(ds4l) for i in xrange(4)]
    >>> ha = Hyperalignment()
    >>> ha.train(dss)
    >>> mappers = ha(dss)
    >>> len(mappers)
    4
    """

    training_residual_errors = ConditionalAttribute(
        doc="""Residual error (norm of the difference between common space
                and projected data) per each training dataset at each level. The
                residuals are stored in a dataset with one row per level, and
                one column per input dataset. The first row corresponds to the
                error 1st-level of hyperalignment the remaining rows store the
                residual errors for each 2nd-level iteration.""")

    residual_errors = ConditionalAttribute(
        doc="""Residual error (norm of the difference between common space
                and projected data) per each dataset. The residuals are stored
                in a single-row dataset with one column per input dataset.""")

    # XXX Who cares whether it was chosen, or specified? This should be just
    # 'ref_ds'
    choosen_ref_ds = ConditionalAttribute(
        doc="""Index of the input dataset used as 1st-level reference
                dataset.""")

    # Lets use built-in facilities to specify parameters which
    # constructor should accept
    # the ``space`` of the mapper determines where the algorithm places the
    # common space definition in the datasets
    alignment = Parameter(
        ProcrusteanMapper(space='commonspace'),  # might provide allowedtype
        doc="""The multidimensional transformation mapper. If
            `None` (default) an instance of
            :class:`~mvpa2.mappers.procrustean.ProcrusteanMapper` is
            used.""")

    # default taken from the parameter's own documentation ("By default, it
    # is 1")
    alpha = Parameter(
        1,
        doc="""Regularization parameter to traverse between (Shrinkage)-CCA
                (canonical correlation analysis) and regular hyperalignment.
                Setting alpha to 1 makes the algorithm identical to
                hyperalignment and alpha of 0 makes it CCA. By default,
                it is 1, therefore hyperalignment. """)

    level2_niter = Parameter(1,
                             doc="Number of 2nd-level iterations.")

    # ``None`` selects the dataset with the most features (see train())
    ref_ds = Parameter(
        None,
        doc="""Index of a dataset to use as 1st-level common space
                reference.  If `None`, then the dataset with the maximum
                number of features is used.""")

    # NOTE(review): the default arguments of ``zscore_all`` (False) and
    # ``zscore_common`` (True) were missing and have been reconstructed;
    # confirm both against the upstream PyMVPA source.
    zscore_all = Parameter(
        False,
        doc="""Flag to Z-score all datasets prior hyperalignment.
            Turn it off if Z-scoring is not desired or was already performed.
            If True, returned mappers are ChainMappers with the Z-scoring
            prepended to the actual projection.""")

    zscore_common = Parameter(
        True,
        doc="""Flag to Z-score the common space after each adjustment.
                This should be left enabled in most cases.""")

    combiner1 = Parameter(
        lambda x, y: 0.5 * (x + y),  #
        doc="""How to update common space in the 1st-level loop. This must
                be a callable that takes two arguments. The first argument is
                one of the input datasets after projection onto the 1st-level
                common space. The second argument is the current 1st-level
                common space. The 1st-level combiner is called iteratively for
                each projected input dataset, except for the reference dataset.
                By default the new common space is the average of the current
                common space and the recently projected dataset.""")

    combiner2 = Parameter(
        lambda l: np.mean(l, axis=0),
        doc="""How to combine all individual spaces to common space. This
            must be a callable that take a sequence of datasets as an argument.
            The callable must return a single array. This combiner is called
            once with all datasets after 1st-level projection to create an
            updated common space, and is subsequently called again after each
            2nd-level iteration.""")

    def __init__(self, **kwargs):
        """Initialize the instance without a common space.

        All keyword arguments are passed on unchanged to
        ``ClassWithCollections.__init__``.
        """
        ClassWithCollections.__init__(self, **kwargs)
        # stays None until train() assigns the result of the level-2 alignment
        self.commonspace = None

    def train(self, datasets):
        """Derive a common feature space from a series of datasets.

        Runs level 1 and level 2 of the algorithm and stores the resulting
        common space in ``self.commonspace``. Nothing is returned.

        Parameters
        ----------
        datasets : sequence of datasets
          Only shallow copies of the datasets are modified here.

        Raises
        ------
        ValueError
          If ``params.ref_ds`` is not a valid index into ``datasets``.
        """
        params = self.params  # for quicker access ;)
        ca = self.ca
        ndatasets = len(datasets)
        nfeatures = [ds.nfeatures for ds in datasets]
        alpha = params.alpha

        residuals = None
        if ca['training_residual_errors'].enabled:
            # one row for level 1 plus one row per level-2 iteration; the
            # array is filled in place by _level1/_level2
            residuals = np.zeros((1 + params.level2_niter, ndatasets))
            # NOTE(review): the opening of this call was missing; the sample
            # attribute name 'levels' is reconstructed -- confirm upstream
            ca.training_residual_errors = Dataset(
                samples=residuals,
                sa={'levels':
                    ['1'] + ['2:%i' % i for i in xrange(params.level2_niter)]})

        if __debug__:
            debug('HPAL',
                  "Hyperalignment %s for %i datasets" % (self, ndatasets))

        if params.ref_ds is None:
            ref_ds = np.argmax(nfeatures)
        else:
            ref_ds = params.ref_ds
            # ``or``, not ``and``: an index can never be negative and too
            # large at the same time, so the former test never fired
            if ref_ds < 0 or ref_ds >= ndatasets:
                raise ValueError(
                    "Requested reference dataset %i is out of "
                    "bounds. We have only %i datasets provided"
                    % (ref_ds, ndatasets))
        ca.choosen_ref_ds = ref_ds
        # zscore all data sets
        # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

        # TODO since we are doing in-place zscoring create deep copies
        # of the datasets with pruned targets and shallow copies of
        # the collections (if they would come needed in the transformation)
        # TODO: handle floats and non-floats differently to prevent
        #       waste of memory if there is no need (e.g. no z-scoring)
        #otargets = [ds.sa.targets for ds in datasets]
        datasets = [ds.copy(deep=False) for ds in datasets]
        #datasets = [Dataset(ds.samples.astype(float), sa={'targets': [None] * len(ds)})
        #datasets = [Dataset(ds.samples, sa={'targets': [None] * len(ds)})
        #            for ds in datasets]

        if params.zscore_all:
            if __debug__:
                debug('HPAL', "Z-scoring all datasets")
            # NOTE(review): no explicit ``zmapper.train(...)`` call is visible
            # here; confirm that ZScoreMapper does not need one before forward
            for ids in xrange(len(datasets)):
                zmapper = ZScoreMapper(chunks_attr=None)
                datasets[ids] = zmapper.forward(datasets[ids])

        if alpha < 1:
            # the whitening mappers are not needed during training
            datasets, wmappers = self._regularize(datasets, alpha)

        # initial common space is the reference dataset
        commonspace = datasets[ref_ds].samples
        # the reference dataset might have been zscored already, don't do it
        # twice
        if params.zscore_common and not params.zscore_all:
            if __debug__:
                debug(
                    'HPAL_', "Creating copy of a commonspace and assuring "
                    "it is of a floating type")
            commonspace = commonspace.astype(float)
            zscore(commonspace, chunks_attr=None)

        # create a mapper per dataset
        # might prefer some other way to initialize... later
        mappers = [deepcopy(params.alignment) for ds in datasets]

        # Level 1 -- initial projection
        lvl1_projdata = self._level1(datasets, commonspace, ref_ds, mappers,
                                     residuals)
        # Level 2 -- might iterate multiple times
        # this is the final common space
        self.commonspace = self._level2(datasets, lvl1_projdata, mappers,
                                        residuals)

    def __call__(self, datasets):
        """Align datasets to the common space and return the trained mappers.

        Parameters
        ----------
        datasets : sequence of datasets

        Returns
        -------
        A list of trained Mappers matching the number of input datasets.
        Where Z-scoring and/or regularization is enabled, each item is a
        ChainMapper that applies those steps before the alignment mapper.
        """
        if self.commonspace is None:
            # NOTE(review): the body of this branch was missing; training on
            # the given datasets is a reconstruction -- confirm upstream
            self.train(datasets)

        # place datasets into a copy of the list since items
        # will be reassigned
        datasets = list(datasets)

        params = self.params  # for quicker access ;)
        alpha = params.alpha  # for letting me be lazy ;)
        if params.zscore_all:
            if __debug__:
                debug('HPAL', "Z-scoring all datasets")
            # zscore them once while storing corresponding ZScoreMapper's
            # so we can assemble a comprehensive mapper at the end
            # (together with procrustes)
            # NOTE(review): no explicit ``zmapper.train(...)`` call is visible
            # here; confirm that ZScoreMapper does not need one before forward
            zmappers = []
            for ids in xrange(len(datasets)):
                zmapper = ZScoreMapper(chunks_attr=None)
                # without this the zip() below would yield an empty result
                zmappers.append(zmapper)
                datasets[ids] = zmapper.forward(datasets[ids])

        if alpha < 1:
            datasets, wmappers = self._regularize(datasets, alpha)

        # Level 3 -- final, from-scratch, alignment to final common space
        mappers = self._level3(datasets)
        # return trained mappers for projection from all datasets into the
        # common space
        if params.zscore_all:
            # We need to construct new mappers which would chain
            # zscore and then final transformation
            if params.alpha < 1:
                return [
                    ChainMapper([zm, wm, m])
                    for zm, wm, m in zip(zmappers, wmappers, mappers)
                ]
            else:
                return [
                    ChainMapper([zm, m]) for zm, m in zip(zmappers, mappers)
                ]
        else:
            if params.alpha < 1:
                return [
                    ChainMapper([wm, m]) for wm, m in zip(wmappers, mappers)
                ]
            else:
                return mappers

    def _regularize(self, datasets, alpha):
        """Apply an SVD-based projection to each dataset for regularization.

        For every dataset a projection ``W = V * diag(s') * V^T`` is built
        from its singular value decomposition, with
        ``s' = 1 / sqrt((1 - alpha) * s**2 + alpha)``, and the dataset is
        replaced in ``datasets`` by its projection.

        Parameters
        ----------
        datasets : list
          Modified in place: each item is replaced by its projected version.
        alpha : float
          Regularization parameter.

        Returns
        -------
        (datasets, wmappers)
          The same list object that was passed in, and one
          StaticProjectionMapper per dataset.
        """
        if __debug__:
            # %g rather than %d: alpha is fractional and %d would print 0
            debug('HPAL',
                  "Using regularized hyperalignment with alpha of %g" % alpha)
        wmappers = []
        for ids in xrange(len(datasets)):
            U, S, Vh = np.linalg.svd(datasets[ids])
            S = 1 / np.sqrt((1 - alpha) * np.square(S) + alpha)
            S = np.matrix(np.diag(S))
            W = np.matrix(Vh.T) * S * np.matrix(Vh)
            wmapper = StaticProjectionMapper(proj=W)
            # keep the mapper so callers can chain it with the alignment
            wmappers.append(wmapper)
            datasets[ids] = wmapper.forward(datasets[ids])
        return datasets, wmappers

    def _level1(self, datasets, commonspace, ref_ds, mappers, residuals):
        """Project all non-reference datasets into an evolving common space.

        Parameters
        ----------
        datasets : list
          Input datasets. Each non-reference dataset temporarily receives a
          sample attribute named after its mapper's space.
        commonspace : array
          Initial common space (taken from the reference dataset by the
          caller).
        ref_ds : int
          Index of the reference dataset; it is skipped.
        mappers : list
          One mapper per dataset; trained in place.
        residuals : array or None
          If not None, ``residuals[0, i]`` receives the norm of the
          difference between the projection of dataset ``i`` and the common
          space it was aligned to.

        Returns
        -------
        list with one item per dataset: the projected samples, or the
        original samples for the reference dataset.
        """
        params = self.params  # for quicker access ;)
        data_mapped = [ds.samples for ds in datasets]
        for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 1: ds #%i" % i)
            if i == ref_ds:
                # the reference dataset defines the initial common space
                continue
            # assign common space to ``space`` of the mapper, because this is
            # where it will be looking for it
            ds_new.sa[m.get_space()] = commonspace
            # find transformation of this dataset into the current common space
            m.train(ds_new)
            # remove common space attribute again to save on memory when the
            # common space is updated for the next iteration
            del ds_new.sa[m.get_space()]
            # project this dataset into the current common space
            ds_ = m.forward(ds_new.samples)
            if params.zscore_common:
                zscore(ds_, chunks_attr=None)
            # replace original dataset with mapped one -- only the reference
            # dataset will remain unchanged
            data_mapped[i] = ds_

            # compute first-level residuals wrt to the initial common space
            if residuals is not None:
                residuals[0, i] = np.linalg.norm(ds_ - commonspace)

            # Update the common space. This is an incremental update after
            # processing each 1st-level dataset. Maybe there should be a flag
            # to make a batch update after processing all 1st-level datasets
            # to an identical 1st-level common space
            # TODO: make just a function so we dont' waste space
            commonspace = params.combiner1(ds_, commonspace)
            if params.zscore_common:
                zscore(commonspace, chunks_attr=None)
        return data_mapped

    def _level2(self, datasets, lvl1_data, mappers, residuals):
        """Refine the common space by repeatedly re-aligning every dataset.

        Parameters
        ----------
        datasets : list
          Original (unprojected) datasets. Each one temporarily receives a
          sample attribute named after its mapper's space while the mapper
          is retrained.
        lvl1_data : list
          Projections produced by level 1. The list is updated in place with
          the 2nd-level projections.
        mappers : list
          One mapper per dataset; retrained in place.
        residuals : array or None
          If not None, row ``1 + loop`` receives, per dataset, the norm of
          the difference between its projection and the common space used
          in that iteration.

        Returns
        -------
        The final common space as produced by ``params.combiner2``.
        """
        params = self.params  # for quicker access ;)
        # NB: alias, not a copy -- the caller's list is modified below
        data_mapped = lvl1_data
        # aggregate all processed 1st-level datasets into a new 2nd-level
        # common space
        commonspace = params.combiner2(data_mapped)

        # XXX Why is this commented out? Who knows what combiner2 is doing and
        # whether it changes the distribution of the data
        #if params.zscore_common:
        #zscore(commonspace, chunks_attr=None)

        ndatasets = len(datasets)
        for loop in xrange(params.level2_niter):
            # 2nd-level alignment starts from the original/unprojected datasets
            # again
            for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
                if __debug__:
                    debug('HPAL_',
                          "Level 2 (%i-th iteration): ds #%i" % (loop, i))

                # Optimization speed up heuristic
                # Slightly modify the common space towards other feature
                # spaces and reduce influence of this feature space for the
                # to-be-computed projection
                temp_commonspace = (commonspace * ndatasets - data_mapped[i]) \
                                    / (ndatasets - 1)

                if params.zscore_common:
                    zscore(temp_commonspace, chunks_attr=None)
                # assign current common space
                ds_new.sa[m.get_space()] = temp_commonspace
                # retrain the mapper for this dataset
                m.train(ds_new)
                # remove common space attribute again to save on memory when the
                # common space is updated for the next iteration
                del ds_new.sa[m.get_space()]
                # obtain the 2nd-level projection
                ds_ = m.forward(ds_new.samples)
                if params.zscore_common:
                    zscore(ds_, chunks_attr=None)
                # store for 2nd-level combiner
                data_mapped[i] = ds_
                # compute residuals
                if residuals is not None:
                    residuals[1 + loop, i] = np.linalg.norm(ds_ - commonspace)

            # the common space is only refreshed once per full pass
            commonspace = params.combiner2(data_mapped)

        # and again
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)

        # return the final common space
        return commonspace

    def _level3(self, datasets):
        """Train one fresh mapper per dataset against ``self.commonspace``.

        If the ``residual_errors`` conditional attribute is enabled, a
        single-row Dataset is stored in it and filled with, per dataset, the
        norm of the difference between its projection and the common space.

        Parameters
        ----------
        datasets : sequence of datasets
          Each one temporarily receives a sample attribute named after its
          mapper's space while the mapper is trained.

        Returns
        -------
        list of trained mappers, one per input dataset
        """
        params = self.params  # for quicker access ;)
        # create a mapper per dataset
        mappers = [deepcopy(params.alignment) for ds in datasets]

        # key different from level-2; the common space is uniform
        #temp_commonspace = commonspace

        residuals = None
        if self.ca['residual_errors'].enabled:
            # the array is handed to the Dataset first and filled in below
            residuals = np.zeros((1, len(datasets)))
            self.ca.residual_errors = Dataset(samples=residuals)

        # start from original input datasets again
        for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 3: ds #%i" % i)

            # retrain mapper on final common space
            ds_new.sa[m.get_space()] = self.commonspace
            m.train(ds_new)
            # remove common space attribute again to save on memory
            del ds_new.sa[m.get_space()]

            if residuals is not None:
                # obtain final projection
                data_mapped = m.forward(ds_new.samples)
                residuals[0,
                          i] = np.linalg.norm(data_mapped - self.commonspace)

        return mappers