예제 #1
0
파일: base.py 프로젝트: gorlins/PyMVPA
    def labelVoxel(self, c, levels=None):
        """Label voxel `c` via the closest voxel known to the reference atlas.

        The coordinates of the closest referenced voxel are read from
        ``self._data`` for the reference level.  If that voxel lies within
        ``self.distance`` (with a 1e-3 tolerance) it is labeled instead of
        `c`; otherwise `c` itself is labeled.

        :Parameters:
          c
            Voxel coordinates; passed through ``self._checkRange`` and then
            indexed as ``c[0]``, ``c[1]``, ``c[2]``.
          levels
            Forwarded untouched to the reference atlas' ``labelVoxel``.

        :Returns:
          Whatever the reference atlas' ``labelVoxel`` returns, with two
          extra items: ``'voxel_referenced'`` (`c`, or None if the closest
          point was too far) and ``'distance'`` (the distance, or 0).
        """
        if self.__referenceLevel is None:
            warning("You did not provide what level to use "
                    "for reference. Assigning 0th level -- '%s'"
                    % (self._levels_dict[0],))
            self.setReferenceLevel(0)

        c = self._checkRange(c)

        # obtain coordinates of the closest voxel
        cref = self._data[ self.__referenceLevel.indexes, c[2], c[1], c[0] ]
        dist = norm( (cref - c) * self.voxdim )
        if __debug__:
            # %r replaces the backtick repr syntax, which is gone in Python 3
            debug('ATL__', "Closest referenced point for %r is "
                  "%r at distance %3.2f" % (c, cref, dist))
        if (self.distance - dist) >= 1e-3: # neglect everything smaller
            result = self.__referenceAtlas.labelVoxel(cref, levels)
            result['voxel_referenced'] = c
            result['distance'] = dist
        else:
            result = self.__referenceAtlas.labelVoxel(c, levels)
            if __debug__:
                debug('ATL__', "Closest referenced point is "
                      "further than desired distance %.2f" % self.distance)
            result['voxel_referenced'] = None
            result['distance'] = 0
        return result
예제 #2
0
 def __call__(self, predicted, target):
     """Warn that no real p-value is available and return a placeholder.

     Both arguments are required by the signature but are not inspected.
     Always returns -1.0 after emitting the warning.
     """
     from mvpa.base import warning
     bogus_pvalue = -1.0
     message = ("p-value for correlation is implemented only when scipy is "
                "available. Bogus value -1.0 is returned otherwise")
     warning(message)
     return bogus_pvalue
예제 #3
0
파일: eep.py 프로젝트: gorlins/PyMVPA
    def __init__(self, samples=None, **kwargs):
        """Initialize EEPDataset.

        :Parameters:
          samples: Filename (string) of a EEP binary file or an `EEPBin`
                   object
          kwargs: Accepted, but not consulted by the code visible here.

        :Raises:
          RuntimeError: Re-raised (after a warning) if `EEPBin` cannot open
                        the given filename.
          ValueError: If `samples` is neither None, a string, nor an
                      `EEPBin` instance.
        """
        # dataset props defaults
        # NOTE(review): these locals are only filled in below; presumably they
        # are handed to the base class further on -- confirm against the
        # full constructor.
        dt = t0 = channelids = None

        # default way to use the constructor: with filename
        if samples is not None:
            if isinstance(samples, str):
                # open the eep file
                try:
                    eb = EEPBin(samples)
                except RuntimeError:
                    warning("ERROR: EEPDatasets: Cannot open samples file %s" \
                            % samples) # should we make also error?
                    # bare raise keeps the original traceback intact
                    raise
            elif isinstance(samples, EEPBin):
                # nothing special
                eb = samples
            else:
                raise ValueError(
                      "EEPDataset constructor takes the filename of an "
                      "EEP file or a EEPBin object as 'samples' argument.")
            samples = eb.data
            dt = eb.dt
            channelids = eb.channels
            t0 = eb.t0
예제 #4
0
파일: support.py 프로젝트: gorlins/PyMVPA
def _getUniqueLengthNCombinations_binary(L, n=None, sort=True):
    """Find all subsets of data

    :Parameters:
      L : list
        list of unique ids
      n : None or int
        If None, all possible subsets are returned. If n is specified (int),
        then only the ones of the length n are returned
      sort : bool
        Either to sort the resultant sequence

    Adopted from Alex Martelli:
    http://mail.python.org/pipermail/python-list/2001-January/067815.html
    """
    N = len(L)
    if N > 20 and n == 1:
        warning("getUniqueLengthNCombinations_binary should not be used for "
                "large N")
    result = []
    for X in range(2**N):
        x = [ L[i] for i in range(N) if X & (1L<<i) ]
        if n is None or len(x) == n:
            # yield x # if we wanted to use it as a generator
            result.append(x)
    result.sort()
    # if __debug__ and n is not None:
    #     # verify the result
    #     # would need scipy... screw it
    #     assert(len(result) == ...)
    return result
예제 #5
0
파일: mri.py 프로젝트: geeragh/PyMVPA
def _load_anynifti(src, ensure=False, enforce_dim=None):
    """Load/access NIfTI data from files or instances.

    Parameters
    ----------
    src : str or NiftiImage
      Filename of a NIfTI image or a `NiftiImage` instance.
    ensure : bool, optional
      If True, throw ValueError exception if cannot be loaded.
    enforce_dim : int or None
      If not None, it is the dimensionality of the data to be enforced,
      commonly 4D for the data, and 3D for the mask in case of fMRI.

    Returns
    -------
    NiftiImage or None
      If the source is not supported None is returned.

    Raises
    ------
    ValueError
      If there is a problem with data (variable dimensionality) or
      failed to load data and ensure=True.
    """
    nifti = None

    # figure out what type
    if isinstance(src, str):
        # open the nifti file
        try:
            nifti = NiftiImage(src)
        except RuntimeError, e:
            warning("ERROR: Cannot open NIfTI file %s" % src)
            raise e
예제 #6
0
파일: smlr.py 프로젝트: geeragh/PyMVPA
    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.

        All keyword arguments are handed over to ``Classifier.__init__``.
        """
        # TODO:
        #  - Add in likelihood calculation
        #  - Add kernels, not just direct methods.

        # init base class first
        Classifier.__init__(self, **kwargs)

        # fall back onto pure Python if 'C' was requested but the compiled
        # helper is missing
        if _cStepwiseRegression is None:
            if self.params.implementation == 'C':
                warning('SMLR: C implementation is not available.'
                        ' Using pure Python one')
                self.params.implementation = 'Python'

        # pylint friendly initializations
        # unique labels from the training set
        self._ulabels = None
        # all weights including bias values
        self.__weights_all = None
        # just the weights, without the biases
        self.__weights = None
        # the biases, will remain None if has_bias is False
        self.__biases = None
예제 #7
0
파일: neighborhood.py 프로젝트: esc/PyMVPA
    def _get_increments(self, ndim):
        """Creates a list of increments for a given dimensionality

        RF: lame yoh just cut-pasted and tuned up because everything
            depends on ndim...
        """
        # Set element_sizes
        element_sizes = self._element_sizes
        if element_sizes is None:
            element_sizes = np.ones(ndim)
        else:
            if (ndim != len(element_sizes)):
                raise ValueError, \
                      "Dimensionality mismatch: element_sizes %s provided " \
                      "to constructor had %i dimensions, whenever queried " \
                      "coordinate had %i" \
                      % (element_sizes, len(element_sizes), ndim)
        center = np.zeros(ndim)

        element_sizes = np.asanyarray(element_sizes)
        # What range for each dimension
        erange = np.ceil(self._radius / element_sizes).astype(int)

        tentative_increments = np.array(list(np.ndindex(tuple(erange*2 + 1)))) \
                               - erange
        # Filter out the ones beyond the "sphere"
        res = array([x for x in tentative_increments
                      if self._inner_radius
                      < self._distance_func(x * element_sizes, center)
                      <= self._radius])

        if not len(res):
            warning("%s defines no neighbors" % self)
        return res
예제 #8
0
    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.

        Keyword arguments are passed on to ``Classifier.__init__``.
        """
        # TODO:
        #  - Add in likelihood calculation
        #  - Add kernels, not just direct methods.

        # the base class goes first
        Classifier.__init__(self, **kwargs)

        # without the compiled helper a request for 'C' is downgraded
        if _cStepwiseRegression is None:
            if self.params.implementation == 'C':
                warning('SMLR: C implementation is not available.'
                        ' Using pure Python one')
                self.params.implementation = 'Python'

        # pylint friendly initializations
        # unique labels from the training set
        self._ulabels = None
        # all weights including bias values
        self.__weights_all = None
        # just the weights, without the biases
        self.__weights = None
        # the biases, will remain None if has_bias is False
        self.__biases = None
예제 #9
0
파일: base.py 프로젝트: geeragh/PyMVPA
    def train(self, dataset):
        """Train classifier on a dataset

        Shouldn't be overridden in subclasses unless explicitly needed
        to do so

        Calls ``_pretrain``, ``_train`` and ``_posttrain`` in that order and
        stores the time spent in ``_train`` in ``self.ca.training_time``.

        Raises ``DegenerateInputError`` if the dataset has no features or
        no samples.  Returns whatever ``_train`` returns.
        """
        if dataset.nfeatures == 0 or dataset.nsamples == 0:
            raise DegenerateInputError(
                  "Cannot train classifier on degenerate data %s" % dataset)
        if __debug__:
            debug("CLF", "Training classifier %(clf)s on dataset %(dataset)s",
                  msgargs={'clf':self, 'dataset':dataset})

        self._pretrain(dataset)

        # remember the time when started training
        t0 = time.time()

        # Datasets without features were rejected above, so the former
        # "no features present" fallback could never run and was dropped.
        result = self._train(dataset)

        self.ca.training_time = time.time() - t0
        self._posttrain(dataset)
        return result
예제 #10
0
def _SLcholesky_autoreg(C, nsteps=None, **kwargs):
    """Simple wrapper around cholesky to incrementally regularize the
    matrix until successful computation.

    For `nsteps` we boost diagonal 10-fold each time from the
    'epsilon' of the respective dtype. If None -- would proceed until
    reaching 1.

    Additional keyword arguments are accepted but ignored.

    Returns the result of the first ``SLcholesky`` call that succeeds
    (None if `nsteps` allows for no attempt at all).  If the last allowed
    attempt fails as well, its ``SLAError`` is re-raised.
    """
    if nsteps is None:
        nsteps = -int(np.floor(np.log10(np.finfo(float).eps)))
    result = None
    for step in range(nsteps):
        epsilon_value = (10**step) * np.finfo(C.dtype).eps
        epsilon = epsilon_value * np.eye(C.shape[0])
        try:
            result = SLcholesky(C + epsilon, lower=True)
        except SLAError as e:
            warning("Cholesky decomposition lead to failure: %s.  "
                    "As requested, performing auto-regularization but "
                    "for better control you might prefer to regularize "
                    "yourself by providing lm parameter to GPR" % e)
            if step < nsteps - 1:
                if __debug__:
                    debug(
                        "GPR", "Failed to obtain cholesky on "
                        "auto-regularization step %d value %g. Got %s."
                        " Boosting lambda more to reg. C." %
                        (step, epsilon_value, e))
                continue
            else:
                raise
        else:
            # stop at the smallest regularization that works instead of
            # needlessly boosting the diagonal any further
            break
    return result
예제 #11
0
파일: svm.py 프로젝트: gorlins/PyMVPA
def _setdebug(obj, partname):
    """Helper to set level of debugging output for SG

    The log level of ``obj.io`` is set to ``M_DEBUG`` if the debug target
    ``SG_<PARTNAME>`` is active and to ``M_ERROR`` otherwise; progress
    reports are enabled or disabled accordingly.

    :Parameters:
      obj
        In SG debug output seems to be set per every object
      partname : basestring
        For what kind of object we are talking about... could be automated
        later on (TODO)
    """
    debugname = "SG_%s" % partname.upper()

    switch = {True: (shogun.Kernel.M_DEBUG, 'M_DEBUG', "enable"),
              False: (shogun.Kernel.M_ERROR, 'M_ERROR', "disable")}

    key = __debug__ and debugname in debug.active

    sglevel, slevel, progressfunc = switch[key]

    if __debug__:
        debug("SG_", "Setting verbosity for shogun.%s instance: %r to %s" %
              (partname, obj, slevel))
    obj.io.set_loglevel(sglevel)
    try:
        # look the method up by name rather than exec'ing generated source
        getattr(obj.io, "%s_progress" % progressfunc)()
    except Exception:
        # best effort only: not being able to toggle progress is not fatal
        warning("Shogun version installed has no way to enable progress" +
                " reports")
예제 #12
0
파일: gpr.py 프로젝트: B-Rich/PyMVPA
def _SLcholesky_autoreg(C, nsteps=None, **kwargs):
    """Simple wrapper around cholesky to incrementally regularize the
    matrix until successful computation.

    For `nsteps` we boost diagonal 10-fold each time from the
    'epsilon' of the respective dtype. If None -- would proceed until
    reaching 1.

    Additional keyword arguments are accepted but ignored.

    Returns the result of the first ``SLcholesky`` call that succeeds
    (None if `nsteps` allows for no attempt at all).  If the last allowed
    attempt fails as well, its ``SLAError`` is re-raised.
    """
    if nsteps is None:
        nsteps = -int(np.floor(np.log10(np.finfo(float).eps)))
    result = None
    for step in range(nsteps):
        epsilon_value = (10**step) * np.finfo(C.dtype).eps
        epsilon = epsilon_value * np.eye(C.shape[0])
        try:
            result = SLcholesky(C + epsilon, lower=True)
        except SLAError as e:
            warning("Cholesky decomposition lead to failure: %s.  "
                    "As requested, performing auto-regularization but "
                    "for better control you might prefer to regularize "
                    "yourself by providing lm parameter to GPR" % e)
            if step < nsteps-1:
                if __debug__:
                    debug("GPR", "Failed to obtain cholesky on "
                          "auto-regularization step %d value %g. Got %s."
                          " Boosting lambda more to reg. C."
                          % (step, epsilon_value, e))
                continue
            else:
                raise
        else:
            # first success wins; do not keep boosting the diagonal
            break
    return result
예제 #13
0
파일: nifti.py 프로젝트: gorlins/PyMVPA
def getNiftiFromAnySource(src, ensure=False, enforce_dim=None):
    """Load/access NIfTI data from files or instances.

    NOTE(review): the code visible here only handles filename sources and
    does not consult `ensure` or `enforce_dim` -- confirm against the
    complete implementation.

    :Parameters:
      src: str | NiftiImage
        Filename of a NIfTI image or a `NiftiImage` instance.
      ensure : bool
        If True, throw ValueError exception if cannot be loaded.
      enforce_dim : int or None
        If not None, it is the dimensionality of the data to be enforced,
        commonly 4D for the data, and 3D for the mask in case of fMRI.

    :Returns:
      NiftiImage | None
        If the source is not supported None is returned.

    :Raises:
      RuntimeError
        Re-raised (after a warning) if the file could not be opened.
    """
    nifti = None

    # figure out what type
    if isinstance(src, str):
        # open the nifti file
        try:
            nifti = NiftiImage(src)
        except RuntimeError:
            warning("ERROR: NiftiDatasets: Cannot open NIfTI file %s" \
                    % src)
            # bare raise keeps the original traceback intact
            raise

    # hand the image back to the caller (it used to be dropped)
    return nifti
예제 #14
0
파일: transerror.py 프로젝트: B-Rich/PyMVPA
    def _precall(self, testdataset, trainingdataset=None):
        """Generic part which trains the classifier if necessary

        If a training dataset is given and training is enabled, the
        classifier is trained on it; 'training_stats' is temporarily enabled
        on the classifier and copied over when it is enabled here.
        Afterwards a warning is issued if the testing dataset carries
        labels the classifier has not been trained on.
        """
        if trainingdataset is not None and self.__train:
            # XXX A warning about re-training an already trained classifier
            # used to live here.  It is switched off for now: it can be
            # pretty annoying if triggered inside an algorithm where
            # retraining is intended or not avoidable, and is_trained docs
            # say: MUST BE USED WITH CARE IF EVER
            if self.ca.is_enabled('training_stats'):
                self.__clf.ca.change_temporarily(
                    enable_ca=['training_stats'])
            self.__clf.train(trainingdataset)
            if self.ca.is_enabled('training_stats'):
                self.ca.training_stats = \
                    self.__clf.ca.training_stats
                self.__clf.ca.reset_changed_temporarily()

        # the label check applies only to classifiers (not regressions)
        # which track the targets they were trained on
        check_labels = (self.__clf.ca.is_enabled('trained_targets')
                        and not self.__clf.__is_regression__
                        and testdataset is not None)
        if not check_labels:
            return

        tested = set(testdataset.sa[self.clf.get_space()].unique)
        unseen = tested - set(self.__clf.ca.trained_targets)
        if unseen:
            message = "Classifier %s wasn't trained to classify labels %s" % \
                      (self.__clf, unseen)
            message += " present in testing dataset. Make sure that you have"
            message += " not mixed order/names of the arguments anywhere"
            warning(message)
예제 #15
0
    def test_confusion_based_error(self, l_clf):
        """Compare ConfusionBasedError with a transfer error on training data.
        """
        train = datasets['uni2medium']
        train = train[train.sa.train == 1]
        # to check if we fail to classify for 3 labels
        test3 = datasets['uni3medium']
        test3 = test3[test3.sa.train == 1]
        err = ConfusionBasedError(clf=l_clf)
        terr = TransferMeasure(l_clf, Splitter('train', attr_values=[1,1]),
                               postproc=BinaryFxNode(mean_mismatch_error,
                                                     'targets'))

        # Shouldn't be able to access the state yet
        self.assertRaises(UnknownStateError, err, None)

        l_clf.train(train)
        e, te = err(None), terr(train)
        te = np.asscalar(te)
        self.assertTrue(abs(e-te) < 1e-10,
            msg="ConfusionBasedError (%.2g) should be equal to TransferError "
                "(%.2g) on traindataset" % (e, te))

        # this will print nasty WARNING but it is ok -- it is just checking code
        # NB warnings are not printed while doing whole testing
        warning("Don't worry about the following warning.")
        if 'multiclass' in l_clf.__tags__:
            self.assertFalse(terr(test3) is None)

        # try copying the beast
        terr_copy = copy(terr)
예제 #16
0
    def label_voxel(self, c, levels=None):
        """Label voxel `c` via the closest voxel known to the reference atlas.

        The closest referenced voxel is looked up in ``self._data`` for the
        reference level (level 0 is assigned, with a warning, if none was
        set).  That voxel is labeled instead of `c` if it is within
        ``self.distance``; otherwise `c` itself is labeled.  The returned
        labels gain 'voxel_referenced' and 'distance' items.
        """
        if self.__referenceLevel is None:
            warning("You did not provide what level to use "
                    "for reference. Assigning 0th level -- '%s'" %
                    (self._levels[0], ))
            self.set_reference_level(0)

        c = self._check_range(c)

        # coordinates of the closest voxel and how far away it is
        closest = self._data[self.__referenceLevel.indexes, c[2], c[1], c[0]]
        offset = norm((closest - c) * self.voxdim)
        if __debug__:
            debug(
                'ATL__', "Closest referenced point for %r is "
                "%r at distance %3.2f" % (c, closest, offset))

        # neglect everything smaller than 1e-3
        within_reach = (self.distance - offset) >= 1e-3
        if not within_reach:
            labels = self.__referenceAtlas.label_voxel(c, levels)
            if __debug__:
                debug(
                    'ATL__', "Closest referenced point is "
                    "further than desired distance %.2f" % self.distance)
            labels['voxel_referenced'] = None
            labels['distance'] = 0
            return labels

        labels = self.__referenceAtlas.label_voxel(closest, levels)
        labels['voxel_referenced'] = c
        labels['distance'] = offset
        return labels
예제 #17
0
파일: base.py 프로젝트: esc/PyMVPA
    def _call(self, ds):
        """Run the node on every generated dataset and stack the results.

        Each dataset yielded by the generator is fed to the node; the
        optional callback and ``_repetition_postcall`` are applied to every
        result.  Per-repetition results are kept in
        ``ca.repetition_results`` and returned stacked according to the
        configured concatenation mode ('samples' or 'features').

        Raises ValueError for any other concatenation mode.
        """
        # local binding
        generator = self._generator
        node = self._node
        ca = self.ca
        space = self.get_space()
        concat_as = self._concat_as

        if self.ca.is_enabled("stats") \
           and not (node.ca.has_key("stats") and node.ca.is_enabled("stats")):
            warning("'stats' conditional attribute was enabled, but "
                    "the assigned node '%s' either doesn't support it, "
                    "or it is disabled" % node)
        # precharge conditional attributes
        ca.datasets = []

        # run the node an all generated datasets
        collected = []
        for idx, sds in enumerate(generator.generate(ds)):
            if ca.is_enabled("datasets"):
                # store dataset in ca
                ca.datasets.append(sds)
            # run the beast
            res = node(sds)
            # callback
            if self._callback is not None:
                self._callback(data=sds, node=node, result=res)
            # subclass postprocessing
            res = self._repetition_postcall(sds, node, res)
            if space:
                # XXX maybe try to get something more informative from the
                # processing node (e.g. in 0.5 it used to be 'chunks'->'chunks'
                # to indicate what was trained and what was tested. Now it is
                # more tricky, because `node` could be anything
                res.set_attr(space, (idx,))
            # store
            collected.append(res)

            harvest = ca.is_enabled("stats") and node.ca.has_key("stats") \
                      and node.ca.is_enabled("stats")
            if harvest:
                if not ca.is_set('stats'):
                    # create empty stats container of matching type
                    ca.stats = node.ca['stats'].value.__class__()
                # harvest summary stats
                ca['stats'].value.__iadd__(node.ca['stats'].value)

        # charge condition attribute
        self.ca.repetition_results = collected

        # stack all results into a single Dataset; no need to store the raw
        # results, since the Measure class will automatically store them in
        # a CA
        if concat_as == 'samples':
            return vstack(collected)
        if concat_as == 'features':
            return hstack(collected)
        raise ValueError("Unkown concatenation mode '%s'" % concat_as)
예제 #18
0
def seed(random_seed):
    """Seed shogun's random number generator with `random_seed`.

    Any exception raised while seeding is reported via a warning rather
    than propagated.
    """
    if __debug__:
        debug('SG', "Seeding shogun's RNG with %s" % random_seed)
    try:
        # reuse the same seed for shogun
        shogun.Library.Math_init_random(random_seed)
    except Exception as e:
        warning('Shogun cannot be seeded due to %s' % (e, ))
예제 #19
0
파일: errorfx.py 프로젝트: B-Rich/PyMVPA
 def corr_error_prob(predicted, target):
     """Placeholder for the p-value of correlation between the target and
     the predicted values.

     Neither argument is inspected: a warning is issued and the bogus
     value -1.0 is returned.
     """
     from mvpa.base import warning
     bogus_pvalue = -1.0
     message = ("p-value for correlation is implemented only when scipy is "
                "available. Bogus value -1.0 is returned otherwise")
     warning(message)
     return bogus_pvalue
예제 #20
0
파일: svm.py 프로젝트: B-Rich/PyMVPA
def seed(random_seed):
    """Seed shogun's random number generator with `random_seed`.

    Any exception raised while seeding is reported via a warning rather
    than propagated.
    """
    if __debug__:
        debug('SG', "Seeding shogun's RNG with %s" % random_seed)
    try:
        # reuse the same seed for shogun
        shogun.Library.Math_init_random(random_seed)
    except Exception as e:
        warning('Shogun cannot be seeded due to %s' % (e,))
예제 #21
0
def _pvalue(x, cdf_func, tail, return_tails=False, name=None):
    """Helper function to return p-value(x) given cdf and tail

    Parameters
    ----------
    x : scalar or sequence
      Value(s) to compute p-values for.  A scalar is wrapped into a list
      for the computation and a single p-value is returned for it.
    cdf_func : callable
      Function to be used to derive cdf values for x
    tail : str ('left', 'right', 'any', 'both')
      Which tail of the distribution to report. For 'any' and 'both'
      it chooses the tail it belongs to based on the comparison to
      p=0.5. In the case of 'any' significance is taken like in a
      one-tailed test.
    return_tails : bool
      If True, a tuple return (pvalues, tails), where tails contain
      1s if value was from the right tail, and 0 if the value was
      from the left tail.
    name : str or None
      Only used to make the stability-check warning more informative.

    Raises
    ------
    ValueError
      If `tail` is not one of the supported values.
    """
    # an unknown tail used to pass through silently and then blow up with
    # an unbound `right_tail` whenever return_tails was requested
    if tail not in ('left', 'right', 'any', 'both'):
        raise ValueError("Unknown value %r of tail. Must be one of "
                         "'left', 'right', 'any', 'both'" % (tail,))

    is_scalar = np.isscalar(x)
    if is_scalar:
        x = [x]

    cdf = cdf_func(x)

    if __debug__ and 'CHECK_STABILITY' in debug.active:
        cdf_min, cdf_max = np.min(cdf), np.max(cdf)
        if cdf_min < 0 or cdf_max > 1.0:
            s = ('', ' for %s' % name)[int(name is not None)]
            warning('Stability check of cdf %s failed%s. Min=%s, max=%s' % \
                  (cdf_func, s, cdf_min, cdf_max))

    # no escape but to assure that CDF is in the right range. Some
    # distributions from scipy tend to jump away from [0,1]
    cdf = np.clip(cdf, 0, 1.0)

    if tail == 'left':
        if return_tails:
            right_tail = np.zeros(cdf.shape, dtype=bool)
    elif tail == 'right':
        cdf = 1 - cdf
        if return_tails:
            right_tail = np.ones(cdf.shape, dtype=bool)
    else:
        # 'any' or 'both': report the tail each value actually falls into
        right_tail = (cdf >= 0.5)
        cdf[right_tail] = 1.0 - cdf[right_tail]
        if tail == 'both':
            # we need report the area under both tails
            # XXX this is only meaningful for symetric distributions
            cdf *= 2

    # Assure that NaNs didn't get significant value
    cdf[np.isnan(x)] = 1.0
    if is_scalar:
        res = cdf[0]
    else:
        res = cdf

    if return_tails:
        return (res, right_tail)
    else:
        return res
예제 #22
0
파일: stats.py 프로젝트: arokem/PyMVPA
def _pvalue(x, cdf_func, tail, return_tails=False, name=None):
    """Helper function to return p-value(x) given cdf and tail

    Parameters
    ----------
    x : scalar or sequence
      Value(s) to compute p-values for.  A scalar is wrapped into a list
      for the computation and a single p-value is returned for it.
    cdf_func : callable
      Function to be used to derive cdf values for x
    tail : str ('left', 'right', 'any', 'both')
      Which tail of the distribution to report. For 'any' and 'both'
      it chooses the tail it belongs to based on the comparison to
      p=0.5. In the case of 'any' significance is taken like in a
      one-tailed test.
    return_tails : bool
      If True, a tuple return (pvalues, tails), where tails contain
      1s if value was from the right tail, and 0 if the value was
      from the left tail.
    name : str or None
      Only used to make the stability-check warning more informative.

    Raises
    ------
    ValueError
      If `tail` is not one of the supported values.
    """
    # an unknown tail used to pass through silently and then blow up with
    # an unbound `right_tail` whenever return_tails was requested
    if tail not in ('left', 'right', 'any', 'both'):
        raise ValueError("Unknown value %r of tail. Must be one of "
                         "'left', 'right', 'any', 'both'" % (tail,))

    is_scalar = np.isscalar(x)
    if is_scalar:
        x = [x]

    cdf = cdf_func(x)

    if __debug__ and 'CHECK_STABILITY' in debug.active:
        cdf_min, cdf_max = np.min(cdf), np.max(cdf)
        if cdf_min < 0 or cdf_max > 1.0:
            s = ('', ' for %s' % name)[int(name is not None)]
            warning('Stability check of cdf %s failed%s. Min=%s, max=%s' % \
                  (cdf_func, s, cdf_min, cdf_max))

    # no escape but to assure that CDF is in the right range. Some
    # distributions from scipy tend to jump away from [0,1]
    cdf = np.clip(cdf, 0, 1.0)

    if tail == 'left':
        if return_tails:
            right_tail = np.zeros(cdf.shape, dtype=bool)
    elif tail == 'right':
        cdf = 1 - cdf
        if return_tails:
            right_tail = np.ones(cdf.shape, dtype=bool)
    else:
        # 'any' or 'both': report the tail each value actually falls into
        right_tail = (cdf >= 0.5)
        cdf[right_tail] = 1.0 - cdf[right_tail]
        if tail == 'both':
            # we need report the area under both tails
            # XXX this is only meaningful for symetric distributions
            cdf *= 2

    # Assure that NaNs didn't get significant value
    cdf[np.isnan(x)] = 1.0
    if is_scalar:
        res = cdf[0]
    else:
        res = cdf

    if return_tails:
        return (res, right_tail)
    else:
        return res
예제 #23
0
파일: searchlight.py 프로젝트: esc/PyMVPA
    def _call(self, dataset):
        """Perform the ROI search.

        Trains the query engine on `dataset`, determines the ROI center ids
        (a feature attribute name, an explicit sequence, or all features),
        delegates the actual computation to ``_sl_call`` and returns its raw
        results.  If the dataset has a mapper, a copy of it (extended by a
        feature selection when ROI ids were given) is attached to the
        results.

        Raises IndexError (only when ``__debug__`` is on) if an explicitly
        given ROI id exceeds the number of features.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        self._queryengine.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                if max(roi_ids) >= dataset.nfeatures:
                    raise IndexError(
                          "Maximal center_id found is %s whenever given "
                          "dataset has only %d features"
                          % (max(roi_ids), dataset.nfeatures))
        else:
            roi_ids = np.arange(dataset.nfeatures)

        # pass to subclass
        results, roi_sizes = self._sl_call(dataset, roi_ids, nproc)

        if roi_sizes is not None:
            self.ca.roi_sizes = roi_sizes

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__roi_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)
                mapper.append(StaticFeatureSelection(roi_ids,
                                                     dshape=dataset.shape[1:]))
                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # return raw results, base-class will take care of transformations
        return results
예제 #24
0
파일: stats.py 프로젝트: arokem/PyMVPA
    def fit(self, measure, wdata, vdata=None):
        """Fit the distribution by performing multiple cycles which repeatedly
        permuted labels in the training dataset.

        Permutations for which `measure` raises ``LearnerError`` are skipped
        with a warning and contribute no sample.

        Parameters
        ----------
        measure: (`Featurewise`)`DatasetMeasure` or `TransferError`
          TransferError instance used to compute all errors.
        wdata: `Dataset` which gets permuted and used to compute the
          measure/transfer error multiple times.
        vdata: `Dataset` used for validation.
          If provided measure is assumed to be a `TransferError` and
          working and validation dataset are passed onto it.
        """
        # TODO: place exceptions separately so we could avoid circular imports
        from mvpa.clfs.base import LearnerError

        # Holds the values for randomized labels.
        # NOTE(review): nothing visible here consumes this list after the
        # loop -- presumably the remainder of the method does; confirm.
        dist_samples = []

        # estimate null-distribution
        for p in range(self.__permutations):
            # new permutation all the time
            # but only permute the training data and keep the testdata constant
            #
            if __debug__:
                debug('STATMC', "Doing %i permutations: %i" \
                      % (self.__permutations, p+1), cr=True)

            # TODO this really needs to be more clever! If data samples are
            # shuffled within a class it really makes no difference for the
            # classifier, hence the number of permutations to estimate the
            # null-distribution of transfer errors can be reduced dramatically
            # when the *right* permutations (the ones that matter) are done.
            permuted_wdata = wdata.copy('shallow')
            permuted_wdata.permute_attr(
                attr=self.permute_attr,
                chunks_attr=self.chunks_attr,
                col=self.permute_col,
                assure_permute=self.assure_permute)

            # decide on the arguments to measure
            if vdata is not None:
                measure_args = [vdata, permuted_wdata]
            else:
                measure_args = [permuted_wdata]

            # compute and store the measure of this permutation
            # assume it has `TransferError` interface
            try:
                res = measure(*measure_args)
            except LearnerError as e:
                warning('Failed to obtain value from %s due to %s.  Measurement'
                        ' was skipped, which could lead to unstable and/or'
                        ' incorrect assessment of the null_dist' % (measure, e))
                # really skip it: `res` is either unbound or left over from
                # the previous permutation at this point
                continue
            res = np.asanyarray(res)
            dist_samples.append(res)
예제 #25
0
    def fit(self, measure, wdata, vdata=None):
        """Fit the distribution by performing multiple cycles which repeatedly
        permuted labels in the training dataset.

        Permutations for which `measure` raises ``LearnerError`` are skipped
        with a warning and contribute no sample.

        Parameters
        ----------
        measure: (`Featurewise`)`DatasetMeasure` or `TransferError`
          TransferError instance used to compute all errors.
        wdata: `Dataset` which gets permuted and used to compute the
          measure/transfer error multiple times.
        vdata: `Dataset` used for validation.
          If provided measure is assumed to be a `TransferError` and
          working and validation dataset are passed onto it.
        """
        # TODO: place exceptions separately so we could avoid circular imports
        from mvpa.clfs.base import LearnerError

        # Holds the values for randomized labels.
        # NOTE(review): nothing visible here consumes this list after the
        # loop -- presumably the remainder of the method does; confirm.
        dist_samples = []

        # estimate null-distribution
        for p in range(self.__permutations):
            # new permutation all the time
            # but only permute the training data and keep the testdata constant
            #
            if __debug__:
                debug('STATMC', "Doing %i permutations: %i" \
                      % (self.__permutations, p+1), cr=True)

            # TODO this really needs to be more clever! If data samples are
            # shuffled within a class it really makes no difference for the
            # classifier, hence the number of permutations to estimate the
            # null-distribution of transfer errors can be reduced dramatically
            # when the *right* permutations (the ones that matter) are done.
            permuted_wdata = wdata.copy('shallow')
            permuted_wdata.permute_attr(attr=self.permute_attr,
                                        chunks_attr=self.chunks_attr,
                                        col=self.permute_col,
                                        assure_permute=self.assure_permute)

            # decide on the arguments to measure
            if vdata is not None:
                measure_args = [vdata, permuted_wdata]
            else:
                measure_args = [permuted_wdata]

            # compute and store the measure of this permutation
            # assume it has `TransferError` interface
            try:
                res = measure(*measure_args)
            except LearnerError as e:
                warning(
                    'Failed to obtain value from %s due to %s.  Measurement'
                    ' was skipped, which could lead to unstable and/or'
                    ' incorrect assessment of the null_dist' % (measure, e))
                # really skip it: `res` is either unbound or left over from
                # the previous permutation at this point
                continue
            res = np.asanyarray(res)
            dist_samples.append(res)
예제 #26
0
파일: base.py 프로젝트: gorlins/PyMVPA
    def _setRetrainable(self, value, force=False):
        """Switch the `retrainable` parameter of the classifier.

        Nothing is done unless the requested value differs from the current
        one (or `force` is set) and 'retrainable' is listed in the
        classifier internals.  Otherwise a trained classifier gets
        untrained, the 'retrained'/'repredicted' state variables are
        registered or removed, and the bookkeeping attributes
        (_changedData, __changedData_isset, __idhashes) are initialized
        or cleared.
        """
        retrain_param = self.params['retrainable']
        if not (force or value != retrain_param.value):
            # flag stays as is and nobody asked to force it
            return
        if 'retrainable' not in self._clf_internals:
            return

        if __debug__:
            debug("CLF_", "Setting retrainable to %s" % value)
        if 'meta' in self._clf_internals:
            warning("Retrainability is not yet crafted/tested for "
                    "meta classifiers. Unpredictable behavior might occur")

        # assure that we don't drag anything behind
        if self.trained:
            self.untrain()

        states = self.states
        if value:
            if 'retrainable' not in self._clf_internals:
                warning("Setting of flag retrainable for %s has no effect"
                        " since classifier has no such capability. It would"
                        " just lead to resources consumption and slowdown"
                        % self)
            state_specs = (
                ('retrained',
                 "Either retrainable classifier was retrained"),
                ('repredicted',
                 "Either retrainable classifier was repredicted"))
            for state_name, state_doc in state_specs:
                states.add(StateVariable(enabled=True,
                                         name=state_name,
                                         doc=state_doc))
        elif states.isKnown('retrained'):
            # flag gets disabled -- drop the states which came with it
            states.remove('retrained')
            states.remove('repredicted')

        retrain_param.value = value

        if not value:
            # no tracking is necessary for a non-retrainable classifier
            if 'retrainable' in self._clf_internals:
                #self.__resetChangedData()
                self.__changedData_isset = False
                self._changedData = None
                self.__idhashes = None
                if __debug__ and 'CHECK_RETRAIN' in debug.active:
                    self.__trained = None
            return

        # retrainable classifier needs to keep track of things
        self.__idhashes = dict(traindata=None, labels=None,
                               testdata=None) #, testtraindata=None)
        if __debug__ and 'CHECK_RETRAIN' in debug.active:
            # ??? it is not clear though if idhash is faster than
            # simple comparison of (dataset != __traineddataset).any(),
            # but if we like to get rid of __traineddataset then we
            # should use idhash anyways
            self.__trained = self.__idhashes.copy() # just same Nones
        self.__resetChangedData()
        self.__invalidatedChangedData = {}
예제 #27
0
파일: svm.py 프로젝트: arokem/PyMVPA
    def _predict(self, data):
        """Predict values for the data

        Returns the list of `self.model.predict()` outputs, one per sample.
        As a side effect `ca.estimates` and `ca.probabilities` get assigned
        if the corresponding conditional attributes are enabled.
        """
        # libsvm needs doubles
        src = _data2ls(data)
        ca = self.ca

        predictions = [self.model.predict(sample) for sample in src]

        if ca.is_enabled('estimates'):
            if self.__is_regression__:
                estimates = [self.model.predict_values_raw(sample)[0]
                             for sample in src]
            else:
                trained_targets = ca.trained_targets
                # if 'trained_targets' are literal they have to be mapped
                if np.issubdtype(trained_targets.dtype, 'c'):
                    trained_targets = \
                        self._attrmap.to_numeric(trained_targets)
                # XXX We do duplicate work. model.predict calls
                # predict_values_raw internally and then does voting or
                # thresholding. So if speed becomes a factor we might
                # want to move out logic from libsvm over here to base
                # predictions on obtained values, or adjust libsvm to
                # spit out values from predict() as well
                if len(trained_targets) != 2:
                    # In multiclass we return dictionary for all pairs
                    # of labels, since libsvm does 1-vs-1 pairs
                    estimates = [self.model.predict_values(sample)
                                 for sample in src]
                else:
                    # Apparently libsvm reorders labels so we need to
                    # track (1,0) values instead of (0,1) thus just
                    # lets take negative reverse
                    pair = (trained_targets[1], trained_targets[0])
                    estimates = [self.model.predict_values(sample)[pair]
                                 for sample in src]
                    if len(estimates) > 0:
                        if __debug__:
                            debug("SVM",
                                  "Forcing estimates to be ndarray and reshaping"
                                  " them into 1D vector")
                        estimates = \
                            np.asarray(estimates).reshape(len(estimates))
            ca.estimates = estimates

        if ca.is_enabled("probabilities"):
            # XXX Is this really necessary? yoh don't think so since
            # assignment to ca is doing the same
            #self.probabilities = [ self.model.predict_probability(p)
            #                       for p in src ]
            try:
                ca.probabilities = [self.model.predict_probability(sample)
                                    for sample in src]
            except TypeError:
                warning("Current SVM %s doesn't support probability " %
                        self + " estimation.")
        return predictions
예제 #28
0
파일: base.py 프로젝트: B-Rich/PyMVPA
    def _set_retrainable(self, value, force=False):
        """Switch the `retrainable` parameter of the classifier.

        Nothing is done unless the requested value differs from the current
        one (or `force` is set) and "retrainable" is among the classifier
        tags.  Otherwise a trained classifier gets untrained, the
        "retrained"/"repredicted" conditional attributes are registered or
        removed, and the bookkeeping attributes (_changedData,
        __changedData_isset, __idhashes) are initialized or cleared.
        """
        retrain_param = self.params["retrainable"]
        if not (force or value != retrain_param.value):
            # flag stays as is and nobody asked to force it
            return
        if "retrainable" not in self.__tags__:
            return

        if __debug__:
            debug("CLF_", "Setting retrainable to %s" % value)
        if "meta" in self.__tags__:
            warning(
                "Retrainability is not yet crafted/tested for "
                "meta classifiers. Unpredictable behavior might occur"
            )

        # assure that we don't drag anything behind
        if self.trained:
            self.untrain()

        ca = self.ca
        if value:
            if "retrainable" not in self.__tags__:
                warning(
                    "Setting of flag retrainable for %s has no effect"
                    " since classifier has no such capability. It would"
                    " just lead to resources consumption and slowdown" % self
                )
            attr_docs = (
                ("retrained", "Either retrainable classifier was retrained"),
                ("repredicted", "Either retrainable classifier was repredicted"),
            )
            for attr_name, attr_doc in attr_docs:
                ca[attr_name] = ConditionalAttribute(enabled=True, doc=attr_doc)
        elif ca.has_key("retrained"):
            # flag gets disabled -- drop the attributes which came with it
            ca.pop("retrained")
            ca.pop("repredicted")

        retrain_param.value = value

        if not value:
            # no tracking is necessary for a non-retrainable classifier
            if "retrainable" in self.__tags__:
                # self.__reset_changed_data()
                self.__changedData_isset = False
                self._changedData = None
                self.__idhashes = None
                if __debug__ and "CHECK_RETRAIN" in debug.active:
                    self.__trained = None
            return

        # retrainable classifier needs to keep track of things
        self.__idhashes = dict(traindata=None, targets=None, testdata=None)  # , testtraindata=None)
        if __debug__ and "CHECK_RETRAIN" in debug.active:
            # ??? it is not clear though if idhash is faster than
            # simple comparison of (dataset != __traineddataset).any(),
            # but if we like to get rid of __traineddataset then we
            # should use idhash anyways
            self.__trained = self.__idhashes.copy()  # just same Nones
        self.__reset_changed_data()
        self.__invalidatedChangedData = {}
예제 #29
0
파일: svm.py 프로젝트: gorlins/PyMVPA
    def _predict(self, data):
        """Predict values for the data

        Returns the list of `self.model.predict()` outputs, one per sample.
        As a side effect `states.values` and `states.probabilities` get
        assigned if the corresponding states are enabled.
        """
        # libsvm needs doubles
        src = data
        if not data.dtype == 'float64':
            src = data.astype('double')
        states = self.states

        predictions = [self.model.predict(sample) for sample in src]

        if states.isEnabled("values"):
            if self.regression:
                values = [self.model.predictValuesRaw(sample)[0]
                          for sample in src]
            else:
                trained_labels = self.trained_labels
                # XXX We do duplicate work. model.predict calls
                # predictValuesRaw internally and then does voting or
                # thresholding. So if speed becomes a factor we might
                # want to move out logic from libsvm over here to base
                # predictions on obtained values, or adjust libsvm to
                # spit out values from predict() as well
                if len(trained_labels) != 2:
                    # In multiclass we return dictionary for all pairs
                    # of labels, since libsvm does 1-vs-1 pairs
                    values = [self.model.predictValues(sample)
                              for sample in src]
                else:
                    # Apparently libsvm reorders labels so we need to
                    # track (1,0) values instead of (0,1) thus just
                    # lets take negative reverse
                    pair = (trained_labels[1], trained_labels[0])
                    values = [self.model.predictValues(sample)[pair]
                              for sample in src]
                    if len(values) > 0:
                        if __debug__:
                            debug("SVM",
                                  "Forcing values to be ndarray and reshaping"
                                  " them into 1D vector")
                        values = N.asarray(values).reshape(len(values))
            states.values = values

        if states.isEnabled("probabilities"):
            # XXX Is this really necessary? yoh don't think so since
            # assignment to states is doing the same
            #self.probabilities = [ self.model.predictProbability(p)
            #                       for p in src ]
            try:
                states.probabilities = [self.model.predictProbability(sample)
                                        for sample in src]
            except TypeError:
                warning("Current SVM %s doesn't support probability " %
                        self + " estimation.")
        return predictions
예제 #30
0
파일: sens.py 프로젝트: gorlins/PyMVPA
    def _call(self, dataset, callables=[]):
        """Extract weights from the model of the trained SVM classifier.

        Model's rho values are stored in `self.biases`.  With
        `split_weights` enabled (binary problems only) one set of weights
        per dataset label is computed, otherwise the product of SV
        coefficients and support vectors is taken.  The transposed weights
        are returned as an array.  `callables` is not used here.
        """
        # local bindings
        model = self.clf.model
        nr_class = model.nr_class

        if nr_class != 2:
            warning("You are estimating sensitivity for SVM %s trained on %d" %
                    (str(self.clf), nr_class) +
                    " classes. Make sure that it is what you intended to do" )

        svcoef = N.matrix(model.getSVCoef())
        svs = N.matrix(model.getSV())
        rhos = N.asarray(model.getRho())

        self.biases = rhos
        if not self.split_weights:
            # XXX yoh: .mean() is effectively
            # averages across "sensitivities" of all paired classifiers (I
            # think). See more info on this topic in svm.py on how sv_coefs
            # are stored
            #
            # First multiply SV coefficients with the actual SVs to get
            # weighted impact of SVs on decision, then for each feature
            # take mean across SVs to get a single weight value
            # per feature
            weights = svcoef * svs
        else:
            if nr_class != 2:
                raise NotImplementedError(
                    "Cannot compute per-class weights for"
                    " non-binary classification task")
            # libsvm might have different idea on the ordering
            # of labels, so we would need to map them back explicitly
            svm_labels = model.getLabels() # labels as assigned by libsvm
            ds_labels = list(dataset.uniquelabels) # labels in the dataset
            senses = [None] * len(ds_labels)
            # first label is given positive value
            per_sign = ((svcoef > 0, lambda x: x),
                        (svcoef < 0, lambda x: x*-1))
            for i, (mask, signed) in enumerate(per_sign):
                # convert to array, and just take the meaningful dimension
                picked = mask.A[0]
                sense = signed(svcoef[:, picked] * svs[picked, :]).A[0]
                senses[ds_labels.index(svm_labels[i])] = sense
            weights = N.array(senses)

        if __debug__:
            debug('SVM',
                  "Extracting weights for %d-class SVM: #SVs=%s, " % \
                  (nr_class, str(model.getNSV())) + \
                  " SVcoefshape=%s SVs.shape=%s Rhos=%s." % \
                  (svcoef.shape, svs.shape, rhos) + \
                  " Result: min=%f max=%f" % (N.min(weights), N.max(weights)))

        return N.asarray(weights.T)
예제 #31
0
파일: stats.py 프로젝트: esc/PyMVPA
    def fit(self, measure, ds):
        """Fit the distribution by performing multiple cycles which repeatedly
        permuted labels in the training dataset.

        Parameters
        ----------
        measure: Measure or None
          A measure used to compute the results from shuffled data. Can be None
          if a measure instance has been provided to the constructor.
        ds: `Dataset` which gets permuted and used to compute the
          measure/transfer error multiple times.
        """
        # TODO: place exceptions separately so we could avoid circular imports
        from mvpa.base.learner import LearnerError

        # prefer the already assigned measure over anything the was passed to
        # the function.
        # XXX that is a bit awkward but is necessary to keep the code changes
        # in the rest of PyMVPA minimal till this behavior become mandatory
        if not self._measure is None:
            measure = self._measure
            measure.untrain()

        dist_samples = []
        """Holds the values for randomized labels."""

        # estimate null-distribution
        # TODO this really needs to be more clever! If data samples are
        # shuffled within a class it really makes no difference for the
        # classifier, hence the number of permutations to estimate the
        # null-distribution of transfer errors can be reduced dramatically
        # when the *right* permutations (the ones that matter) are done.
        skipped = 0                     # # of skipped permutations
        for p, permuted_ds in enumerate(self.__permutator.generate(ds)):
            # new permutation all the time
            # but only permute the training data and keep the testdata constant
            #
            if __debug__:
                debug('STATMC', "Doing %i permutations: %i" \
                      % (self.__permutator.nruns, p+1), cr=True)

            # compute and store the measure of this permutation
            # assume it has `TransferError` interface
            try:
                res = measure(permuted_ds)
                dist_samples.append(res.samples)
            except LearnerError, e:
                if __debug__:
                    debug('STATMC', " skipped", cr=True)
                warning('Failed to obtain value from %s due to %s.  Measurement'
                        ' was skipped, which could lead to unstable and/or'
                        ' incorrect assessment of the null_dist' % (measure, e))
                skipped += 1
                continue
예제 #32
0
 def _check_range(self, c):
     """Return `c` if it lies within `self.extent`, otherwise [0, 0, 0].

     A warning is issued whenever the coordinates had to be reset.
     """
     if __debug__:
         debug('ATL__', "Querying for voxel %r" % (c, ))
     if check_range(c, self.extent):
         return c
     warning("Coordinates %r are not within the extent %r." \
             " Reseting to (0,0,0)" % (c, self.extent))
     # assume that voxel [0,0,0] is blank, i.e. carries
     # no labels which could possibly result in evil outcome
     return [0, 0, 0]
예제 #33
0
파일: base.py 프로젝트: esc/PyMVPA
 def _check_range(self, c):
     """Check the voxel coordinates and adjust them if necessary.

     Coordinates outside of `self.extent` trigger a warning and are
     replaced with [0, 0, 0]; valid coordinates are returned untouched.
     """
     if __debug__:
         debug('ATL__', "Querying for voxel %r" % (c,))
     within_extent = check_range(c, self.extent)
     if within_extent:
         return c
     warning("Coordinates %r are not within the extent %r." \
             " Reseting to (0,0,0)" % (c, self.extent))
     # assume that voxel [0,0,0] is blank, i.e. carries
     # no labels which could possibly result in evil outcome
     return [0, 0, 0]
예제 #34
0
    def _forward_dataset_grouped(self, ds):
        """Apply the function to every group of samples (or features).

        Groups are defined by all combinations of unique values of the
        attributes this mapper operates on.  Combinations which select
        nothing are reported with a warning and skipped.

        Returns a tuple of the stacked results and a dictionary with the
        lists of per-group attribute values (filled only if an attribute
        function was provided).
        """
        if self.__axis == 'samples':
            col, axis = ds.sa, 0
        elif self.__axis == 'features':
            col, axis = ds.fa, 1
        else:
            raise RuntimeError("This should not have happened!")

        # list of samples array pieces
        mdata = []
        attrs = dict([(key, []) for key in col.keys()])

        # create a dictionary for all unique elements in all attribute this
        # mapper should operate on
        self.__attrcombs = dict([(attr, col[attr].unique)
                                 for attr in self.__uattrs])
        # let it generate all combinations of unique elements in any attr
        for comb in _orthogonal_permutations(self.__attrcombs):
            masks = [array_whereequal(col[attr].value, value)
                     for attr, value in comb.iteritems()]
            selector = reduce(np.multiply, masks)
            # process the samples
            if axis == 0:
                samples = ds.samples[selector]
            else:
                samples = ds.samples[:, selector]

            # check if there were any samples for such a combination,
            # if not -- warning and skip the rest of the loop body
            if len(samples) == 0:
                warning(
                    'There were no samples for combination %s. It might be '
                    'a sign of a disbalanced dataset %s.' % (comb, ds))
                continue

            mdata.append(np.apply_along_axis(self.__fx, axis, samples,
                                             *self.__fxargs))
            if self.__attrfx is not None:
                # and now all samples attributes
                for attr in col:
                    attrs[attr].append(
                        self.__attrfx(col[attr].value[selector]))

        if axis == 0:
            return np.vstack(mdata), attrs
        return np.vstack(np.transpose(mdata)), attrs
예제 #35
0
    def __init__(self, **kwargs):
        """
        Initialize GLM-Net multinomial classifier.

        Any 'family' given among the keyword arguments is discarded (with
        a warning), since "multinomial" is always passed to the base class.

        See the help in R for further details on the parameters
        """
        # make sure they didn't specify regression
        requested_family = kwargs.pop('family', None)
        if requested_family is not None:
            warning('You specified the "family" parameter, but we '
                    'force this to be "multinomial".')

        # init base class, forcing the multinomial family
        _GLMNET.__init__(self, family='multinomial', **kwargs)
예제 #36
0
    def predict(self, dataset):
        """Predict classifier on data

        Shouldn't be overridden in subclasses unless explicitly needed
        to do so. Also subclasses trying to call super class's predict
        should call _predict if within _predict instead of predict()
        since otherwise it would loop
        """
        ## ??? yoh: changed to asany from as without exhaustive check
        data = np.asanyarray(dataset.samples)
        if __debug__:
            debug("CLF",
                  "Predicting classifier %(clf)s on ds %(dataset)s",
                  msgargs={
                      'clf': self,
                      'dataset': dataset
                  })

        # remember the time when started computing predictions
        t0 = time.time()

        ca = self.ca
        # to assure that those are reset (could be set due to testing
        # post-training)
        ca.reset(['estimates', 'predictions'])

        self._prepredict(dataset)

        if self.__trainednfeatures > 0 \
               or 'notrain2predict' in self.__tags__:
            result = self._predict(dataset)
        else:
            warning(
                "Trying to predict using classifier trained on no features")
            if __debug__:
                debug("CLF",
                      "No features were present for training, prediction is " \
                      "bogus")
            result = [None] * data.shape[0]

        ca.predicting_time = time.time() - t0

        # with labels mapping in-place, we also need to go back to the
        # literal labels
        if self._attrmap:
            try:
                result = self._attrmap.to_literal(result)
            except KeyError, e:
                raise FailedToPredictError, \
                      "Failed to convert predictions from numeric into " \
                      "literals: %s" % e
예제 #37
0
파일: fx.py 프로젝트: esc/PyMVPA
    def _forward_dataset_grouped(self, ds):
        """Run the function separately on each attribute-defined group.

        All combinations of unique values of the attributes of interest are
        visited; an empty selection causes a warning and is skipped.

        Returns the stacked per-group results together with a dictionary
        of per-group attribute values (lists stay empty if no attribute
        function was given).
        """
        axis_name = self.__axis
        if axis_name == 'samples':
            col = ds.sa
            axis = 0
        elif axis_name == 'features':
            col = ds.fa
            axis = 1
        else:
            raise RuntimeError("This should not have happened!")

        attrs = dict([(name, []) for name in col.keys()])
        pieces = [] # list of samples array pieces

        # create a dictionary for all unique elements in all attribute this
        # mapper should operate on
        self.__attrcombs = dict([(attr, col[attr].unique)
                                 for attr in self.__uattrs])
        # let it generate all combinations of unique elements in any attr
        for comb in _orthogonal_permutations(self.__attrcombs):
            selector = reduce(
                np.multiply,
                [array_whereequal(col[attr].value, value)
                 for attr, value in comb.iteritems()])
            # process the samples
            if axis == 0:
                samples = ds.samples[selector]
            else:
                samples = ds.samples[:, selector]

            # check if there were any samples for such a combination,
            # if not -- warning and skip the rest of the loop body
            if len(samples) == 0:
                warning('There were no samples for combination %s. It might be '
                        'a sign of a disbalanced dataset %s.' % (comb, ds))
                continue

            fxed_samples = np.apply_along_axis(self.__fx, axis, samples,
                                               *self.__fxargs)
            pieces.append(fxed_samples)
            if self.__attrfx is not None:
                # and now all samples attributes
                for attr in col:
                    fxed_attr = self.__attrfx(col[attr].value[selector])
                    attrs[attr].append(fxed_attr)

        if axis != 0:
            pieces = np.transpose(pieces)
        mdata = np.vstack(pieces)
        return mdata, attrs
예제 #38
0
파일: wavelet.py 프로젝트: gorlins/PyMVPA
    def _reverse(self, data):
        """Map the data back using single level reconstruction.

        Raises NotImplementedError if no level was specified or if the
        externals check 'pywt wp reconstruct' fails.
        """
        if __debug__:
            debug('MAP', "Converting signal back using DWP")

        if self.__level is None:
            raise NotImplementedError

        if not externals.exists('pywt wp reconstruct'):
            raise NotImplementedError(
                "Reconstruction for a single level for versions of "
                "pywt < 0.1.7 (revision 103) is not supported")

        # known-to-be-fixed reconstruction is not mandatory, just warn
        if not externals.exists('pywt wp reconstruct fixed'):
            warning("Reconstruction using available version of pywt might "
                    "result in incorrect data in the tails of the signal")
        return self.__reverseSingleLevel(data)
예제 #39
0
파일: __init__.py 프로젝트: geeragh/PyMVPA
def run_nose_tests():
    """Run nose-based tests -- really really silly way, just to get started

    Every collected test module is handed over to `nose.main` one by one.
    If nose is not available only a warning is issued.

    TODO: just switch to using numpy.testing framework, for that
          unittests need to be cleaned and unified first
    """
    nosetests = collect_nose_tests()
    if externals.exists('nose'):
        from nose import main
        # main.config.verbosity = int(cfg.get('tests', 'verbosity', default=1))
        for test_module in nosetests:
            main(defaultTest='mvpa.tests.' + test_module, exit=False)
    else:
        warning("You do not have python-nose installed -- no tests %s were ran"
                % (', '.join(nosetests)))
예제 #40
0
    def __init__(self,
                 gnb,
                 splitter,
                 qe,
                 errorfx=MeanMismatchErrorFx(),
                 indexsum=None,
                 **kwargs):
        """Initialize a GNBSearchlight

        Parameters
        ----------
        gnb : `GNB`
          `GNB` classifier as the specification of what GNB parameters
          to use. Instance itself isn't used.
        splitter : `Splitter`
          `Splitter` to use to compute the error.
        qe
          Passed as is to the `BaseSearchlight` constructor.
        errorfx : func, optional
          Functor that computes a scalar error value from the vectors of
          desired and predicted values (e.g. subclass of `ErrorFunction`)
        indexsum : ('sparse', 'fancy'), optional
          What use to compute sums over arbitrary columns.  'fancy'
          corresponds to regular fancy indexing over columns, whenever
          in 'sparse', produce of sparse matrices is used (usually
          faster, so is default if `scipy` is available.
        **kwargs
          Passed to the `BaseSearchlight` constructor.
        """

        # init base class first
        BaseSearchlight.__init__(self, qe, **kwargs)

        self._errorfx = errorfx
        self._splitter = splitter
        self._gnb = gnb

        if indexsum is None:
            # pick the default according to availability of scipy
            indexsum = 'fancy'
            if externals.exists('scipy'):
                indexsum = 'sparse'
        elif indexsum == 'sparse' and not externals.exists('scipy'):
            warning("Scipy.sparse isn't available so taking 'fancy' as "
                    "'indexsum' method.")
            indexsum = 'fancy'
        self._indexsum = indexsum

        if self._nproc not in (None, 1):
            raise NotImplementedError(
                "For now only nproc=1 (or None for "
                "autodetection) is supported by GNBSearchlight")
예제 #41
0
    def __init__(self, **kwargs):
        """
        Initialize GLM-Net.

        A 'family' keyword argument other than 'gaussian' (compared
        case-insensitively) is overridden with a warning.

        See the help in R for further details on the parameters
        """
        # the only family which makes sense for regression
        regr_family = 'gaussian'

        # make sure they didn't specify incompatible model
        chosen_family = kwargs.pop('family', regr_family).lower()
        if chosen_family != regr_family:
            warning('You specified the parameter family=%s, but we '
                    'force this to be "%s" for regression.'
                    % (chosen_family, regr_family))
            chosen_family = regr_family

        # init base class, forcing regression
        _GLMNET.__init__(self, family=chosen_family, **kwargs)
예제 #42
0
파일: base.py 프로젝트: geeragh/PyMVPA
    def predict(self, dataset):
        """Predict classifier on data

        Shouldn't be overridden in subclasses unless explicitly needed
        to do so. Also subclasses trying to call super class's predict
        should call _predict if within _predict instead of predict()
        since otherwise it would loop
        """
        ## ??? yoh: changed to asany from as without exhaustive check
        data = np.asanyarray(dataset.samples)
        if __debug__:
            debug("CLF", "Predicting classifier %(clf)s on ds %(dataset)s",
                msgargs={'clf':self, 'dataset':dataset})

        # remember the time when started computing predictions
        t0 = time.time()

        ca = self.ca
        # to assure that those are reset (could be set due to testing
        # post-training)
        ca.reset(['estimates', 'predictions'])

        self._prepredict(dataset)

        if self.__trainednfeatures > 0 \
               or 'notrain2predict' in self.__tags__:
            result = self._predict(dataset)
        else:
            warning("Trying to predict using classifier trained on no features")
            if __debug__:
                debug("CLF",
                      "No features were present for training, prediction is " \
                      "bogus")
            result = [None]*data.shape[0]

        ca.predicting_time = time.time() - t0

        # with labels mapping in-place, we also need to go back to the
        # literal labels
        if self._attrmap:
            try:
                result = self._attrmap.to_literal(result)
            except KeyError, e:
                raise FailedToPredictError, \
                      "Failed to convert predictions from numeric into " \
                      "literals: %s" % e
예제 #43
0
    def _train(self, data):
        """Train the classifier.

        For kNN it is degenerate -- just stores the data, resets the
        weights and prepares a zero vote counter for each unique target.
        """
        self.__data = data
        if __debug__:
            dtype_name = str(data.samples.dtype)
            if dtype_name.startswith('uint') or dtype_name.startswith('int'):
                warning("kNN: input data is in integers. "
                        "Overflow on arithmetic operations might result in"
                        " errors. Please convert dataset's samples into"
                        " floating datatype if any error is reported.")
        self.__weights = None

        # create dictionary with an item for each condition
        uniquelabels = data.sa[self.params.targets_attr].unique
        self.__votes_init = dict.fromkeys(uniquelabels, 0)
예제 #44
0
    def _forward_dataset(self, ds):
        """Z-score the samples of a dataset using the trained parameters.

        Either the global ('__all__') parameter set is applied to all
        samples, or each chunk is z-scored with its own parameters.
        Unless in-place operation was requested, a shallow copy of the
        dataset is modified and returned.

        Raises RuntimeError if the mapper has no parameters yet, or if a
        chunk has no parameters assigned.
        """
        # local binding
        chunks_attr = self.__chunks_attr
        dtype = self.__dtype

        if __debug__ and chunks_attr is not None:
            chunk_sizes = np.array(
                get_nsamples_per_attr(ds, chunks_attr).values())
            if chunk_sizes.min() <= 2:
                warning("Z-scoring chunk-wise having a chunk with less than three "
                        "samples will set features in these samples to either zero "
                        "(with 1 sample in a chunk) "
                        "or -1/+1 (with 2 samples in a chunk).")

        params = self.__params_dict
        if params is None:
            raise RuntimeError(
                "ZScoreMapper needs to be trained before call to forward")

        mds = ds
        if not self._secret_inplace_zscore:
            # shallow copy to put the new stuff in
            mds = ds.copy(deep=False)

        # cast the data to float, since in-place operations below do not upcast!
        if np.issubdtype(mds.samples.dtype, np.integer):
            mds.samples = mds.samples.astype(dtype)

        if '__all__' in params:
            # we have a global parameter set
            mds.samples = self._zscore(mds.samples, *params['__all__'])
            return mds

        # per chunk z-scoring
        chunks = mds.sa[chunks_attr]
        for c in chunks.unique:
            if c not in params:
                raise RuntimeError(
                    "%s has no parameters for chunk '%s'. It probably "
                    "wasn't present in the training dataset!?"
                    % (self.__class__.__name__, c))
            slicer = np.where(chunks.value == c)[0]
            mds.samples[slicer] = self._zscore(mds.samples[slicer],
                                               *params[c])

        return mds
예제 #45
0
    def _wm_reverse(self, data):
        """Map the data back using single level reconstruction.

        Raises NotImplementedError if no level was specified or if the
        externals check 'pywt wp reconstruct' fails.
        """
        if __debug__:
            debug('MAP', "Converting signal back using DWP")

        if self.__level is None:
            raise NotImplementedError

        if not externals.exists('pywt wp reconstruct'):
            raise NotImplementedError(
                "Reconstruction for a single level for versions of "
                "pywt < 0.1.7 (revision 103) is not supported")

        # known-to-be-fixed reconstruction is not mandatory, just warn
        if not externals.exists('pywt wp reconstruct fixed'):
            warning(
                "%s: Reverse mapping with this version of 'pywt' might "
                "result in incorrect data in the tails of the signal. "
                "Please check for an update of 'pywt', or be careful "
                "when interpreting the edges of the reverse mapped "
                "data." % self.__class__.__name__)
        return self.__reverse_single_level(data)
예제 #46
0
def _setdebug(obj, partname):
    """Helper to set level of debugging output for SG

    Does nothing if the module-level ``_M_DEBUG`` is None.

    Parameters
    ----------
    obj
      In SG debug output seems to be set per every object
    partname : str
      For what kind of object we are talking about... could be automated
      later on (TODO)
    """
    if _M_DEBUG is None:
        return
    debugname = "SG_%s" % partname.upper()

    # key -> (shogun log level, its symbolic name, action label)
    switch = {
        True: (_M_DEBUG, 'M_DEBUG', "enable"),
        False: (_M_ERROR, 'M_ERROR', "disable"),
        'GCDEBUG': (_M_GCDEBUG, 'M_GCDEBUG', "enable")
    }

    if __debug__:
        if 'SG_GC' in debug.active:
            key = 'GCDEBUG'
        else:
            key = debugname in debug.active
    else:
        key = False

    # the action label (third entry) is not used here
    sglevel, slevel, _ = switch[key]

    if __debug__ and 'SG_' in debug.active:
        debug(
            "SG_", "Setting verbosity for shogun.%s instance: %s to %s" %
            (partname, repr(obj), slevel))
    if sglevel is not None:
        obj.io.set_loglevel(sglevel)
    if __debug__ and 'SG_LINENO' in debug.active:
        try:
            obj.io.enable_file_and_line()
        except AttributeError:
            # obj.io has no enable_file_and_line(); report it and continue
            warning("Cannot enable SG_LINENO debug target for shogun %s" %
                    externals.versions['shogun'])
예제 #47
0
    def __call__(self, dataset):
        """Compute the measure on a given `Dataset`.

        The dataset is handed to `_call()`, and its output is then passed
        (together with the dataset) through `_postcall()`.  Whatever
        `_postcall()` produces is returned to the caller.

        In debug mode a warning is issued if the final result is neither
        an `AttrDataset` nor something with a one-dimensional shape.
        """
        raw = self._call(dataset)
        result = self._postcall(dataset, raw)

        # XXX Remove when "sensitivity-return-dataset" transition is done
        if __debug__:
            if not isinstance(result, AttrDataset):
                if len(result.shape) != 1:
                    warning("Postprocessing of '%s' doesn't return a Dataset, or "
                            "1D-array (got: '%s')."
                            % (self.__class__.__name__, result))
        return result
예제 #48
0
def run_tests_using_nose(limit=None, verbosity=1, exit_=False):
    """Run nose-based tests -- really really silly way, just to get started

    TODO: just switch to using numpy.testing framework, for that
          unittests need to be cleaned and unified first

    Parameters
    ----------
    limit
      If not None, only tests whose name (with its first five characters
      stripped) is contained in `limit` are run.
    verbosity
      Passed to the test collectors and to the nose configuration.  If
      true and no `limit` is given, test files found next to this module
      but not scheduled for testing are reported.
    exit_
      Passed to nose's `main` as its `exit` argument.
    """
    nosetests = collect_nose_tests(verbosity=verbosity)

    if not externals.exists('nose'):
        warning("You do not have python-nose installed.  Some unittests were "
                "skipped: %s" % (', '.join(nosetests)))
        return

    from nose import main
    import nose
    import nose.config

    tests = collect_unit_tests(verbosity=verbosity) + nosetests

    config = nose.config.Config(verbosity=verbosity,
                                plugins=nose.plugins.DefaultPluginManager())

    if limit is not None:
        # keep only the explicitly requested tests
        selected = [nt for nt in tests if nt[5:] in limit]
    else:
        if verbosity:
            # Lets see if we aren't missing any:
            import os
            import glob
            pattern = '%s%stest_*.py' % (os.path.dirname(__file__),
                                         os.path.sep)
            present = set(os.path.basename(f) for f in glob.glob(pattern))
            scheduled = set('%s.py' % f for f in tests)
            not_tested = present - scheduled
            if len(not_tested):
                print(
                    "T: Warning -- following test files were found but will "
                    "not be tested: %s" % ', '.join(not_tested))
        selected = tests

    config.testNames = ['mvpa.tests.' + nt for nt in selected]

    # run the tests
    _ = main(defaultTest=(), config=config, exit=exit_)
예제 #49
0
    def _get_default_c(self, data):
        """Compute the default value of C

        For a linear kernel, C is the inverse of the squared mean norm of
        the rows of `data`.  If that mean norm is zero, or the kernel is
        not linear, a warning is issued and 1.0 is returned.

        TODO: for non-linear SVMs
        """
        if self.params.kernel.__kernel_name__ != 'linear':
            warning("TODO: Computation of default C is not yet implemented" +
                    " for non-linear SVMs. Assigning 1.0")
            return 1.0

        # mean Euclidean norm of the rows
        row_norms = np.sqrt(np.sum(data * data, axis=1))
        datasetnorm = np.mean(row_norms)
        if datasetnorm == 0:
            warning("Obtained degenerate data with zero norm for training "
                    "of %s.  Scaling of C cannot be done." % self)
            return 1.0

        default_c = 1.0 / (datasetnorm**2)
        if __debug__:
            debug("SVM", "Default C computed to be %f" % default_c)
        return default_c
예제 #50
0
def test_all_dependencies(force=False, verbosity=1):
    """
    Test for all known dependencies.

    Parameters
    ----------
    force : boolean
      Whether to force the test even if it has already been
      performed.
    verbosity
      If true, a warning is issued for every dependency that is not
      available.

    """
    # loop over all known dependencies
    for dep in _KNOWN:
        available = exists(dep, force)
        if not available and verbosity:
            warning("%s is not available." % dep)

    if __debug__:
        # collect names of externals flagged as present in the config;
        # the first five characters of the option name are stripped
        present = []
        for k in cfg.options('externals'):
            if not k.startswith('have'):
                continue
            if cfg.getboolean('externals', k) == True:
                present.append(k[5:])
        debug('EXT', 'The following optional externals are present: %s' \
                     % present)
예제 #51
0
    def train(self, dataset):
        """Train classifier on a dataset

        Shouldn't be overridden in subclasses unless explicitly needed
        to do so

        Calls `_pretrain()`, then `_train()`, then `_posttrain()` with the
        dataset, and stores the time spent between the start of training
        and its end in ``self.ca.training_time``.

        Parameters
        ----------
        dataset
          Dataset to train on; needs `nfeatures` and `nsamples`.

        Returns
        -------
        Whatever `_train()` returns.

        Raises
        ------
        DegenerateInputError
          If the dataset has no features or no samples.
        """
        if dataset.nfeatures == 0 or dataset.nsamples == 0:
            # call-style raise: identical on Python 2, valid on Python 3
            raise DegenerateInputError(
                "Cannot train classifier on degenerate data %s" % dataset)
        if __debug__:
            debug("CLF",
                  "Training classifier %(clf)s on dataset %(dataset)s",
                  msgargs={
                      'clf': self,
                      'dataset': dataset
                  })

        self._pretrain(dataset)

        # remember the time when started training
        t0 = time.time()

        if dataset.nfeatures > 0:
            result = self._train(dataset)
        else:
            # The guard above already rejects nfeatures == 0, so this
            # branch is only reachable for a non-positive, non-zero count.
            warning("Trying to train on dataset with no features present")
            if __debug__:
                debug("CLF",
                      "No features present for training, no actual training " \
                      "is called")
            result = None

        self.ca.training_time = time.time() - t0
        self._posttrain(dataset)
        return result
예제 #52
0
    def test_confusion_based_error(self, l_clf):
        """Check ConfusionBasedError against TransferError on training data."""
        train_ds = datasets['uni2medium_train']
        # to check if we fail to classify for 3 labels
        three_label_ds = datasets['uni3medium_train']
        confusion_err = ConfusionBasedError(clf=l_clf)
        transfer_err = TransferError(clf=l_clf)

        # Shouldn't be able to access the state yet
        self.failUnlessRaises(UnknownStateError, confusion_err, None)

        l_clf.train(train_ds)
        e = confusion_err(None)
        te = transfer_err(train_ds)
        close_enough = abs(e - te) < 1e-10
        self.failUnless(
            close_enough,
            msg="ConfusionBasedError (%.2g) should be equal to TransferError "
            "(%.2g) on traindataset" % (e, te))

        # this will print nasty WARNING but it is ok -- it is just checking code
        # NB warnings are not printed while doing whole testing
        warning("Don't worry about the following warning.")
        outcome = transfer_err(three_label_ds)
        self.failIf(outcome is None)

        # try copying the beast
        terr_copy = copy(transfer_err)
예제 #53
0
 def __init__(self, *args, **kwargs):
     """Initialize dummy report

     All arguments are accepted and ignored; only a warning is issued.
     """
     notice = ("You are using DummyReport - no action will be taken. "
               "Please install reportlab to enjoy reporting facility "
               "within PyMVPA")
     warning(notice)
예제 #54
0
def plot_erp(data, SR=500, onsets=None,
            pre=0.2, pre_onset=None, post=None, pre_mean=None,
            color='r', errcolor=None, errtype=None, ax=pl,
            ymult=1.0, *args, **kwargs):
    """Plot single ERP on existing canvas

    The input `data` is never modified: scaling by `ymult` and baseline
    correction operate on new arrays.

    Parameters
    ----------
    data : 1D or 2D ndarray
      The data array can either be 1D (samples over time) or 2D
      (trials x samples). In the first case a boxcar mapper is used to
      extract the respective trial timecourses given a list of trial onsets.
      In the latter case, each row of the data array is taken as the EEG
      signal timecourse of a particular trial.
    onsets : list(int)
      List of onsets (in samples not in seconds).
    SR : int, optional
      Sampling rate (1/s) of the signal.
    pre : float, optional
      Duration (in seconds) to be plotted prior to onset.
    pre_onset : float or None
      If data is already in epochs (2D) then pre_onset provides information
      on how many seconds pre-stimulus were used to generate them. If None,
      then pre_onset = pre
    post : float
      Duration (in seconds) to be plotted after the onset.
    pre_mean : float
      Duration (in seconds) at the beginning of the window which is used
      for deriving the mean of the signal. If None, pre_mean = pre
    errtype : None or 'ste' or 'std' or 'ci95' or list of previous three
      Type of error value to be computed per datapoint.  'ste' --
      standard error of the mean, 'std' -- standard deviation 'ci95'
      -- 95% confidence interval (1.96 * ste), None -- no error margin
      is plotted (default)
      Optionally, multiple error types can be specified in a list. In that
      case all of them will be plotted.
    color : matplotlib color code, optional
      Color to be used for plotting the mean signal timecourse.
    errcolor : matplotlib color code
      Color to be used for plotting the error margin. If None, use main color
      but with weak alpha level
    ax :
      Target where to draw.
    ymult : float, optional
      Multiplier for the values. E.g. if negative-up ERP plot is needed:
      provide ymult=-1.0
    *args, **kwargs
      Additional arguments to `pylab.plot`.

    Returns
    -------
    array
      Mean ERP timeseries.

    Raises
    ------
    ValueError
      If `onsets` are given without `post`, or if an unknown error type
      is requested.
    RuntimeError
      If the trial data does not end up being 2D (trials x samples).
    """
    if pre_mean is None:
        pre_mean = pre

    # set default
    pre_discard = 0

    if onsets is not None: # if we need to extract ERPs
        if post is None:
            raise ValueError(
                "Duration post onsets must be provided if onsets are given")
        # trial timecourse duration
        duration = pre + post

        # We are working with a full timeline
        bcm = BoxcarMapper(onsets,
                           boxlength = int(SR * duration),
                           offset = -int(SR * pre))
        erp_data = bcm(data)

        # override values since we are using Boxcar
        pre_onset = pre
    else:
        if pre_onset is None:
            pre_onset = pre

        if pre_onset < pre:
            warning("Pre-stimulus interval to plot %g is smaller than provided "
                    "pre-stimulus captured interval %g, thus plot interval was "
                    "adjusted" % (pre, pre_onset))
            pre = pre_onset

        if post is None:
            # figure out post
            # NOTE(review): pre_discard is still 0 at this point (it is only
            # computed below), so this is the full epoch length -- confirm
            # that this is intended
            duration = float(data.shape[1]) / SR - pre_discard
            post = duration - pre
        else:
            duration = pre + post

        erp_data = data
        pre_discard = pre_onset - pre

    # Scale the data appropriately.  Deliberately NOT done in place:
    # erp_data may be the very array passed in by the caller, and an
    # in-place multiplication would also not upcast integer data.
    erp_data = erp_data * ymult

    # validity check -- we should have 2D matrix (trials x samples)
    if len(erp_data.shape) != 2:
        # wrap the shape into a 1-tuple: formatting with the bare shape
        # tuple fails for any shape whose length is not exactly 1
        raise RuntimeError(
            "plot_erp() supports either 1D data with onsets, or 2D data "
            "(trials x sample_points). Shape of the data at the point "
            "is %s" % (erp_data.shape,))

    if pre_mean != 0:
        # mean of pre-onset signal across trials
        erp_baseline = np.mean(
            erp_data[:, int((pre_onset-pre_mean)*SR):int(pre_onset*SR)])
        # center data on pre-onset mean (creates a new array)
        erp_data = erp_data - erp_baseline

    # generate timepoints and error ranges to plot filled error area
    # top ->
    # bottom <-
    time_points = np.arange(erp_data.shape[1]) * 1.0 / SR - pre_onset

    # if pre != pre_onset
    if pre_discard > 0:
        npoints = int(pre_discard * SR)
        time_points = time_points[npoints:]
        erp_data = erp_data[:, npoints:]

    # select only time points of interest (if post is provided)
    if post is not None:
        npoints = int(duration * SR)
        time_points = time_points[:npoints]
        erp_data = erp_data[:, :npoints]

    # compute mean signal timecourse across trials
    erp_mean = np.mean(erp_data, axis=0)

    # give sane default
    if errtype is None:
        errtype = []
    if not isinstance(errtype, list):
        errtype = [errtype]

    # the same for every error type, hence computed once
    if errcolor is None:
        errcolor = color
    # forward and backward time axis to draw a closed polygon
    time_points2w = np.hstack((time_points, time_points[::-1]))

    for et in errtype:
        # compute error per datapoint
        if et in ['ste', 'ci95']:
            erp_stderr = erp_data.std(axis=0) / np.sqrt(len(erp_data))
            if et == 'ci95':
                erp_stderr *= 1.96
        elif et == 'std':
            erp_stderr = erp_data.std(axis=0)
        else:
            # report the offending entry, not the whole list
            raise ValueError("Unknown error type '%s'" % (et,))

        error_top = erp_mean + erp_stderr
        error_bottom = erp_mean - erp_stderr
        error2w = np.hstack((error_top, error_bottom[::-1]))

        # plot error margin
        ax.fill(time_points2w, error2w,
                edgecolor=errcolor, facecolor=errcolor, alpha=0.2,
                zorder=3)

    # plot mean signal timecourse
    ax.plot(time_points, erp_mean, lw=2, color=color, zorder=4,
            *args, **kwargs)
#    ax.xaxis.set_major_locator(pl.MaxNLocator(4))
    return erp_mean
예제 #55
0
        if __debug__:
            debug(
                'EXT', "Presence of %s is%s verified%s" % (dep, {
                    True: '',
                    False: ' NOT'
                }[result], estr))

    if not result:
        if raise_ \
               and cfg.getboolean('externals', 'raise exception', True):
            raise RuntimeError, "Required external '%s' was not found" % dep
        if issueWarning is not None \
               and cfg.getboolean('externals', 'issue warning', True):
            if issueWarning is True:
                warning("Required external '%s' was not found" % dep)
            else:
                warning(issueWarning)

    # store result in config manager
    if not cfg.has_section('externals'):
        cfg.add_section('externals')
    if result:
        cfg.set('externals', 'have ' + dep, 'yes')
    else:
        cfg.set('externals', 'have ' + dep, 'no')

    return result


# Bind functions for some versions checkings
예제 #56
0
def coarsen_chunks(source, nchunks=4):
    """Change chunking of the dataset

    Group chunks into groups to match desired number of chunks. Makes
    sense if originally there were no strong grouping into chunks or
    each sample was independent, thus belonged to its own chunk

    Parameters
    ----------
    source : Dataset or list of chunk ids
      dataset or list of chunk ids to operate on. If Dataset, then its chunks
      get modified
    nchunks : int
      desired number of chunks

    Returns
    -------
    None if `source` is a Dataset (its 'chunks' sample attribute is
    updated in place), otherwise an array with the new chunk id of each
    entry in `source`.

    Raises
    ------
    ValueError
      If there are fewer unique chunks than `nchunks`.
    """

    if isinstance(source, Dataset):
        chunks = source.chunks
    else:
        chunks = source
    chunks_unique = np.unique(chunks)
    nchunks_orig = len(chunks_unique)

    if nchunks_orig < nchunks:
        raise ValueError(
            "Original number of chunks is %d. Cannot coarse them "
            "to get %d chunks" % (nchunks_orig, nchunks))

    # figure out number of samples per each chunk
    counts = dict.fromkeys(chunks_unique, 0)
    for c in chunks:
        counts[c] += 1

    # now we need to group chunks to get more or less equalized number
    # of samples per chunk. No sophistication is done -- just
    # consecutively group to get close to desired number of samples
    # per chunk
    # (list() so that np.sum also works on dictionary views)
    avg_chunk_size = np.sum(list(counts.values()))*1.0/nchunks
    chunks_groups = []
    cur_group = []
    cur_group_nsamples = 0
    samples_counted = 0
    for i, c in enumerate(chunks_unique):
        cur_group.append(c)
        cur_group_nsamples += counts[c]

        # time to get a new chunk?  The number of groups closed so far
        # is tracked via chunks_groups itself, so that the requested
        # `nchunks` stays intact for the sanity check below.
        if (samples_counted + cur_group_nsamples
            >= (len(chunks_groups)+1)*avg_chunk_size) or i==nchunks_orig-1:
            chunks_groups.append(cur_group)
            samples_counted += cur_group_nsamples
            cur_group_nsamples = 0
            cur_group = []

    if len(chunks_groups) != nchunks:
        warning("Apparently logic in coarseChunks is wrong. "
                "It was desired to get %d chunks, got %d"
                % (nchunks, len(chunks_groups)))

    # remap using groups
    # create dictionary
    chunks_map = {}
    for i, group in enumerate(chunks_groups):
        for c in group:
            chunks_map[c] = i

    # we always want an array!
    chunks_new = np.array([chunks_map[x] for x in chunks])

    if __debug__:
        debug("DS_", "Using dictionary %s to remap old chunks %s into new %s"
              % (chunks_map, chunks, chunks_new))

    if isinstance(source, Dataset):
        if __debug__:
            # report the number of resulting chunks, not of samples
            debug("DS", "Coarsing %d chunks into %d chunks for %s"
                  %(nchunks_orig, len(chunks_groups), source))
        source.sa['chunks'].value = chunks_new
        return
    else:
        return chunks_new
예제 #57
0
    from mvpa.kernels.sg import SGKernel, LinearSGKernel
    # set the default kernel here, to be able to import this module
    # when building the docs without SG
    _default_kernel_class_ = LinearSGKernel

    # Figure out debug IDs once and for all
    if hasattr(shogun.Classifier, 'M_DEBUG'):
        _M_DEBUG = shogun.Classifier.M_DEBUG
        _M_ERROR = shogun.Classifier.M_ERROR
        _M_GCDEBUG = None
    elif hasattr(shogun.Classifier, 'MSG_DEBUG'):
        _M_DEBUG = shogun.Classifier.MSG_DEBUG
        _M_ERROR = shogun.Classifier.MSG_ERROR
    else:
        _M_DEBUG, _M_ERROR = None, None
        warning("Could not figure out debug IDs within shogun. "
                "No control over shogun verbosity would be provided")
    # Highest level
    if hasattr(shogun.Classifier, 'MSG_GCDEBUG'):
        _M_GCDEBUG = shogun.Classifier.MSG_GCDEBUG
    else:
        _M_GCDEBUG = None

else:
    # set a fake default kernel here, to be able to import this module
    # when building the docs without SG
    _default_kernel_class_ = None

import operator

from mvpa.misc.param import Parameter
from mvpa.misc.attrmap import AttributeMap
예제 #58
0
    def _set_retrainable(self, value, force=False):
        """Assign value of the 'retrainable' parameter

        Nothing happens unless the value actually changes (or `force` is
        set) and the classifier is tagged as 'retrainable'.  Otherwise a
        trained classifier gets untrained, the 'retrained'/'repredicted'
        conditional attributes are added or removed, the parameter is
        updated, and the internal bookkeeping attributes (id hashes,
        changed-data flags) are initialized or cleared accordingly.
        """
        param = self.params['retrainable']
        if not (force or value != param.value):
            return
        if 'retrainable' not in self.__tags__:
            return

        if __debug__:
            debug("CLF_", "Setting retrainable to %s" % value)
        if 'meta' in self.__tags__:
            warning("Retrainability is not yet crafted/tested for "
                    "meta classifiers. Unpredictable behavior might occur")

        # assure that we don't drag anything behind
        if self.trained:
            self.untrain()

        cond_attrs = self.ca
        if value:
            if 'retrainable' not in self.__tags__:
                warning(
                    "Setting of flag retrainable for %s has no effect"
                    " since classifier has no such capability. It would"
                    " just lead to resources consumption and slowdown" %
                    self)
            cond_attrs['retrained'] = ConditionalAttribute(
                enabled=True,
                doc="Either retrainable classifier was retrained")
            cond_attrs['repredicted'] = ConditionalAttribute(
                enabled=True,
                doc="Either retrainable classifier was repredicted")
        elif cond_attrs.has_key('retrained'):
            cond_attrs.pop('retrained')
            cond_attrs.pop('repredicted')

        param.value = value

        check_retrain = __debug__ and 'CHECK_RETRAIN' in debug.active

        # if retrainable we need to keep track of things
        if value:
            self.__idhashes = dict.fromkeys(
                ('traindata', 'targets', 'testdata'))  #, 'testtraindata')
            if check_retrain:
                # ??? it is not clear though if idhash is faster than
                # simple comparison of (dataset != __traineddataset).any(),
                # but if we like to get rid of __traineddataset then we
                # should use idhash anyways
                self.__trained = self.__idhashes.copy()  # just same Nones
            self.__reset_changed_data()
            self.__invalidatedChangedData = {}
        elif 'retrainable' in self.__tags__:
            #self.__reset_changed_data()
            self.__changedData_isset = False
            self._changedData = None
            self.__idhashes = None
            if check_retrain:
                self.__trained = None
예제 #59
0
from mvpa.misc.exceptions import ConvergenceError
from mvpa.misc.param import Parameter
from mvpa.misc.state import ConditionalAttribute
from mvpa.datasets.base import Dataset

__all__ = ["SMLR", "SMLRWeights"]

# Pick the implementation of the stepwise regression: the C version is
# used if ctypes is available and the compiled library can be loaded,
# otherwise the pure-Python version remains the default.
_DEFAULT_IMPLEMENTATION = "Python"
if externals.exists('ctypes'):
    # Uber-fast C-version of the stepwise regression
    try:
        from mvpa.clfs.libsmlrc import stepwise_regression as _cStepwiseRegression
        _DEFAULT_IMPLEMENTATION = "C"
    except OSError:
        # the exception object itself is not needed; the comma form of
        # 'except' is a syntax error on Python 3
        warning("Failed to load fast implementation of SMLR.  May be you "
                "forgotten to build it.  We will use much slower pure-Python "
                "version")
        _cStepwiseRegression = None
else:
    _cStepwiseRegression = None
    warning("SMLR implementation without ctypes is overwhelmingly slow."
            " You are strongly advised to install python-ctypes")

if __debug__:
    from mvpa.base import debug


def _label2oneofm(labels, ulabels):
    """Convert labels to one-of-M form.

    TODO: Might be useful elsewhere so could migrate into misc/