def labelVoxel(self, c, levels=None):
    if self.__referenceLevel is None:
        warning("You did not provide what level to use "
                "for reference. Assigning 0th level -- '%s'"
                % (self._levels_dict[0],))
        self.setReferenceLevel(0)
        # return self.__referenceAtlas.labelVoxel(c, levels)

    c = self._checkRange(c)

    # obtain coordinates of the closest voxel
    cref = self._data[self.__referenceLevel.indexes, c[2], c[1], c[0]]
    dist = norm((cref - c) * self.voxdim)
    if __debug__:
        debug('ATL__', "Closest referenced point for %s is "
              "%s at distance %3.2f" % (`c`, `cref`, dist))

    if (self.distance - dist) >= 1e-3:  # neglect everything smaller
        result = self.__referenceAtlas.labelVoxel(cref, levels)
        result['voxel_referenced'] = c
        result['distance'] = dist
    else:
        result = self.__referenceAtlas.labelVoxel(c, levels)
        if __debug__:
            debug('ATL__', "Closest referenced point is "
                  "further than desired distance %.2f" % self.distance)
        result['voxel_referenced'] = None
        result['distance'] = 0
    return result
def __call__(self, predicted, target):
    """Requires all arguments."""
    from mvpa.base import warning
    warning("p-value for correlation is implemented only when scipy is "
            "available. Bogus value -1.0 is returned otherwise")
    return -1.0
def __init__(self, samples=None, **kwargs):
    """Initialize EEPDataset.

    :Parameters:
      samples: Filename (string) of an EEP binary file or an `EEPBin`
        object
    """
    # dataset props defaults
    dt = t0 = channelids = None

    # default way to use the constructor: with filename
    if not samples is None:
        if isinstance(samples, str):
            # open the eep file
            try:
                eb = EEPBin(samples)
            except RuntimeError, e:
                warning("ERROR: EEPDatasets: Cannot open samples file %s"
                        % samples)  # should we also raise an error?
                raise e
        elif isinstance(samples, EEPBin):
            # nothing special
            eb = samples
        else:
            raise ValueError, \
                  "EEPDataset constructor takes the filename of an " \
                  "EEP file or an EEPBin object as 'samples' argument."
        samples = eb.data
        dt = eb.dt
        channelids = eb.channels
        t0 = eb.t0
def _getUniqueLengthNCombinations_binary(L, n=None, sort=True):
    """Find all subsets of data

    :Parameters:
      L : list
        list of unique ids
      n : None or int
        If None, all possible subsets are returned. If n is specified
        (int), then only the ones of the length n are returned
      sort : bool
        Whether to sort the resultant sequence

    Adapted from Alex Martelli:
    http://mail.python.org/pipermail/python-list/2001-January/067815.html
    """
    N = len(L)
    if N > 20 and n == 1:
        warning("getUniqueLengthNCombinations_binary should not be used for "
                "large N")
    result = []
    for X in range(2**N):
        x = [ L[i] for i in range(N) if X & (1L<<i) ]
        if n is None or len(x) == n:
            # yield x # if we wanted to use it as a generator
            result.append(x)
    if sort:
        result.sort()
    # if __debug__ and n is not None:
    #     # verify the result
    #     # would need scipy... screw it
    #     assert(len(result) == ...)
    return result
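# A minimal standalone sketch (hypothetical, not part of PyMVPA) of the
# bitmask enumeration used above: each integer X in [0, 2**N) encodes one
# subset of L through its binary digits, so scanning all X enumerates all
# subsets. For a fixed n, itertools.combinations(L, n) yields the same
# sets without visiting all 2**N masks -- hence the warning for large N.
def subsets_via_bitmask(L, n=None):
    """Return all subsets of L, or only those of length n."""
    N = len(L)
    result = []
    for X in range(2 ** N):
        x = [L[i] for i in range(N) if X & (1 << i)]
        if n is None or len(x) == n:
            result.append(x)
    return result

# subsets_via_bitmask([1, 2, 3], n=2) -> [[1, 2], [1, 3], [2, 3]]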
def _load_anynifti(src, ensure=False, enforce_dim=None):
    """Load/access NIfTI data from files or instances.

    Parameters
    ----------
    src : str or NiftiImage
      Filename of a NIfTI image or a `NiftiImage` instance.
    ensure : bool, optional
      If True, throw a ValueError exception if the data cannot be loaded.
    enforce_dim : int or None
      If not None, it is the dimensionality of the data to be enforced,
      commonly 4D for the data, and 3D for the mask in case of fMRI.

    Returns
    -------
    NiftiImage or None
      If the source is not supported None is returned.

    Raises
    ------
    ValueError
      If there is a problem with the data (variable dimensionality) or
      loading failed and ensure=True.
    """
    nifti = None

    # figure out what type
    if isinstance(src, str):
        # open the nifti file
        try:
            nifti = NiftiImage(src)
        except RuntimeError, e:
            warning("ERROR: Cannot open NIfTI file %s" % src)
            raise e
def __init__(self, **kwargs):
    """Initialize an SMLR classifier.
    """
    """
    TODO:
     # Add in likelihood calculation
     # Add kernels, not just direct methods.
    """
    # init base class first
    Classifier.__init__(self, **kwargs)

    if _cStepwiseRegression is None and self.params.implementation == 'C':
        warning('SMLR: C implementation is not available.'
                ' Using pure Python one')
        self.params.implementation = 'Python'

    # pylint friendly initializations
    self._ulabels = None
    """Unique labels from the training set."""
    self.__weights_all = None
    """Contains all weights including bias values"""
    self.__weights = None
    """Just the weights, without the biases"""
    self.__biases = None
    """The biases, will remain None if has_bias is False"""
def _get_increments(self, ndim):
    """Creates a list of increments for a given dimensionality

    RF: lame yoh just cut-pasted and tuned up because everything
        depends on ndim...
    """
    # Set element_sizes
    element_sizes = self._element_sizes
    if element_sizes is None:
        element_sizes = np.ones(ndim)
    else:
        if (ndim != len(element_sizes)):
            raise ValueError, \
                  "Dimensionality mismatch: element_sizes %s provided " \
                  "to constructor had %i dimensions, whereas queried " \
                  "coordinate had %i" \
                  % (element_sizes, len(element_sizes), ndim)
    center = np.zeros(ndim)
    element_sizes = np.asanyarray(element_sizes)
    # What range for each dimension
    erange = np.ceil(self._radius / element_sizes).astype(int)

    tentative_increments = np.array(list(np.ndindex(tuple(erange*2 + 1)))) \
                           - erange
    # Filter out the ones beyond the "sphere"
    res = np.array([x for x in tentative_increments
                    if self._inner_radius
                    < self._distance_func(x * element_sizes, center)
                    <= self._radius])
    if not len(res):
        warning("%s defines no neighbors" % self)
    return res
def train(self, dataset):
    """Train classifier on a dataset

    Shouldn't be overridden in subclasses unless explicitly needed
    to do so
    """
    if dataset.nfeatures == 0 or dataset.nsamples == 0:
        raise DegenerateInputError, \
              "Cannot train classifier on degenerate data %s" % dataset
    if __debug__:
        debug("CLF", "Training classifier %(clf)s on dataset %(dataset)s",
              msgargs={'clf':self, 'dataset':dataset})

    self._pretrain(dataset)

    # remember the time when started training
    t0 = time.time()

    if dataset.nfeatures > 0:
        result = self._train(dataset)
    else:
        warning("Trying to train on dataset with no features present")
        if __debug__:
            debug("CLF",
                  "No features present for training, no actual training "
                  "is called")
        result = None

    self.ca.training_time = time.time() - t0
    self._posttrain(dataset)
    return result
def _SLcholesky_autoreg(C, nsteps=None, **kwargs):
    """Simple wrapper around cholesky to incrementally regularize the
    matrix until successful computation.

    For `nsteps` we boost the diagonal 10-fold each time from the
    'epsilon' of the respective dtype. If None -- would proceed until
    reaching 1.
    """
    if nsteps is None:
        nsteps = -int(np.floor(np.log10(np.finfo(float).eps)))
    result = None
    for step in xrange(nsteps):
        epsilon_value = (10**step) * np.finfo(C.dtype).eps
        epsilon = epsilon_value * np.eye(C.shape[0])
        try:
            result = SLcholesky(C + epsilon, lower=True)
        except SLAError, e:
            warning("Cholesky decomposition led to failure: %s. "
                    "As requested, performing auto-regularization but "
                    "for better control you might prefer to regularize "
                    "yourself by providing lm parameter to GPR" % e)
            if step < nsteps - 1:
                if __debug__:
                    debug("GPR",
                          "Failed to obtain cholesky on "
                          "auto-regularization step %d value %g. Got %s."
                          " Boosting lambda more to reg. C."
                          % (step, epsilon_value, e))
                continue
            else:
                raise
        return result
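# A minimal sketch of the auto-regularization idea above (assuming scipy;
# standalone, not PyMVPA code): keep adding a growing multiple of the
# dtype epsilon to the diagonal until the factorization succeeds.
import numpy as np
from scipy.linalg import cholesky, LinAlgError

def cholesky_autoreg(C, nsteps=16):
    for step in range(nsteps):
        jitter = (10 ** step) * np.finfo(C.dtype).eps * np.eye(C.shape[0])
        try:
            return cholesky(C + jitter, lower=True)
        except LinAlgError:
            if step == nsteps - 1:
                raise  # give up once the diagonal boost is exhausted

# e.g. an exactly singular (rank-1) covariance that plain cholesky rejects:
#   L = cholesky_autoreg(np.array([[1., 1.], [1., 1.]]))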
def _setdebug(obj, partname):
    """Helper to set level of debugging output for SG

    :Parameters:
      obj
        In SG debug output seems to be set per every object
      partname : basestring
        For what kind of object we are talking about... could be
        automated later on (TODO)
    """
    debugname = "SG_%s" % partname.upper()

    switch = {True: (shogun.Kernel.M_DEBUG, 'M_DEBUG', "enable"),
              False: (shogun.Kernel.M_ERROR, 'M_ERROR', "disable")}

    key = __debug__ and debugname in debug.active

    sglevel, slevel, progressfunc = switch[key]

    if __debug__:
        debug("SG_", "Setting verbosity for shogun.%s instance: %s to %s"
              % (partname, `obj`, slevel))
    obj.io.set_loglevel(sglevel)
    try:
        exec "obj.io.%s_progress()" % progressfunc
    except:
        warning("Shogun version installed has no way to enable progress"
                " reports")
def getNiftiFromAnySource(src, ensure=False, enforce_dim=None):
    """Load/access NIfTI data from files or instances.

    :Parameters:
      src: str | NiftiImage
        Filename of a NIfTI image or a `NiftiImage` instance.
      ensure : bool
        If True, throw a ValueError exception if the data cannot be
        loaded.
      enforce_dim : int or None
        If not None, it is the dimensionality of the data to be enforced,
        commonly 4D for the data, and 3D for the mask in case of fMRI.

    :Returns:
      NiftiImage | None
        If the source is not supported None is returned.
    """
    nifti = None

    # figure out what type
    if isinstance(src, str):
        # open the nifti file
        try:
            nifti = NiftiImage(src)
        except RuntimeError, e:
            warning("ERROR: NiftiDatasets: Cannot open NIfTI file %s"
                    % src)
            raise e
def _precall(self, testdataset, trainingdataset=None):
    """Generic part which trains the classifier if necessary
    """
    if not trainingdataset is None:
        if self.__train:
            # XXX can be pretty annoying if triggered inside an algorithm
            # where it cannot be switched off, but retraining might be
            # intended or at least not avoidable.
            # Additionally is_trained docs say:
            #   MUST BE USED WITH CARE IF EVER
            #
            # switching it off for now
            #if self.__clf.is_trained(trainingdataset):
            #    warning('It seems that classifier %s was already trained' %
            #            self.__clf + ' on dataset %s. Please inspect' \
            #                % trainingdataset)
            if self.ca.is_enabled('training_stats'):
                self.__clf.ca.change_temporarily(
                    enable_ca=['training_stats'])
            self.__clf.train(trainingdataset)
            if self.ca.is_enabled('training_stats'):
                self.ca.training_stats = \
                    self.__clf.ca.training_stats
                self.__clf.ca.reset_changed_temporarily()

    if self.__clf.ca.is_enabled('trained_targets') \
           and not self.__clf.__is_regression__ \
           and not testdataset is None:
        newlabels = set(testdataset.sa[self.clf.get_space()].unique) \
                    - set(self.__clf.ca.trained_targets)
        if len(newlabels) > 0:
            warning("Classifier %s wasn't trained to classify labels %s"
                    % (self.__clf, newlabels) +
                    " present in testing dataset. Make sure that you have"
                    " not mixed order/names of the arguments anywhere")
def test_confusion_based_error(self, l_clf):
    train = datasets['uni2medium']
    train = train[train.sa.train == 1]
    # to check if we fail to classify for 3 labels
    test3 = datasets['uni3medium']
    test3 = test3[test3.sa.train == 1]
    err = ConfusionBasedError(clf=l_clf)
    terr = TransferMeasure(l_clf, Splitter('train', attr_values=[1, 1]),
                           postproc=BinaryFxNode(mean_mismatch_error,
                                                 'targets'))

    self.failUnlessRaises(UnknownStateError, err, None)
    """Shouldn't be able to access the state yet"""

    l_clf.train(train)
    e, te = err(None), terr(train)
    te = np.asscalar(te)
    self.failUnless(abs(e - te) < 1e-10,
                    msg="ConfusionBasedError (%.2g) should be equal to "
                        "TransferError (%.2g) on traindataset" % (e, te))

    # this will print nasty WARNING but it is ok -- it is just checking code
    # NB warnings are not printed while doing whole testing
    warning("Don't worry about the following warning.")
    if 'multiclass' in l_clf.__tags__:
        self.failIf(terr(test3) is None)

    # try copying the beast
    terr_copy = copy(terr)
def label_voxel(self, c, levels=None):
    if self.__referenceLevel is None:
        warning("You did not provide what level to use "
                "for reference. Assigning 0th level -- '%s'"
                % (self._levels[0],))
        self.set_reference_level(0)
        # return self.__referenceAtlas.label_voxel(c, levels)

    c = self._check_range(c)

    # obtain coordinates of the closest voxel
    cref = self._data[self.__referenceLevel.indexes, c[2], c[1], c[0]]
    dist = norm((cref - c) * self.voxdim)
    if __debug__:
        debug('ATL__', "Closest referenced point for %r is "
              "%r at distance %3.2f" % (c, cref, dist))

    if (self.distance - dist) >= 1e-3:  # neglect everything smaller
        result = self.__referenceAtlas.label_voxel(cref, levels)
        result['voxel_referenced'] = c
        result['distance'] = dist
    else:
        result = self.__referenceAtlas.label_voxel(c, levels)
        if __debug__:
            debug('ATL__', "Closest referenced point is "
                  "further than desired distance %.2f" % self.distance)
        result['voxel_referenced'] = None
        result['distance'] = 0
    return result
def _call(self, ds):
    # local binding
    generator = self._generator
    node = self._node
    ca = self.ca
    space = self.get_space()
    concat_as = self._concat_as

    if self.ca.is_enabled("stats") and (not node.ca.has_key("stats")
                                        or not node.ca.is_enabled("stats")):
        warning("'stats' conditional attribute was enabled, but "
                "the assigned node '%s' either doesn't support it, "
                "or it is disabled" % node)
    # precharge conditional attributes
    ca.datasets = []

    # run the node on all generated datasets
    results = []
    for i, sds in enumerate(generator.generate(ds)):
        if ca.is_enabled("datasets"):
            # store dataset in ca
            ca.datasets.append(sds)
        # run the beast
        result = node(sds)
        # callback
        if not self._callback is None:
            self._callback(data=sds, node=node, result=result)
        # subclass postprocessing
        result = self._repetition_postcall(sds, node, result)
        if space:
            # XXX maybe try to get something more informative from the
            # processing node (e.g. in 0.5 it used to be 'chunks'->'chunks'
            # to indicate what was trained and what was tested. Now it is
            # more tricky, because `node` could be anything
            result.set_attr(space, (i,))
        # store
        results.append(result)

        if ca.is_enabled("stats") and node.ca.has_key("stats") \
               and node.ca.is_enabled("stats"):
            if not ca.is_set('stats'):
                # create empty stats container of matching type
                ca.stats = node.ca['stats'].value.__class__()
            # harvest summary stats
            ca['stats'].value.__iadd__(node.ca['stats'].value)

    # charge condition attribute
    self.ca.repetition_results = results

    # stack all results into a single Dataset
    if concat_as == 'samples':
        results = vstack(results)
    elif concat_as == 'features':
        results = hstack(results)
    else:
        raise ValueError("Unknown concatenation mode '%s'" % concat_as)
    # no need to store the raw results, since the Measure class will
    # automatically store them in a CA
    return results
def seed(random_seed):
    if __debug__:
        debug('SG', "Seeding shogun's RNG with %s" % random_seed)
    try:
        # reuse the same seed for shogun
        shogun.Library.Math_init_random(random_seed)
    except Exception, e:
        warning('Shogun cannot be seeded due to %s' % (e,))
def corr_error_prob(predicted, target):
    """Computes p-value of correlation between the target and the
    predicted values.
    """
    from mvpa.base import warning
    warning("p-value for correlation is implemented only when scipy is "
            "available. Bogus value -1.0 is returned otherwise")
    return -1.0
def _pvalue(x, cdf_func, tail, return_tails=False, name=None):
    """Helper function to return p-value(x) given cdf and tail

    Parameters
    ----------
    x : scalar or array-like
      Values for which to compute the p-values.
    cdf_func : callable
      Function to be used to derive cdf values for x
    tail : str ('left', 'right', 'any', 'both')
      Which tail of the distribution to report. For 'any' and 'both'
      it chooses the tail it belongs to based on the comparison to
      p=0.5. In the case of 'any' significance is taken like in a
      one-tailed test.
    return_tails : bool
      If True, a tuple (pvalues, tails) is returned, where tails
      contains 1s if the value was from the right tail, and 0 if the
      value was from the left tail.
    """
    is_scalar = np.isscalar(x)
    if is_scalar:
        x = [x]

    cdf = cdf_func(x)

    if __debug__ and 'CHECK_STABILITY' in debug.active:
        cdf_min, cdf_max = np.min(cdf), np.max(cdf)
        if cdf_min < 0 or cdf_max > 1.0:
            s = ('', ' for %s' % name)[int(name is not None)]
            warning('Stability check of cdf %s failed%s. Min=%s, max=%s'
                    % (cdf_func, s, cdf_min, cdf_max))

    # no escape but to assure that CDF is in the right range. Some
    # distributions from scipy tend to jump away from [0,1]
    cdf = np.clip(cdf, 0, 1.0)

    if tail == 'left':
        if return_tails:
            right_tail = np.zeros(cdf.shape, dtype=bool)
    elif tail == 'right':
        cdf = 1 - cdf
        if return_tails:
            right_tail = np.ones(cdf.shape, dtype=bool)
    elif tail in ('any', 'both'):
        right_tail = (cdf >= 0.5)
        cdf[right_tail] = 1.0 - cdf[right_tail]
        if tail == 'both':
            # we need to report the area under both tails
            # XXX this is only meaningful for symmetric distributions
            cdf *= 2

    # Assure that NaNs didn't get a significant value
    cdf[np.isnan(x)] = 1.0
    if is_scalar:
        res = cdf[0]
    else:
        res = cdf

    if return_tails:
        return (res, right_tail)
    else:
        return res
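# A hypothetical usage sketch for the tail logic above (assuming scipy's
# standard normal CDF; values and names are made up):
import numpy as np
from scipy.stats import norm as std_normal

x = np.array([-1.96, 0.0, 1.96])
cdf = std_normal.cdf(x)
p_right = 1 - cdf                            # tail='right'
p_any = np.where(cdf >= 0.5, 1 - cdf, cdf)   # tail='any': closer tail only
p_both = 2 * p_any                           # tail='both': both tails' area
# p_right ~ [0.975, 0.5, 0.025]; p_both ~ [0.05, 1.0, 0.05]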
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.nproc

    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1
    # train the queryengine
    self._queryengine.train(dataset)

    # decide whether to run on all possible center coords or just a
    # provided subset
    if isinstance(self.__roi_ids, str):
        roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
    elif self.__roi_ids is not None:
        roi_ids = self.__roi_ids
        # safeguard against stupidity
        if __debug__:
            if max(roi_ids) >= dataset.nfeatures:
                raise IndexError, \
                      "Maximal center_id found is %s whereas given " \
                      "dataset has only %d features" \
                      % (max(roi_ids), dataset.nfeatures)
    else:
        roi_ids = np.arange(dataset.nfeatures)

    # pass to subclass
    results, roi_sizes = self._sl_call(dataset, roi_ids, nproc)

    if not roi_sizes is None:
        self.ca.roi_sizes = roi_sizes

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into
        # the results as well
        if self.__roi_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)
            mapper.append(StaticFeatureSelection(roi_ids,
                                                 dshape=dataset.shape[1:]))
            results.a['mapper'] = mapper

    # charge state
    self.ca.raw_results = results

    # return raw results, base-class will take care of transformations
    return results
def fit(self, measure, wdata, vdata=None):
    """Fit the distribution by performing multiple cycles which
    repeatedly permute labels in the training dataset.

    Parameters
    ----------
    measure: (`Featurewise`)`DatasetMeasure` or `TransferError`
      TransferError instance used to compute all errors.
    wdata: `Dataset` which gets permuted and used to compute the
      measure/transfer error multiple times.
    vdata: `Dataset` used for validation. If provided measure is
      assumed to be a `TransferError` and working and validation
      dataset are passed onto it.
    """
    # TODO: place exceptions separately so we could avoid circular imports
    from mvpa.clfs.base import LearnerError

    dist_samples = []
    """Holds the values for randomized labels."""

    # estimate null-distribution
    for p in xrange(self.__permutations):
        # new permutation all the time
        # but only permute the training data and keep the testdata constant
        #
        if __debug__:
            debug('STATMC', "Doing %i permutations: %i"
                            % (self.__permutations, p+1), cr=True)

        # TODO this really needs to be more clever! If data samples are
        # shuffled within a class it really makes no difference for the
        # classifier, hence the number of permutations to estimate the
        # null-distribution of transfer errors can be reduced dramatically
        # when the *right* permutations (the ones that matter) are done.
        permuted_wdata = wdata.copy('shallow')
        permuted_wdata.permute_attr(
            attr=self.permute_attr,
            chunks_attr=self.chunks_attr,
            col=self.permute_col,
            assure_permute=self.assure_permute)

        # decide on the arguments to measure
        if not vdata is None:
            measure_args = [vdata, permuted_wdata]
        else:
            measure_args = [permuted_wdata]

        # compute and store the measure of this permutation
        # assume it has `TransferError` interface
        try:
            res = measure(*measure_args)
        except LearnerError, e:
            warning('Failed to obtain value from %s due to %s. Measurement'
                    ' was skipped, which could lead to unstable and/or'
                    ' incorrect assessment of the null_dist' % (measure, e))
            continue
        res = np.asanyarray(res)
        dist_samples.append(res)
def _setRetrainable(self, value, force=False):
    """Assign value of retrainable parameter

    If retrainable flag is to be changed, classifier has to be
    untrained.  Also internal attributes such as _changedData,
    __changedData_isset, and __idhashes should be initialized if
    it becomes retrainable
    """
    pretrainable = self.params['retrainable']
    if (force or value != pretrainable.value) \
           and 'retrainable' in self._clf_internals:
        if __debug__:
            debug("CLF_", "Setting retrainable to %s" % value)
        if 'meta' in self._clf_internals:
            warning("Retrainability is not yet crafted/tested for "
                    "meta classifiers. Unpredictable behavior might occur")
        # assure that we don't drag anything behind
        if self.trained:
            self.untrain()
        states = self.states
        if not value and states.isKnown('retrained'):
            states.remove('retrained')
            states.remove('repredicted')
        if value:
            if not 'retrainable' in self._clf_internals:
                warning("Setting of flag retrainable for %s has no effect"
                        " since classifier has no such capability. It would"
                        " just lead to resources consumption and slowdown"
                        % self)
            states.add(StateVariable(enabled=True,
                    name='retrained',
                    doc="Whether retrainable classifier was retrained"))
            states.add(StateVariable(enabled=True,
                    name='repredicted',
                    doc="Whether retrainable classifier was repredicted"))

        pretrainable.value = value

        # if retrainable we need to keep track of things
        if value:
            self.__idhashes = {'traindata': None,
                               'labels': None,
                               'testdata': None}  #, 'testtraindata': None}
            if __debug__ and 'CHECK_RETRAIN' in debug.active:
                # ??? it is not clear though if idhash is faster than
                # simple comparison of (dataset != __traineddataset).any(),
                # but if we like to get rid of __traineddataset then we
                # should use idhash anyways
                self.__trained = self.__idhashes.copy()  # just same Nones
            self.__resetChangedData()
            self.__invalidatedChangedData = {}
        elif 'retrainable' in self._clf_internals:
            #self.__resetChangedData()
            self.__changedData_isset = False
            self._changedData = None
            self.__idhashes = None
            if __debug__ and 'CHECK_RETRAIN' in debug.active:
                self.__trained = None
def _predict(self, data):
    """Predict values for the data
    """
    # libsvm needs doubles
    src = _data2ls(data)
    ca = self.ca

    predictions = [ self.model.predict(p) for p in src ]

    if ca.is_enabled('estimates'):
        if self.__is_regression__:
            estimates = [ self.model.predict_values_raw(p)[0] for p in src ]
        else:
            # if 'trained_targets' are literal they have to be mapped
            if np.issubdtype(self.ca.trained_targets.dtype, 'c'):
                trained_targets = self._attrmap.to_numeric(
                        self.ca.trained_targets)
            else:
                trained_targets = self.ca.trained_targets
            nlabels = len(trained_targets)
            # XXX We do duplicate work. model.predict calls
            # predict_values_raw internally and then does voting or
            # thresholding. So if speed becomes a factor we might
            # want to move out logic from libsvm over here to base
            # predictions on obtained values, or adjust libsvm to
            # spit out values from predict() as well
            if nlabels == 2:
                # Apparently libsvm reorders labels so we need to
                # track (1,0) values instead of (0,1) thus just
                # lets take negative reverse
                estimates = [ self.model.predict_values(p)
                                  [(trained_targets[1], trained_targets[0])]
                              for p in src ]
                if len(estimates) > 0:
                    if __debug__:
                        debug("SVM",
                              "Forcing estimates to be ndarray and "
                              "reshaping them into 1D vector")
                    estimates = np.asarray(estimates).reshape(len(estimates))
            else:
                # In multiclass we return dictionary for all pairs
                # of labels, since libsvm does 1-vs-1 pairs
                estimates = [ self.model.predict_values(p) for p in src ]
        ca.estimates = estimates

    if ca.is_enabled("probabilities"):
        # XXX Is this really necessary? yoh don't think so since
        # assignment to ca is doing the same
        #self.probabilities = [ self.model.predict_probability(p)
        #                       for p in src ]
        try:
            ca.probabilities = [ self.model.predict_probability(p)
                                 for p in src ]
        except TypeError:
            warning("Current SVM %s doesn't support probability"
                    " estimation." % self)
    return predictions
def _set_retrainable(self, value, force=False):
    """Assign value of retrainable parameter

    If retrainable flag is to be changed, classifier has to be
    untrained.  Also internal attributes such as _changedData,
    __changedData_isset, and __idhashes should be initialized if
    it becomes retrainable
    """
    pretrainable = self.params["retrainable"]
    if (force or value != pretrainable.value) \
           and "retrainable" in self.__tags__:
        if __debug__:
            debug("CLF_", "Setting retrainable to %s" % value)
        if "meta" in self.__tags__:
            warning("Retrainability is not yet crafted/tested for "
                    "meta classifiers. Unpredictable behavior might occur")
        # assure that we don't drag anything behind
        if self.trained:
            self.untrain()
        ca = self.ca
        if not value and ca.has_key("retrained"):
            ca.pop("retrained")
            ca.pop("repredicted")
        if value:
            if not "retrainable" in self.__tags__:
                warning("Setting of flag retrainable for %s has no effect"
                        " since classifier has no such capability. It would"
                        " just lead to resources consumption and slowdown"
                        % self)
            ca["retrained"] = ConditionalAttribute(
                enabled=True,
                doc="Whether retrainable classifier was retrained")
            ca["repredicted"] = ConditionalAttribute(
                enabled=True,
                doc="Whether retrainable classifier was repredicted")

        pretrainable.value = value

        # if retrainable we need to keep track of things
        if value:
            self.__idhashes = {"traindata": None,
                               "targets": None,
                               "testdata": None}  # , 'testtraindata': None}
            if __debug__ and "CHECK_RETRAIN" in debug.active:
                # ??? it is not clear though if idhash is faster than
                # simple comparison of (dataset != __traineddataset).any(),
                # but if we like to get rid of __traineddataset then we
                # should use idhash anyways
                self.__trained = self.__idhashes.copy()  # just same Nones
            self.__reset_changed_data()
            self.__invalidatedChangedData = {}
        elif "retrainable" in self.__tags__:
            # self.__reset_changed_data()
            self.__changedData_isset = False
            self._changedData = None
            self.__idhashes = None
            if __debug__ and "CHECK_RETRAIN" in debug.active:
                self.__trained = None
def _predict(self, data):
    """Predict values for the data
    """
    # libsvm needs doubles
    if data.dtype == 'float64':
        src = data
    else:
        src = data.astype('double')

    states = self.states

    predictions = [ self.model.predict(p) for p in src ]

    if states.isEnabled("values"):
        if self.regression:
            values = [ self.model.predictValuesRaw(p)[0] for p in src ]
        else:
            trained_labels = self.trained_labels
            nlabels = len(trained_labels)
            # XXX We do duplicate work. model.predict calls
            # predictValuesRaw internally and then does voting or
            # thresholding. So if speed becomes a factor we might
            # want to move out logic from libsvm over here to base
            # predictions on obtained values, or adjust libsvm to
            # spit out values from predict() as well
            if nlabels == 2:
                # Apparently libsvm reorders labels so we need to
                # track (1,0) values instead of (0,1) thus just
                # lets take negative reverse
                values = [ self.model.predictValues(p)
                               [(trained_labels[1], trained_labels[0])]
                           for p in src ]
                if len(values) > 0:
                    if __debug__:
                        debug("SVM",
                              "Forcing values to be ndarray and reshaping"
                              " them into 1D vector")
                    values = N.asarray(values).reshape(len(values))
            else:
                # In multiclass we return dictionary for all pairs
                # of labels, since libsvm does 1-vs-1 pairs
                values = [ self.model.predictValues(p) for p in src ]
        states.values = values

    if states.isEnabled("probabilities"):
        # XXX Is this really necessary? yoh don't think so since
        # assignment to states is doing the same
        #self.probabilities = [ self.model.predictProbability(p)
        #                       for p in src ]
        try:
            states.probabilities = [ self.model.predictProbability(p)
                                     for p in src ]
        except TypeError:
            warning("Current SVM %s doesn't support probability"
                    " estimation." % self)
    return predictions
def _call(self, dataset, callables=[]):
    # local bindings
    model = self.clf.model
    nr_class = model.nr_class

    if nr_class != 2:
        warning("You are estimating sensitivity for SVM %s trained on %d"
                % (str(self.clf), self.clf.model.nr_class) +
                " classes. Make sure that it is what you intended to do")

    svcoef = N.matrix(model.getSVCoef())
    svs = N.matrix(model.getSV())
    rhos = N.asarray(model.getRho())

    self.biases = rhos
    if self.split_weights:
        if nr_class != 2:
            raise NotImplementedError, \
                  "Cannot compute per-class weights for" \
                  " non-binary classification task"
        # libsvm might have different idea on the ordering
        # of labels, so we would need to map them back explicitly
        svm_labels = model.getLabels()  # labels as assigned by libsvm
        ds_labels = list(dataset.uniquelabels)  # labels in the dataset
        senses = [None for i in ds_labels]
        # first label is given positive value
        for i, (c, l) in enumerate( [(svcoef > 0, lambda x: x),
                                     (svcoef < 0, lambda x: x*-1)] ):
            # convert to array, and just take the meaningful dimension
            c_ = c.A[0]
            senses[ds_labels.index(svm_labels[i])] = \
                (l(svcoef[:, c_] * svs[c_, :])).A[0]
        weights = N.array(senses)
    else:
        # XXX yoh: .mean() is effectively
        # averages across "sensitivities" of all paired classifiers (I
        # think). See more info on this topic in svm.py on how sv_coefs
        # are stored
        #
        # First multiply SV coefficients with the actual SVs to get
        # weighted impact of SVs on decision, then for each feature
        # take mean across SVs to get a single weight value
        # per feature
        weights = svcoef * svs

    if __debug__:
        debug('SVM',
              "Extracting weights for %d-class SVM: #SVs=%s, "
              % (nr_class, str(model.getNSV())) +
              " SVcoefshape=%s SVs.shape=%s Rhos=%s."
              % (svcoef.shape, svs.shape, rhos) +
              " Result: min=%f max=%f"
              % (N.min(weights), N.max(weights)))

    return N.asarray(weights.T)
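# A plain-numpy sketch (made-up numbers, not the libsvm API) of the weight
# extraction above: for a linear binary SVM the per-feature weights are the
# coefficient-weighted sum of the support vectors, w = sum_i alpha_i y_i sv_i.
import numpy as np

sv_coef = np.array([[0.5, -0.3, -0.2]])  # alpha_i * y_i, shape (1, nSV)
svs = np.array([[1., 0.],                # support vectors, (nSV, nfeatures)
                [0., 1.],
                [1., 1.]])
w = np.dot(sv_coef, svs).ravel()         # one weight per feature
# w == [0.3, -0.5]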
def fit(self, measure, ds):
    """Fit the distribution by performing multiple cycles which
    repeatedly permute labels in the training dataset.

    Parameters
    ----------
    measure: Measure or None
      A measure used to compute the results from shuffled data. Can be
      None if a measure instance has been provided to the constructor.
    ds: `Dataset` which gets permuted and used to compute the
      measure/transfer error multiple times.
    """
    # TODO: place exceptions separately so we could avoid circular imports
    from mvpa.base.learner import LearnerError

    # prefer the already assigned measure over anything that was passed
    # to the function.
    # XXX that is a bit awkward but is necessary to keep the code changes
    # in the rest of PyMVPA minimal till this behavior becomes mandatory
    if not self._measure is None:
        measure = self._measure
        measure.untrain()

    dist_samples = []
    """Holds the values for randomized labels."""

    # estimate null-distribution
    # TODO this really needs to be more clever! If data samples are
    # shuffled within a class it really makes no difference for the
    # classifier, hence the number of permutations to estimate the
    # null-distribution of transfer errors can be reduced dramatically
    # when the *right* permutations (the ones that matter) are done.
    skipped = 0  # # of skipped permutations
    for p, permuted_ds in enumerate(self.__permutator.generate(ds)):
        # new permutation all the time
        # but only permute the training data and keep the testdata constant
        #
        if __debug__:
            debug('STATMC', "Doing %i permutations: %i"
                            % (self.__permutator.nruns, p+1), cr=True)

        # compute and store the measure of this permutation
        # assume it has `TransferError` interface
        try:
            res = measure(permuted_ds)
            dist_samples.append(res.samples)
        except LearnerError, e:
            if __debug__:
                debug('STATMC', " skipped", cr=True)
            warning('Failed to obtain value from %s due to %s. Measurement'
                    ' was skipped, which could lead to unstable and/or'
                    ' incorrect assessment of the null_dist' % (measure, e))
            skipped += 1
            continue
def _check_range(self, c):
    """Check and adjust the voxel coordinates"""
    # check range
    if __debug__:
        debug('ATL__', "Querying for voxel %r" % (c,))
    if not check_range(c, self.extent):
        warning("Coordinates %r are not within the extent %r."
                " Resetting to (0,0,0)" % (c, self.extent))
        # assume that voxel [0,0,0] is blank, i.e. carries
        # no labels which could possibly result in evil outcome
        c = [0] * 3
    return c
def _forward_dataset_grouped(self, ds):
    mdata = []  # list of samples array pieces
    if self.__axis == 'samples':
        col = ds.sa
        axis = 0
    elif self.__axis == 'features':
        col = ds.fa
        axis = 1
    else:
        raise RuntimeError("This should not have happened!")

    attrs = dict(zip(col.keys(), [[] for i in col]))

    # create a dictionary for all unique elements in all attributes this
    # mapper should operate on
    self.__attrcombs = dict(zip(self.__uattrs,
                                [col[attr].unique
                                 for attr in self.__uattrs]))
    # let it generate all combinations of unique elements in any attr
    for comb in _orthogonal_permutations(self.__attrcombs):
        selector = reduce(np.multiply,
                          [array_whereequal(col[attr].value, value)
                           for attr, value in comb.iteritems()])
        # process the samples
        if axis == 0:
            samples = ds.samples[selector]
        else:
            samples = ds.samples[:, selector]

        # check if there were any samples for such a combination,
        # if not -- warning and skip the rest of the loop body
        if not len(samples):
            warning('There were no samples for combination %s. It might be '
                    'a sign of an unbalanced dataset %s.' % (comb, ds))
            continue

        fxed_samples = np.apply_along_axis(self.__fx, axis, samples,
                                           *self.__fxargs)
        mdata.append(fxed_samples)
        if not self.__attrfx is None:
            # and now all sample attributes
            fxed_attrs = [self.__attrfx(col[attr].value[selector])
                          for attr in col]
            for i, attr in enumerate(col):
                attrs[attr].append(fxed_attrs[i])

    if axis == 0:
        mdata = np.vstack(mdata)
    else:
        mdata = np.vstack(np.transpose(mdata))

    return mdata, attrs
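# A standalone numpy sketch (hypothetical data, not the mapper itself) of
# the group-wise reduction above: samples sharing the same value of an
# attribute are collapsed by applying a function (here the mean) per group.
import numpy as np

samples = np.array([[1., 2.], [3., 4.], [10., 20.], [30., 40.]])
targets = np.array(['a', 'a', 'b', 'b'])

mdata = []
for value in np.unique(targets):
    selector = targets == value
    mdata.append(np.mean(samples[selector], axis=0))  # fx along samples axis
collapsed = np.vstack(mdata)
# collapsed == [[ 2.,  3.], [20., 30.]] -- one row per unique target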
def __init__(self, **kwargs):
    """Initialize GLM-Net multinomial classifier.

    See the help in R for further details on the parameters
    """
    # make sure they didn't specify regression
    if not kwargs.pop('family', None) is None:
        warning('You specified the "family" parameter, but we '
                'force this to be "multinomial".')

    # init base class first, forcing the multinomial family
    _GLMNET.__init__(self, family='multinomial', **kwargs)
def predict(self, dataset):
    """Predict classifier on data

    Shouldn't be overridden in subclasses unless explicitly needed
    to do so. Also subclasses trying to call super class's predict
    should call _predict if within _predict instead of predict()
    since otherwise it would loop
    """
    ## ??? yoh: changed to asany from as without exhaustive check
    data = np.asanyarray(dataset.samples)
    if __debug__:
        debug("CLF", "Predicting classifier %(clf)s on ds %(dataset)s",
              msgargs={'clf':self, 'dataset':dataset})

    # remember the time when started computing predictions
    t0 = time.time()

    ca = self.ca
    # to assure that those are reset (could be set due to testing
    # post-training)
    ca.reset(['estimates', 'predictions'])

    self._prepredict(dataset)

    if self.__trainednfeatures > 0 \
           or 'notrain2predict' in self.__tags__:
        result = self._predict(dataset)
    else:
        warning("Trying to predict using classifier trained on no features")
        if __debug__:
            debug("CLF",
                  "No features were present for training, prediction is "
                  "bogus")
        result = [None] * data.shape[0]

    ca.predicting_time = time.time() - t0

    # with labels mapping in-place, we also need to go back to the
    # literal labels
    if self._attrmap:
        try:
            result = self._attrmap.to_literal(result)
        except KeyError, e:
            raise FailedToPredictError, \
                  "Failed to convert predictions from numeric into " \
                  "literals: %s" % e
def _reverse(self, data):
    if __debug__:
        debug('MAP', "Converting signal back using DWP")

    if self.__level is None:
        raise NotImplementedError
    else:
        if not externals.exists('pywt wp reconstruct'):
            raise NotImplementedError, \
                  "Reconstruction for a single level for versions of " \
                  "pywt < 0.1.7 (revision 103) is not supported"
        if not externals.exists('pywt wp reconstruct fixed'):
            warning("Reconstruction using available version of pywt might "
                    "result in incorrect data in the tails of the signal")
        return self.__reverseSingleLevel(data)
def run_nose_tests():
    """Run nose-based tests -- really really silly way, just to get started

    TODO: just switch to using numpy.testing framework, for that
          unittests need to be cleaned and unified first
    """
    nosetests = collect_nose_tests()
    if not externals.exists('nose'):
        warning("You do not have python-nose installed -- no tests %s "
                "were run" % (', '.join(nosetests)))
        return
    from nose import main
    # main.config.verbosity = int(cfg.get('tests', 'verbosity', default=1))
    for nt in nosetests:
        main(defaultTest='mvpa.tests.' + nt, exit=False)
def __init__(self, gnb, splitter, qe, errorfx=MeanMismatchErrorFx(),
             indexsum=None, **kwargs):
    """Initialize a GNBSearchlight

    Parameters
    ----------
    gnb : `GNB`
      `GNB` classifier as the specification of what GNB parameters
      to use. Instance itself isn't used.
    splitter : `Splitter`
      `Splitter` to use to compute the error.
    errorfx : func, optional
      Functor that computes a scalar error value from the vectors of
      desired and predicted values (e.g. subclass of `ErrorFunction`)
    indexsum : ('sparse', 'fancy'), optional
      What to use to compute sums over arbitrary columns. 'fancy'
      corresponds to regular fancy indexing over columns, whereas
      'sparse' uses a product of sparse matrices (usually faster, so
      it is the default if `scipy` is available).
    """

    # init base class first
    BaseSearchlight.__init__(self, qe, **kwargs)

    self._errorfx = errorfx
    self._splitter = splitter
    self._gnb = gnb

    if indexsum is None:
        if externals.exists('scipy'):
            indexsum = 'sparse'
        else:
            indexsum = 'fancy'
    else:
        if indexsum == 'sparse' and not externals.exists('scipy'):
            warning("Scipy.sparse isn't available so taking 'fancy' as "
                    "'indexsum' method.")
            indexsum = 'fancy'
    self._indexsum = indexsum

    if not self._nproc in (None, 1):
        raise NotImplementedError, "For now only nproc=1 (or None for " \
              "autodetection) is supported by GNBSearchlight"
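# A minimal sketch (plain numpy/scipy, hypothetical shapes) of the two
# 'indexsum' strategies named above: summing selected columns either by
# fancy indexing or via a product with a sparse indicator matrix.
import numpy as np
from scipy import sparse

data = np.arange(12.).reshape(3, 4)   # e.g. samples x features
roi = [0, 2, 3]                       # feature ids belonging to one ROI

sum_fancy = data[:, roi].sum(axis=1)  # 'fancy': index, then sum

indicator = sparse.csc_matrix(
    (np.ones(len(roi)), (roi, np.zeros(len(roi), dtype=int))),
    shape=(data.shape[1], 1))         # features x 1 membership matrix
sum_sparse = np.asarray(indicator.T.dot(data.T)).ravel()  # one product

# both give [5., 17., 29.]; with many ROIs as columns of the indicator,
# the sparse variant computes all ROI sums in a single multiplication,
# which is typically faster -- hence it is the default with scipy.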
def __init__(self, **kwargs):
    """Initialize GLM-Net.

    See the help in R for further details on the parameters
    """
    # make sure they didn't specify incompatible model
    regr_family = 'gaussian'
    family = kwargs.pop('family', regr_family).lower()
    if family != regr_family:
        warning('You specified the parameter family=%s, but we '
                'force this to be "%s" for regression.'
                % (family, regr_family))
        family = regr_family

    # init base class first, forcing regression
    _GLMNET.__init__(self, family=family, **kwargs)
def _train(self, data):
    """Train the classifier.

    For kNN it is degenerate -- just stores the data.
    """
    self.__data = data
    if __debug__:
        if str(data.samples.dtype).startswith('uint') \
               or str(data.samples.dtype).startswith('int'):
            warning("kNN: input data is in integers. "
                    "Overflow on arithmetic operations might result in "
                    "errors. Please convert dataset's samples into "
                    "floating datatype if any error is reported.")
    self.__weights = None

    # create dictionary with an item for each condition
    uniquelabels = data.sa[self.params.targets_attr].unique
    self.__votes_init = dict(zip(uniquelabels,
                                 [0] * len(uniquelabels)))
def _forward_dataset(self, ds):
    # local binding
    chunks_attr = self.__chunks_attr
    dtype = self.__dtype

    if __debug__ and not chunks_attr is None \
      and np.array(get_nsamples_per_attr(ds, chunks_attr).values()).min() <= 2:
        warning("Z-scoring chunk-wise having a chunk with less than three "
                "samples will set features in these samples to either zero "
                "(with 1 sample in a chunk) "
                "or -1/+1 (with 2 samples in a chunk).")

    params = self.__params_dict
    if params is None:
        raise RuntimeError, \
              "ZScoreMapper needs to be trained before call to forward"

    if self._secret_inplace_zscore:
        mds = ds
    else:
        # shallow copy to put the new stuff in
        mds = ds.copy(deep=False)

    # cast the data to float, since in-place operations below do not upcast!
    if np.issubdtype(mds.samples.dtype, np.integer):
        mds.samples = mds.samples.astype(dtype)

    if '__all__' in params:
        # we have a global parameter set
        mds.samples = self._zscore(mds.samples, *params['__all__'])
    else:
        # per chunk z-scoring
        for c in mds.sa[chunks_attr].unique:
            if not c in params:
                raise RuntimeError(
                    "%s has no parameters for chunk '%s'. It probably "
                    "wasn't present in the training dataset!?"
                    % (self.__class__.__name__, c))
            slicer = np.where(mds.sa[chunks_attr].value == c)[0]
            mds.samples[slicer] = self._zscore(mds.samples[slicer],
                                               *params[c])
    return mds
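# A standalone numpy sketch (made-up data, not the mapper itself) of
# chunk-wise z-scoring as applied above: each chunk is centered and scaled
# by its own per-feature mean and standard deviation.
import numpy as np

samples = np.array([[1., 10.], [2., 20.], [3., 30.], [5., 50.]])
chunks = np.array([0, 0, 1, 1])

zscored = samples.copy()
for c in np.unique(chunks):
    slicer = chunks == c
    mean = zscored[slicer].mean(axis=0)
    std = zscored[slicer].std(axis=0)
    std[std == 0] = 1.0  # guard features with zero variance
    zscored[slicer] = (zscored[slicer] - mean) / std
# with only 2 samples per chunk every value becomes -1/+1 -- exactly the
# degenerate case the warning above is about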
def _wm_reverse(self, data):
    if __debug__:
        debug('MAP', "Converting signal back using DWP")

    if self.__level is None:
        raise NotImplementedError
    else:
        if not externals.exists('pywt wp reconstruct'):
            raise NotImplementedError, \
                  "Reconstruction for a single level for versions of " \
                  "pywt < 0.1.7 (revision 103) is not supported"
        if not externals.exists('pywt wp reconstruct fixed'):
            warning("%s: Reverse mapping with this version of 'pywt' might "
                    "result in incorrect data in the tails of the signal. "
                    "Please check for an update of 'pywt', or be careful "
                    "when interpreting the edges of the reverse mapped "
                    "data." % self.__class__.__name__)
        return self.__reverse_single_level(data)
def _setdebug(obj, partname):
    """Helper to set level of debugging output for SG

    Parameters
    ----------
    obj
      In SG debug output seems to be set per every object
    partname : str
      For what kind of object we are talking about... could be
      automated later on (TODO)
    """
    if _M_DEBUG is None:
        return
    debugname = "SG_%s" % partname.upper()

    switch = {True: (_M_DEBUG, 'M_DEBUG', "enable"),
              False: (_M_ERROR, 'M_ERROR', "disable"),
              'GCDEBUG': (_M_GCDEBUG, 'M_GCDEBUG', "enable")}

    if __debug__:
        if 'SG_GC' in debug.active:
            key = 'GCDEBUG'
        else:
            key = debugname in debug.active
    else:
        key = False

    sglevel, slevel, progressfunc = switch[key]

    if __debug__ and 'SG_' in debug.active:
        debug("SG_", "Setting verbosity for shogun.%s instance: %s to %s"
              % (partname, `obj`, slevel))
    if sglevel is not None:
        obj.io.set_loglevel(sglevel)
    if __debug__ and 'SG_LINENO' in debug.active:
        try:
            obj.io.enable_file_and_line()
        except AttributeError, e:
            warning("Cannot enable SG_LINENO debug target for shogun %s"
                    % externals.versions['shogun'])
def __call__(self, dataset):
    """Compute measure on a given `Dataset`.

    Each implementation has to handle a single argument: the source
    dataset.

    Returns the computed measure in some iterable (list-like)
    container applying a post-processing mapper if such is defined.
    """
    result = self._call(dataset)
    result = self._postcall(dataset, result)

    # XXX Remove when "sensitivity-return-dataset" transition is done
    if __debug__ \
           and not isinstance(result, AttrDataset) \
           and not len(result.shape) == 1:
        warning("Postprocessing of '%s' doesn't return a Dataset, or "
                "1D-array (got: '%s')."
                % (self.__class__.__name__, result))
    return result
def run_tests_using_nose(limit=None, verbosity=1, exit_=False):
    """Run nose-based tests -- really really silly way, just to get started

    TODO: just switch to using numpy.testing framework, for that
          unittests need to be cleaned and unified first
    """
    nosetests = collect_nose_tests(verbosity=verbosity)
    if not externals.exists('nose'):
        warning("You do not have python-nose installed. Some unittests were "
                "skipped: %s" % (', '.join(nosetests)))
        return

    from nose import main
    import nose
    import nose.config

    tests = collect_unit_tests(verbosity=verbosity) + nosetests

    config = nose.config.Config(
        verbosity=verbosity,
        plugins=nose.plugins.DefaultPluginManager())
    if limit is None:
        # Lets see if we aren't missing any:
        if verbosity:
            import os, glob
            testfiles = glob.glob('%s%stest_*.py'
                                  % (os.path.dirname(__file__), os.path.sep))
            not_tested = set([os.path.basename(f) for f in testfiles]) \
                         - set(['%s.py' % f for f in tests])
            if len(not_tested):
                print("T: Warning -- following test files were found but "
                      "will not be tested: %s" % ', '.join(not_tested))
        config.testNames = ['mvpa.tests.' + nt for nt in tests]
    else:
        config.testNames = ['mvpa.tests.' + nt for nt in tests
                            if nt[5:] in limit]

    # run the tests
    _ = main(defaultTest=(), config=config, exit=exit_)
def _get_default_c(self, data):
    """Compute default C

    TODO: for non-linear SVMs
    """
    if self.params.kernel.__kernel_name__ == 'linear':
        datasetnorm = np.mean(np.sqrt(np.sum(data * data, axis=1)))
        if datasetnorm == 0:
            warning("Obtained degenerate data with zero norm for training "
                    "of %s. Scaling of C cannot be done." % self)
            return 1.0
        value = 1.0 / (datasetnorm ** 2)
        if __debug__:
            debug("SVM", "Default C computed to be %f" % value)
    else:
        warning("TODO: Computation of default C is not yet implemented"
                " for non-linear SVMs. Assigning 1.0")
        value = 1.0

    return value
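# A worked sketch of the default-C heuristic above (plain numpy, made-up
# data): C defaults to one over the squared mean sample norm, making the
# SVM regularization roughly invariant to the overall scaling of the data.
import numpy as np

data = np.array([[3., 4.], [6., 8.]])  # sample norms 5 and 10
datasetnorm = np.mean(np.sqrt(np.sum(data * data, axis=1)))  # = 7.5
C = 1.0 / datasetnorm ** 2             # ~= 0.0178
# scaling the data by 10 shrinks C by 100, compensating the larger norms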
def test_all_dependencies(force=False, verbosity=1):
    """
    Test for all known dependencies.

    Parameters
    ----------
    force : boolean
      Whether to force the test even if it has already been performed.
    """
    # loop over all known dependencies
    for dep in _KNOWN:
        if not exists(dep, force):
            if verbosity:
                warning("%s is not available." % dep)

    if __debug__:
        debug('EXT', 'The following optional externals are present: %s'
                     % [k[5:] for k in cfg.options('externals')
                        if k.startswith('have')
                        and cfg.getboolean('externals', k) == True])
def test_confusion_based_error(self, l_clf):
    train = datasets['uni2medium_train']
    # to check if we fail to classify for 3 labels
    test3 = datasets['uni3medium_train']
    err = ConfusionBasedError(clf=l_clf)
    terr = TransferError(clf=l_clf)

    self.failUnlessRaises(UnknownStateError, err, None)
    """Shouldn't be able to access the state yet"""

    l_clf.train(train)
    e, te = err(None), terr(train)
    self.failUnless(abs(e - te) < 1e-10,
                    msg="ConfusionBasedError (%.2g) should be equal to "
                        "TransferError (%.2g) on traindataset" % (e, te))

    # this will print nasty WARNING but it is ok -- it is just checking code
    # NB warnings are not printed while doing whole testing
    warning("Don't worry about the following warning.")
    self.failIf(terr(test3) is None)

    # try copying the beast
    terr_copy = copy(terr)
def __init__(self, *args, **kwargs):
    """Initialize dummy report
    """
    warning("You are using DummyReport - no action will be taken. "
            "Please install reportlab to enjoy the reporting facility "
            "within PyMVPA")
def plot_erp(data, SR=500, onsets=None,
             pre=0.2, pre_onset=None, post=None, pre_mean=None,
             color='r', errcolor=None, errtype=None, ax=pl,
             ymult=1.0, *args, **kwargs):
    """Plot single ERP on existing canvas

    Parameters
    ----------
    data : 1D or 2D ndarray
      The data array can either be 1D (samples over time) or 2D
      (trials x samples). In the first case a boxcar mapper is used to
      extract the respective trial timecourses given a list of trial
      onsets. In the latter case, each row of the data array is taken
      as the EEG signal timecourse of a particular trial.
    onsets : list(int)
      List of onsets (in samples not in seconds).
    SR : int, optional
      Sampling rate (1/s) of the signal.
    pre : float, optional
      Duration (in seconds) to be plotted prior to onset.
    pre_onset : float or None
      If data is already in epochs (2D) then pre_onset provides
      information on how many seconds pre-stimulus were used to
      generate them. If None, then pre_onset = pre
    post : float
      Duration (in seconds) to be plotted after the onset.
    pre_mean : float
      Duration (in seconds) at the beginning of the window which is
      used for deriving the mean of the signal. If None, pre_mean = pre
    errtype : None or 'ste' or 'std' or 'ci95' or list of previous three
      Type of error value to be computed per datapoint.
        'ste' -- standard error of the mean,
        'std' -- standard deviation,
        'ci95' -- 95% confidence interval (1.96 * ste),
        None -- no error margin is plotted (default)
      Optionally, multiple error types can be specified in a list. In
      that case all of them will be plotted.
    color : matplotlib color code, optional
      Color to be used for plotting the mean signal timecourse.
    errcolor : matplotlib color code
      Color to be used for plotting the error margin. If None, use main
      color but with weak alpha level
    ax :
      Target where to draw.
    ymult : float, optional
      Multiplier for the values. E.g. if negative-up ERP plot is
      needed: provide ymult=-1.0
    *args, **kwargs
      Additional arguments to `pylab.plot`.

    Returns
    -------
    array
      Mean ERP timeseries.
    """
    if pre_mean is None:
        pre_mean = pre

    # set default
    pre_discard = 0

    if onsets is not None:  # if we need to extract ERPs
        if post is None:
            raise ValueError, \
                  "Duration post onsets must be provided if onsets are given"

        # trial timecourse duration
        duration = pre + post

        # We are working with a full timeline
        bcm = BoxcarMapper(onsets,
                           boxlength=int(SR * duration),
                           offset=-int(SR * pre))
        erp_data = bcm(data)

        # override values since we are using Boxcar
        pre_onset = pre
    else:
        if pre_onset is None:
            pre_onset = pre

        if pre_onset < pre:
            warning("Pre-stimulus interval to plot %g is smaller than "
                    "provided pre-stimulus captured interval %g, thus "
                    "plot interval was adjusted" % (pre, pre_onset))
            pre = pre_onset

        if post is None:
            # figure out post
            duration = float(data.shape[1]) / SR - pre_discard
            post = duration - pre
        else:
            duration = pre + post

        erp_data = data
        pre_discard = pre_onset - pre

    # Scale the data appropriately
    erp_data *= ymult

    # validity check -- we should have 2D matrix (trials x samples)
    if len(erp_data.shape) != 2:
        raise RuntimeError, \
              "plot_erp() supports either 1D data with onsets, or 2D data " \
              "(trials x sample_points). Shape of the data at the point " \
              "is %s" % erp_data.shape

    if not (pre_mean == 0 or pre_mean is None):
        # mean of pre-onset signal across trials
        erp_baseline = np.mean(
            erp_data[:, int((pre_onset - pre_mean) * SR):int(pre_onset * SR)])
        # center data on pre-onset mean
        # NOTE: make sure that we make a copy of the data so we don't
        # alter the original. Better be safe than sorry
        erp_data = erp_data - erp_baseline

    # generate timepoints and error ranges to plot filled error area
    # top ->
    # bottom <-
    time_points = np.arange(erp_data.shape[1]) * 1.0 / SR - pre_onset

    # if pre != pre_onset
    if pre_discard > 0:
        npoints = int(pre_discard * SR)
        time_points = time_points[npoints:]
        erp_data = erp_data[:, npoints:]

    # select only time points of interest (if post is provided)
    if post is not None:
        npoints = int(duration * SR)
        time_points = time_points[:npoints]
        erp_data = erp_data[:, :npoints]

    # compute mean signal timecourse across trials
    erp_mean = np.mean(erp_data, axis=0)

    # give sane default
    if errtype is None:
        errtype = []
    if not isinstance(errtype, list):
        errtype = [errtype]

    for et in errtype:
        # compute error per datapoint
        if et in ['ste', 'ci95']:
            erp_stderr = erp_data.std(axis=0) / np.sqrt(len(erp_data))
            if et == 'ci95':
                erp_stderr *= 1.96
        elif et == 'std':
            erp_stderr = erp_data.std(axis=0)
        else:
            raise ValueError, "Unknown error type '%s'" % et

        time_points2w = np.hstack((time_points, time_points[::-1]))

        error_top = erp_mean + erp_stderr
        error_bottom = erp_mean - erp_stderr
        error2w = np.hstack((error_top, error_bottom[::-1]))

        if errcolor is None:
            errcolor = color

        # plot error margin
        pfill = ax.fill(time_points2w, error2w,
                        edgecolor=errcolor, facecolor=errcolor,
                        alpha=0.2, zorder=3)

    # plot mean signal timecourse
    ax.plot(time_points, erp_mean, lw=2, color=color, zorder=4,
            *args, **kwargs)
    # ax.xaxis.set_major_locator(pl.MaxNLocator(4))
    return erp_mean
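# A hypothetical usage sketch for plot_erp above (assuming pylab is
# imported as pl, as in this module; data and onsets are made up):
#
#   fake_eeg = np.random.randn(10000)    # 20 s of 1D signal at 500 Hz
#   onsets = np.arange(250, 9500, 500)   # trial onsets in samples
#   erp = plot_erp(fake_eeg, SR=500, onsets=onsets,
#                  pre=0.2, post=0.6, errtype='ci95', color='b')
#
# 'ci95' draws the mean timecourse with a filled band of
# +/- 1.96 * standard error of the mean per timepoint.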
    if __debug__:
        debug('EXT', "Presence of %s is%s verified%s"
              % (dep, {True: '', False: ' NOT'}[result], estr))

    if not result:
        if raise_ \
               and cfg.getboolean('externals', 'raise exception', True):
            raise RuntimeError, "Required external '%s' was not found" % dep
        if issueWarning is not None \
               and cfg.getboolean('externals', 'issue warning', True):
            if issueWarning is True:
                warning("Required external '%s' was not found" % dep)
            else:
                warning(issueWarning)

    # store result in config manager
    if not cfg.has_section('externals'):
        cfg.add_section('externals')
    if result:
        cfg.set('externals', 'have ' + dep, 'yes')
    else:
        cfg.set('externals', 'have ' + dep, 'no')

    return result


# Bind functions for some version checks
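# Usage sketch: since results are cached in the 'externals' config section
# above, repeated queries are cheap. This mirrors how other modules in this
# codebase guard optional imports (see the ctypes check in the SMLR module
# below).
#
#   from mvpa.base import externals
#   if externals.exists('scipy'):
#       from scipy.stats import pearsonr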
def coarsen_chunks(source, nchunks=4):
    """Change chunking of the dataset

    Group chunks into groups to match the desired number of chunks. Makes
    sense if originally there was no strong grouping into chunks or each
    sample was independent, thus belonged to its own chunk

    Parameters
    ----------
    source : Dataset or list of chunk ids
      dataset or list of chunk ids to operate on. If Dataset, then its
      chunks get modified
    nchunks : int
      desired number of chunks
    """
    if isinstance(source, Dataset):
        chunks = source.chunks
    else:
        chunks = source
    chunks_unique = np.unique(chunks)
    nchunks_orig = len(chunks_unique)

    if nchunks_orig < nchunks:
        raise ValueError, \
              "Original number of chunks is %d. Cannot coarsen them " \
              "to get %d chunks" % (nchunks_orig, nchunks)

    # figure out the number of samples per each chunk
    counts = dict(zip(chunks_unique, [0] * len(chunks_unique)))
    for c in chunks:
        counts[c] += 1

    # now we need to group chunks to get a more or less equalized number
    # of samples per chunk. No sophistication is done -- just
    # consecutively group to get close to the desired number of samples
    # per chunk
    avg_chunk_size = np.sum(counts.values()) * 1.0 / nchunks
    chunks_groups = []
    cur_chunk = []
    # NOTE: use a dedicated counter for the groups formed so far; reusing
    # `nchunks` here (as an earlier revision did) clobbered the desired
    # number of chunks and made the sanity check below vacuous
    nchunks_formed = 0
    cur_chunk_nsamples = 0
    samples_counted = 0
    for i, c in enumerate(chunks_unique):
        cc = counts[c]

        cur_chunk += [c]
        cur_chunk_nsamples += cc

        # time to get a new chunk?
        if (samples_counted + cur_chunk_nsamples
            >= (nchunks_formed + 1) * avg_chunk_size) or i == nchunks_orig - 1:
            chunks_groups.append(cur_chunk)
            samples_counted += cur_chunk_nsamples
            cur_chunk_nsamples = 0
            cur_chunk = []
            nchunks_formed += 1

    if len(chunks_groups) != nchunks:
        warning("Apparently logic in coarsen_chunks is wrong. "
                "It was desired to get %d chunks, got %d"
                % (nchunks, len(chunks_groups)))

    # remap using groups
    # create dictionary
    chunks_map = {}
    for i, group in enumerate(chunks_groups):
        for c in group:
            chunks_map[c] = i

    # we always want an array!
    chunks_new = np.array([chunks_map[x] for x in chunks])

    if __debug__:
        debug("DS_", "Using dictionary %s to remap old chunks %s into new %s"
              % (chunks_map, chunks, chunks_new))

    if isinstance(source, Dataset):
        if __debug__:
            debug("DS", "Coarsening %d chunks into %d chunks for %s"
                  % (nchunks_orig, len(chunks_groups), source))
        source.sa['chunks'].value = chunks_new
        return
    else:
        return chunks_new
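# Usage sketch (hypothetical chunk ids): collapse eight singleton chunks
# into four roughly equally sized ones.
#
#   chunks = [0, 1, 2, 3, 4, 5, 6, 7]          # one sample per chunk
#   coarsen_chunks(chunks, nchunks=4)
#   # -> array([0, 0, 1, 1, 2, 2, 3, 3])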
    from mvpa.kernels.sg import SGKernel, LinearSGKernel

    # set the default kernel here, to be able to import this module
    # when building the docs without SG
    _default_kernel_class_ = LinearSGKernel

    # Figure out debug IDs once and for all
    if hasattr(shogun.Classifier, 'M_DEBUG'):
        _M_DEBUG = shogun.Classifier.M_DEBUG
        _M_ERROR = shogun.Classifier.M_ERROR
        _M_GCDEBUG = None
    elif hasattr(shogun.Classifier, 'MSG_DEBUG'):
        _M_DEBUG = shogun.Classifier.MSG_DEBUG
        _M_ERROR = shogun.Classifier.MSG_ERROR
    else:
        _M_DEBUG, _M_ERROR = None, None
        warning("Could not figure out debug IDs within shogun. "
                "No control over shogun verbosity will be provided")

    # Highest level
    if hasattr(shogun.Classifier, 'MSG_GCDEBUG'):
        _M_GCDEBUG = shogun.Classifier.MSG_GCDEBUG
    else:
        _M_GCDEBUG = None
else:
    # set a fake default kernel here, to be able to import this module
    # when building the docs without SG
    _default_kernel_class_ = None

import operator

from mvpa.misc.param import Parameter
from mvpa.misc.attrmap import AttributeMap
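# Usage sketch (hypothetical helper, not part of the original module): how
# these IDs could be used to silence a shogun object, assuming the object
# exposes shogun's modular `io.set_loglevel` interface.
#
#   def _quiet(sg_obj):
#       if _M_ERROR is not None:
#           sg_obj.io.set_loglevel(_M_ERROR)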
    def _set_retrainable(self, value, force=False):
        """Assign value of the retrainable parameter

        If the retrainable flag is to be changed, the classifier has to be
        untrained. Also, internal attributes such as _changedData,
        __changedData_isset, and __idhashes should be initialized if it
        becomes retrainable
        """
        pretrainable = self.params['retrainable']
        if (force or value != pretrainable.value) \
               and 'retrainable' in self.__tags__:
            if __debug__:
                debug("CLF_", "Setting retrainable to %s" % value)
            if 'meta' in self.__tags__:
                warning("Retrainability is not yet crafted/tested for "
                        "meta classifiers. Unpredictable behavior might occur")
            # assure that we don't drag anything behind
            if self.trained:
                self.untrain()
            ca = self.ca
            if not value and ca.has_key('retrained'):
                ca.pop('retrained')
                ca.pop('repredicted')
            if value:
                if not 'retrainable' in self.__tags__:
                    warning("Setting of flag retrainable for %s has no effect"
                            " since classifier has no such capability. It would"
                            " just lead to resource consumption and slowdown"
                            % self)
                ca['retrained'] = ConditionalAttribute(
                    enabled=True,
                    doc="Whether the retrainable classifier was retrained")
                ca['repredicted'] = ConditionalAttribute(
                    enabled=True,
                    doc="Whether the retrainable classifier was repredicted")

            pretrainable.value = value

            # if retrainable, we need to keep track of things
            if value:
                self.__idhashes = {'traindata': None, 'targets': None,
                                   'testdata': None}  # , 'testtraindata': None}
                if __debug__ and 'CHECK_RETRAIN' in debug.active:
                    # ??? it is not clear though if idhash is faster than
                    # simple comparison of (dataset != __traineddataset).any(),
                    # but if we like to get rid of __traineddataset then we
                    # should use idhash anyways
                    self.__trained = self.__idhashes.copy()  # just same Nones
                self.__reset_changed_data()
                self.__invalidatedChangedData = {}
            elif 'retrainable' in self.__tags__:
                # self.__reset_changed_data()
                self.__changedData_isset = False
                self._changedData = None
                self.__idhashes = None
                if __debug__ and 'CHECK_RETRAIN' in debug.active:
                    self.__trained = None
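    # Usage sketch (hypothetical classifier instance; assumes the public
    # `retrain` counterpart defined elsewhere in this class): enable the
    # flag via the parameter, which routes through _set_retrainable, then
    # reuse the trained model on slightly changed data.
    #
    #   clf.params.retrainable = True
    #   clf.train(dataset)
    #   clf.retrain(dataset2)            # may skip full retraining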
from mvpa.misc.exceptions import ConvergenceError
from mvpa.misc.param import Parameter
from mvpa.misc.state import ConditionalAttribute
from mvpa.datasets.base import Dataset

__all__ = ["SMLR", "SMLRWeights"]

_DEFAULT_IMPLEMENTATION = "Python"
if externals.exists('ctypes'):
    # Uber-fast C-version of the stepwise regression
    try:
        from mvpa.clfs.libsmlrc import stepwise_regression as _cStepwiseRegression
        _DEFAULT_IMPLEMENTATION = "C"
    except OSError, e:
        warning("Failed to load the fast implementation of SMLR. Maybe you "
                "forgot to build it. The much slower pure-Python version "
                "will be used instead")
        _cStepwiseRegression = None
else:
    _cStepwiseRegression = None
    warning("SMLR implementation without ctypes is overwhelmingly slow."
            " You are strongly advised to install python-ctypes")

if __debug__:
    from mvpa.base import debug


def _label2oneofm(labels, ulabels):
    """Convert labels to one-of-M form.

    TODO: Might be useful elsewhere so could migrate into misc/
    """
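    # Illustration of the intended mapping (hypothetical values, assuming
    # the conventional 0/1 one-of-M coding):
    #   _label2oneofm([1, 2, 1], ulabels=[1, 2])
    #   -> array([[1, 0],
    #             [0, 1],
    #             [1, 0]])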