def test_null_dist_prob(self, null):
    """Testing null dist probability"""
    if not isinstance(null, NullDist):
        return
    ds = datasets['uni2small']

    null.fit(OneWayAnova(), ds)

    # check reasonable output.
    # p-values for non-bogus features should be significantly different,
    # while bogus (0) ones should not
    prob = null.p([20, 0, 0, 0, 0, np.nan])
    # XXX this is labile! it also needs checking since the F-scores
    # of the MCNullDists using normal distribution are apparently not
    # distributed that way, hence the test often (if not always) fails.
    if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnless(np.abs(prob[0]) < 0.05,
                        msg="Expected small p, got %g" % prob[0])
    if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnless((np.abs(prob[1:]) > 0.05).all(),
                        msg="Bogus features should have insignificant p."
                            " Got %s" % (np.abs(prob[1:]),))
    # has to have matching shape
    if not isinstance(null, FixedNullDist):
        # Fixed dist is univariate ATM so it doesn't care
        # about dimensionality and gives 1 output value
        self.failUnlessRaises(ValueError, null.p, [5, 3, 4])
def testSimpleSOM(self):
    colors = [[0., 0., 0.], [0., 0., 1.], [0., 1., 0.], [1., 0., 0.],
              [0., 1., 1.], [1., 0., 1.], [1., 1., 0.], [1., 1., 1.]]
    ds = Dataset(samples=colors, labels=1)

    # only small SOM for speed reasons
    som = SimpleSOMMapper((10, 5), 200, learning_rate=0.05)

    # no access when nothing is there
    self.failUnlessRaises(RuntimeError, som._accessKohonen)
    self.failUnlessRaises(RuntimeError, som.getInSize)
    self.failUnlessRaises(RuntimeError, som.getOutSize)

    som.train(ds)

    self.failUnless(som.getInSize() == 3)
    self.failUnless(som.getOutSize() == (10, 5))

    fmapped = som(colors)
    self.failUnless(fmapped.shape == (8, 2))
    for fm in fmapped:
        self.failUnless(som.isValidOutId(fm))

    # reverse mapping
    rmapped = som.reverse(fmapped)
    if cfg.getboolean('tests', 'labile', default='yes'):
        # should approximately restore the input, but could fail
        # with bad initialisation
        self.failUnless((N.round(rmapped) == ds.samples).all())
def testAnova(self):
    """Do some extended testing of OneWayAnova

    in particular -- compound estimation
    """
    m = OneWayAnova()               # default must be not compound ?
    mc = CompoundOneWayAnova(combiner=None)
    ds = datasets['uni2medium']

    # For 2 labels it must be identical for both and equal to
    # simple OneWayAnova
    a, ac = m(ds), mc(ds)

    self.failUnless(a.shape == (ds.nfeatures,))
    self.failUnless(ac.shape == (ds.nfeatures, len(ds.uniquelabels)))

    self.failUnless((ac[:, 0] == ac[:, 1]).all())
    self.failUnless((a == ac[:, 1]).all())

    ds = datasets['uni4large']
    ac = mc(ds)

    if cfg.getboolean('tests', 'labile', default='yes'):
        # All non-bogus features must be high for a corresponding feature
        self.failUnless((ac[(N.array(ds.nonbogus_features),
                             N.arange(4))] >= 1).all())
    # All features should have at least slightly different CompoundAnova
    # values. I really doubt that there will be a case when this
    # test would fail just due to being 'labile'
    self.failUnless(N.max(N.std(ac, axis=1)) > 0,
                    msg='In compound anova, we should get different'
                        ' results for different labels. Got %s' % ac)
def test_simple_som(self):
    colors = np.array(
        [
            [0.0, 0.0, 0.0],
            [0.0, 0.0, 1.0],
            [0.0, 1.0, 0.0],
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 1.0],
            [1.0, 0.0, 1.0],
            [1.0, 1.0, 0.0],
            [1.0, 1.0, 1.0],
        ]
    )

    # only small SOM for speed reasons
    som = SimpleSOMMapper((10, 5), 200, learning_rate=0.05)

    # no access when nothing is there
    self.failUnlessRaises(RuntimeError, som._access_kohonen)

    som.train(colors)

    fmapped = som.forward(colors)
    self.failUnless(fmapped.shape == (8, 2))

    # reverse mapping
    rmapped = som.reverse(fmapped)
    if cfg.getboolean("tests", "labile", default="yes"):
        # should approximately restore the input, but could fail
        # with bad initialisation
        self.failUnless((np.round(rmapped) == colors).all())
def collect_nose_tests(verbosity=1):
    """Return list of tests which are pure nose-based
    """
    tests = [
        # Basic data structures/manipulators
        'test_collections',
        'test_attrmap',
        # Datasets
        'test_datasetng',
        'test_datasetfx',
        'test_splitter',
        'test_niftidataset',
        'test_eepdataset',
        'test_erdataset',
        # Misc supporting
        'test_neighborhood',
        'test_stats',
        'test_stats_sp',
        # Mappers
        'test_mapper',
        'test_mapper_sp',
        'test_arraymapper',
        'test_boxcarmapper',
        'test_prototypemapper',
        'test_fxmapper',
        'test_zscoremapper',
        'test_waveletmapper',
        'test_mdp',
        'test_filters',
        # Learners
        'test_enet',
        'test_spam',
        'test_lars',
        'test_glmnet',
        'test_kernel',
        'test_svmkernels',
        # Algorithms
        'test_emp_null',
        'test_clfcrossval',
        # IO
        'test_iohelpers',
        'test_hdf5',
        'test_hdf5_clf',
        # Measures
        'test_transerror',
        'test_datameasure',
        ]

    if not cfg.getboolean('tests', 'lowmem', default='no'):
        tests += ['test_atlases']

    return tests
def do_sweep(*args_, **kwargs_):
    def untrain_clf(argvalue):
        """Little helper"""
        if isinstance(argvalue, Classifier):
            # clear classifier after its use -- just to be sure ;-)
            argvalue.retrainable = False
            argvalue.untrain()

    failed_tests = {}
    for argname in kwargs.keys():
        for argvalue in kwargs[argname]:
            if isinstance(argvalue, Classifier):
                # clear classifier before its use
                argvalue.untrain()
            if isinstance(argvalue, ClassWithCollections):
                argvalue.states.reset()
            # update kwargs_
            kwargs_[argname] = argvalue
            # do actual call
            try:
                if __debug__:
                    debug('TEST', 'Running %s on args=%s and kwargs=%s'
                          % (method.__name__, `args_`, `kwargs_`))
                method(*args_, **kwargs_)
            except AssertionError, e:
                estr = str(e)
                etype, value, tb = sys.exc_info()
                # literal representation of exception tb, so
                # we could group them later on
                eidstr = ' '.join(
                    [l for l in traceback.format_exception(etype, value, tb)
                     if not ('do_sweep' in l
                             or 'unittest.py' in l
                             or 'AssertionError' in l
                             or 'Traceback (most' in l)])

                # Store exception information for later grouping
                if not eidstr in failed_tests:
                    failed_tests[eidstr] = []

                failed_tests[eidstr].append(
                    # skip top-most tb in sweep_args
                    (argname, `argvalue`, tb.tb_next, estr))

                if __debug__:
                    msg = "%s on %s=%s" % (estr, argname, `argvalue`)
                    debug('TEST', 'Failed unittest: %s\n%s' % (eidstr, msg))
            untrain_clf(argvalue)
            # TODO: handle different levels of unittests properly
            if cfg.getboolean('tests', 'quick', False):
                # on TESTQUICK just run test for 1st entry in the list,
                # the rest are omitted
                # TODO: proper partitioning of unittests
                break
def test_glmnet_r():
    # not the perfect dataset with which to test, but
    # it will do for now.
    #data = datasets['dumb2']
    # for some reason the R code fails with the dumb data
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    # prediction has to be almost perfect
    # test with a correlation
    pre = clf.predict(data.samples)
    corerr = corr_error(pre, data.targets)
    if cfg.getboolean('tests', 'labile', default='yes'):
        assert_true(corerr < .2)
def test_enet(self):
    # not the perfect dataset with which to test, but
    # it will do for now.
    #data = datasets['dumb2']
    # for some reason the R code fails with the dumb data
    data = datasets['chirp_linear']

    clf = ENET()

    clf.train(data)

    # prediction has to be almost perfect
    # test with a correlation
    pre = clf.predict(data.samples)
    cor = pearsonr(pre, data.targets)
    if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnless(cor[0] > .8)
def test_glmnet_r():
    # not the perfect dataset with which to test, but
    # it will do for now.
    #data = datasets['dumb2']
    # for some reason the R code fails with the dumb data
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    # prediction has to be almost perfect
    # test with a correlation
    pre = clf.predict(data.samples)
    corerr = CorrErrorFx()(pre, data.targets)
    if cfg.getboolean('tests', 'labile', default='yes'):
        assert_true(corerr < .2)
def collect_nose_tests():
    """Return list of tests which are pure nose-based
    """
    tests = [
        'test_collections',
        'test_datasetng',
        'test_attrmap',
        'test_arraymapper',
        'test_boxcarmapper',
        'test_mapper',
        'test_mapper_sp',
        'test_fxmapper',
        'test_glmnet',
        'test_hdf5',
        'test_neighborhood',
        'test_mdp',
        'test_niftidataset',
        'test_eepdataset',
        'test_erdataset',
        'test_zscoremapper',
        'test_kernel',
        'test_svmkernels',
        'test_waveletmapper',
        'test_emp_null',
        'test_transerror',
        ]

    if not cfg.getboolean('tests', 'lowmem', default='no'):
        tests += ['test_atlases']

    ## SkipTest will take care about marking those as S
    ## if externals.exists('scipy'):
    ##     tests += ['test_mapper_sp']
    ## if externals.exists('glmnet'):
    ##     tests += ['test_glmnet']
    ## if externals.exists('nifti'):
    ##     tests += ['test_niftidataset']
    ## if externals.exists('mdp'):
    ##     tests += ['test_mdp']
    ## if externals.exists('h5py'):
    ##     tests += ['test_hdf5']

    return tests
def __check_rpy():
    """Check whether rpy is available and set it up for sane execution
    """
    #import rpy_options
    #rpy_options.set_options(VERBOSE=False, SETUP_READ_CONSOLE=False) # SETUP_WRITE_CONSOLE=False)
    #rpy_options.set_options(VERBOSE=False, SETUP_WRITE_CONSOLE=False) # SETUP_WRITE_CONSOLE=False)

    #    if not cfg.get('rpy', 'read_console', default=False):
    #        print "no read"
    #        rpy_options.set_options(SETUP_READ_CONSOLE=False)
    #    if not cfg.get('rpy', 'write_console', default=False):
    #        print "no write"
    #        rpy_options.set_options(SETUP_WRITE_CONSOLE=False)

    import rpy
    if not cfg.getboolean('rpy', 'interactive', default=True) \
           and (rpy.get_rpy_input() is rpy.rpy_io.rpy_input):
        if __debug__:
            debug('EXT_', "RPy: providing dummy callback for input to return '1'")
        def input1(*args):
            return "1"      # which is "1: abort (with core dump, if enabled)"
        rpy.set_rpy_input(input1)
def testAllDependencies(force=False):
    """
    Test for all known dependencies.

    :Parameters:
      force : boolean
        Whether to force the test even if it has already been performed.

    """
    # loop over all known dependencies
    for dep in _KNOWN:
        if not exists(dep, force):
            warning("%s is not available." % dep)

    if __debug__:
        debug('EXT', 'The following optional externals are present: %s' \
                     % [k[5:] for k in cfg.options('externals')
                        if k.startswith('have') \
                        and cfg.getboolean('externals', k) == True])
def test_all_dependencies(force=False, verbosity=1):
    """
    Test for all known dependencies.

    Parameters
    ----------
    force : boolean
      Whether to force the test even if it has already been performed.

    """
    # loop over all known dependencies
    for dep in _KNOWN:
        if not exists(dep, force):
            if verbosity:
                warning("%s is not available." % dep)

    if __debug__:
        debug('EXT', 'The following optional externals are present: %s' \
                     % [k[5:] for k in cfg.options('externals')
                        if k.startswith('have') \
                        and cfg.getboolean('externals', k) == True])
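# A minimal usage sketch of the helpers above.  The module path
# (mvpa2.base.externals) and the 'scipy' dependency key are assumptions for
# illustration; the caching behaviour ('have scipy' in the 'externals' config
# section) follows the exists() implementation shown elsewhere in this file.
from mvpa2.base import externals

if externals.exists('scipy'):
    # result is now cached in cfg under "have scipy = yes"
    print("scipy available, enabling scipy-dependent functionality")

# populate the 'externals' config section for every known dependency,
# without re-warning about missing ones
externals.test_all_dependencies(force=False, verbosity=0)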
def test_anova(self):
    """Do some extended testing of OneWayAnova

    in particular -- compound estimation
    """
    m = OneWayAnova()               # default must be not compound ?
    mc = CompoundOneWayAnova()
    ds = datasets['uni2medium']

    # For 2 labels it must be identical for both and equal to
    # simple OneWayAnova
    a, ac = m(ds), mc(ds)

    self.failUnless(a.shape == (1, ds.nfeatures))
    self.failUnless(ac.shape == (len(ds.UT), ds.nfeatures))

    assert_array_equal(ac[0], ac[1])
    assert_array_equal(a, ac[1])

    # check for p-value attrs
    if externals.exists('scipy'):
        assert_true('fprob' in a.fa.keys())
        assert_equal(len(ac.fa), len(ac))

    ds = datasets['uni4large']
    ac = mc(ds)

    if cfg.getboolean('tests', 'labile', default='yes'):
        # All non-bogus features must be high for a corresponding feature
        self.failUnless(
            (ac.samples[np.arange(4),
                        np.array(ds.a.nonbogus_features)] >= 1).all())
    # All features should have at least slightly different CompoundAnova
    # values. I really doubt that there will be a case when this
    # test would fail just due to being 'labile'
    self.failUnless(np.max(np.std(ac, axis=1)) > 0,
                    msg='In compound anova, we should get different'
                        ' results for different labels. Got %s' % ac)
def test_simple_som(self):
    colors = np.array([[0., 0., 0.], [0., 0., 1.], [0., 1., 0.], [1., 0., 0.],
                       [0., 1., 1.], [1., 0., 1.], [1., 1., 0.], [1., 1., 1.]])

    # only small SOM for speed reasons
    som = SimpleSOMMapper((10, 5), 200, learning_rate=0.05)

    # no access when nothing is there
    self.failUnlessRaises(RuntimeError, som._access_kohonen)

    som.train(colors)

    fmapped = som(colors)
    self.failUnless(fmapped.shape == (8, 2))

    # reverse mapping
    rmapped = som.reverse(fmapped)
    if cfg.getboolean('tests', 'labile', default='yes'):
        # should approximately restore the input, but could fail
        # with bad initialisation
        self.failUnless((np.round(rmapped) == colors).all())
def exists(dep, force=False, raise_=False, issueWarning=None):
    """
    Test whether a known dependency is installed on the system.

    This method allows us to test for individual dependencies without
    testing all known dependencies. It also ensures that we only test
    for a dependency once.

    Parameters
    ----------
    dep : string or list of string
      The dependency key(s) to test.
    force : boolean
      Whether to force the test even if it has already been performed.
    raise_ : boolean
      Whether to raise RuntimeError if dependency is missing.
    issueWarning : string or None or True
      If string, warning with given message would be thrown.
      If True, standard message would be used for the warning text.
    """
    # if we are provided with a list of deps - go through all of them
    if isinstance(dep, list) or isinstance(dep, tuple):
        results = [exists(dep_, force, raise_) for dep_ in dep]
        return bool(reduce(lambda x, y: x and y, results, True))

    # where to look in cfg
    cfgid = 'have ' + dep

    # prevent unnecessary testing
    if cfg.has_option('externals', cfgid) \
       and not cfg.getboolean('externals', 'retest', default='no') \
       and not force:
        if __debug__:
            debug('EXT', "Skip retesting for '%s'." % dep)

        # check whether an exception should be raised, even though the
        # external was already tested previously
        if not cfg.getboolean('externals', cfgid) \
           and raise_ \
           and cfg.getboolean('externals', 'raise exception', True):
            raise RuntimeError, "Required external '%s' was not found" % dep
        return cfg.getboolean('externals', cfgid)

    # determine availability of external (non-cached)

    # default to 'not found'
    result = False

    if not _KNOWN.has_key(dep):
        raise ValueError, "%s is not a known dependency key." % (dep)
    else:
        # try and load the specific dependency
        if __debug__:
            debug('EXT', "Checking for the presence of %s" % dep)

        # Exceptions which are silently caught while running tests for externals
        _caught_exceptions = [ImportError, AttributeError, RuntimeError]

        estr = ''
        try:
            exec _KNOWN[dep]
            result = True
        except tuple(_caught_exceptions), e:
            estr = ". Caught exception was: " + str(e)
        except Exception, e:
            # Add known ones by their names so we don't need to
            # actually import anything manually to get those classes
            if e.__class__.__name__ in ['RPy_Exception',
                                        'RRuntimeError',
                                        'RPy_RException']:
                _caught_exceptions += [e.__class__]
                estr = ". Caught exception was: " + str(e)
            else:
                raise
    if __debug__:
        debug('EXT', "Presence of %s is%s verified%s"
              % (dep, {True: '', False: ' NOT'}[result], estr))

    if not result:
        if raise_ \
           and cfg.getboolean('externals', 'raise exception', True):
            raise RuntimeError, "Required external '%s' was not found" % dep

        if issueWarning is not None \
           and cfg.getboolean('externals', 'issue warning', True):
            if issueWarning is True:
                warning("Required external '%s' was not found" % dep)
            else:
                warning(issueWarning)

    # store result in config manager
    if not cfg.has_section('externals'):
        cfg.add_section('externals')
    if result:
        cfg.set('externals', 'have ' + dep, 'yes')
    else:
        cfg.set('externals', 'have ' + dep, 'no')
                estr = ". Caught exception was: " + str(e)
            else:
                raise
        finally:
            # And restore warnings
            np.seterr(**olderr)

    if __debug__:
        debug('EXT', "Presence of %s is%s verified%s"
              % (dep, {True: '', False: ' NOT'}[result], estr))

    if not result:
        if raise_:
            raise RuntimeError, "Required external '%s' was not found" % dep

        if issueWarning is not None \
           and cfg.getboolean('externals', 'issue warning', True):
            if issueWarning is True:
                warning("Required external '%s' was not found" % dep)
            else:
                warning(issueWarning)

    # store result in config manager
    if not cfg.has_section('externals'):
        cfg.add_section('externals')
    if result:
        cfg.set('externals', 'have ' + dep, 'yes')
    else:
        cfg.set('externals', 'have ' + dep, 'no')

    return result
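# A minimal usage sketch for the exists() helper defined above.  The
# dependency keys ('scipy', 'shogun') are only illustrative; the config
# caching described in the comments follows the implementation itself.
if exists('scipy'):
    # outcome is now cached in cfg as "have scipy = yes"
    from scipy import stats

# ask for a hard failure instead of a silent False
exists('shogun', raise_=True)

# or just emit a warning and continue
exists('shogun', issueWarning="shogun is missing; SVM backends will be limited")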
    nt : int, optional
      Dimensionality of target space
    data : array, optional
      Some data (should have rank high enough) to derive rotation
    """
    if nt is None:
        nt = ns
    # figure out some "random" rotation
    d = max(ns, nt)
    if data is None:
        data = np.random.normal(size=(d*10, d))
    _u, _s, _vh = np.linalg.svd(data[:, :d])
    R = _vh[:ns, :nt]
    if ns == nt:
        # Test if it is indeed a rotation matrix ;)
        # Lets flip first axis if necessary
        if np.linalg.det(R) < 0:
            R[:, 0] *= -1.0
    return R


datasets = generate_testing_datasets(specs)

if cfg.getboolean('tests', 'use hdf datasets', False):
    if not externals.exists('h5py'):
        raise RuntimeError(
            "Cannot perform HDF5 dump of all datasets in the warehouse, "
            "because 'h5py' is not available")
    datasets = saveload_warehouse()
    print "Replaced whole dataset warehouse with the HDF5-reloaded alternative."
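# A quick sanity sketch for the rotation helper above.  The function name
# get_random_rotation is inferred (the signature line is not shown in this
# excerpt); for the square case the returned matrix should have orthonormal
# rows and, after the axis flip, a positive determinant.
import numpy as np

R = get_random_rotation(3)
assert np.allclose(R.dot(R.T), np.eye(3))   # orthonormal rows
assert np.linalg.det(R) > 0                  # proper rotation, not a reflection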
def enhanced_doc_string(item, *args, **kwargs):
    """Generate enhanced doc strings for various items.

    Parameters
    ----------
    item : str or class
      What object requires enhancing of documentation
    *args : list
      Includes base classes to look for parameters, as well, first item
      must be a dictionary of locals if item is given by a string
    force_extend : bool
      Whether to force looking for the documentation in the parents.
      By default force_extend = False, and lookup happens only if kwargs
      is one of the arguments to the respective function (e.g. item.__init__)
    skip_params : list of str
      List of parameters (in addition to [kwargs]) which should not
      be added to the documentation of the class.  It is to be used from
      a collector, i.e. whenever the class is already created.
    """
    # Handling of arguments: only force_extend and skip_params are recognized
    if len(kwargs):
        if not set(kwargs.keys()).issubset(set(['force_extend',
                                                'skip_params'])):
            raise ValueError, "Got unknown keyword arguments (smth among %s)" \
                  " in enhanced_doc_string." % kwargs
    force_extend = kwargs.get('force_extend', False)
    skip_params = kwargs.get('skip_params', [])

    # XXX make it work also not only with classes but with methods as well
    if isinstance(item, basestring):
        if len(args) < 1 or not isinstance(args[0], dict):
            raise ValueError, \
                  "Please provide locals for enhanced_doc_string of %s" % item
        name = item
        lcl = args[0]
        args = args[1:]
    elif hasattr(item, "im_class"):
        # bound method
        raise NotImplementedError, \
              "enhanced_doc_string is not yet implemented for methods"
    elif hasattr(item, "__name__"):
        name = item.__name__
        lcl = item.__dict__
    else:
        raise ValueError, "Don't know how to extend docstring for %s" % item

    # check whether docstring magic is requested or not
    if not cfg.getboolean('doc', 'pimp docstrings', True):
        return lcl['__doc__']

    if __debug__:
        debug('DOCH', 'Processing docstrings of %s' % name)

    #return lcl['__doc__']
    rst_lvlmarkup = ["=", "-", "_"]

    # would then be called for any child... ok - ad hoc for SVM???
    if hasattr(item, '_customize_doc') and name == 'SVM':
        item._customize_doc()

    initdoc = ""
    if lcl.has_key('__init__'):
        func = lcl['__init__']
        initdoc = func.__doc__

        # whether to extend arguments:
        # do only if kwargs is one of the arguments
        # in python 2.5 args are no longer in co_names but in varnames
        extend_args = force_extend or \
                      'kwargs' in (func.func_code.co_names
                                   + func.func_code.co_varnames)

        if __debug__ and not extend_args:
            debug('DOCH', 'Not extending parameters for __init__ of %s' % name)

        if initdoc is None:
            initdoc = "Initialize instance of %s" % name

        initdoc, params, suffix = _split_out_parameters(initdoc)
        params_list = _parse_parameters(params)

        known_params = set([i[0] for i in params_list])

        # If there are additional ones:
        if lcl.has_key('_paramsdoc'):
            params_list += [i for i in lcl['_paramsdoc']
                            if not (i[0] in known_params)]
            known_params = set([i[0] for i in params_list])

        # no need for placeholders
        skip_params = set(skip_params + ['kwargs', '**kwargs'])

        # XXX we do evil check here, refactor code to separate
        #     regressions out of the classifiers, and making
        #     retrainable flag not available for those classes which
        #     can't actually do retraining.
        #     Although it is not actually that obvious for Meta Classifiers
        if hasattr(item, '__tags__'):
            clf_internals = item.__tags__
            skip_params.update([i for i in ('retrainable',)
                                if not (i in clf_internals)])
        known_params.update(skip_params)

        if extend_args:
            # go through all the parents and obtain their init parameters
            parent_params_list = []
            for i in args:
                if hasattr(i, '__init__'):
                    # XXX just assign within a class to avoid redoing it
                    #     without need
                    initdoc_ = i.__init__.__doc__
                    if initdoc_ is None:
                        continue
                    splits_ = _split_out_parameters(initdoc_)
                    params_ = splits_[1]
                    parent_params_list += _parse_parameters(params_.lstrip())

            # extend with ones which are not known to current init
            for i, v in parent_params_list:
                if not (i in known_params):
                    params_list += [(i, v)]
                    known_params.update([i])

        # if there are parameters -- populate the list
        if len(params_list):
            params_ = '\n'.join([i[1].rstrip() for i in params_list
                                 if not i[0] in skip_params])
            initdoc += "\n\n%s\n" \
                       % _rst_section('Parameters') + _indent(params_)

        if suffix != "":
            initdoc += "\n\n" + suffix

        initdoc = handle_docstring(initdoc)

        # Finally assign generated doc to the constructor
        lcl['__init__'].__doc__ = initdoc

    docs = [handle_docstring(lcl['__doc__'])]

    # Optionally populate the class documentation with it
    if __add_init2doc and initdoc != "":
        docs += [_rst_underline('Constructor information for `%s` class'
                                % name, rst_lvlmarkup[2]),
                 initdoc]

    # Add information about the ca if available
    if lcl.has_key('_cadoc') and len(item._cadoc):
        # to not conflict with a Notes section if such was already present
        lcldoc = lcl['__doc__'] or ''
        if not 'Notes' in lcldoc:
            section_name = _rst_section('Notes')
        else:
            section_name = '\n'    # just an additional newline
        # no indent is necessary since ca list must be already indented
        docs += ['%s\nAvailable conditional attributes:' % section_name,
                 handle_docstring(item._cadoc)]

    # Deprecated -- but actually we might like to have it in ipython
    # mode may be?
    if False: #len(args):
        bc_intro = _rst(' ') + 'Please refer to the documentation of the ' \
                   'base %s for more information:' \
                   % (single_or_plural('class', 'classes', len(args)))

        docs += ['\n' + _rst_section('See Also'),
                 bc_intro,
                 ' ' + ',\n '.join(['%s%s.%s%s%s'
                                    % (_rst(':class:`~'), i.__module__,
                                       i.__name__, _rst('`'), _rst_sep)
                                    for i in args])
                 ]

    itemdoc = '\n\n'.join(docs)
    # remove some bogus new lines -- 3 empty lines in a doc are never useful
    result = re.sub("\s*\n\s*\n\s*\n", "\n\n", itemdoc)

    return result
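# A minimal sketch of how a class body might invoke the helper above.  The
# classes, parameter names, and placement of the __doc__ assignment are
# illustrative assumptions; only the pattern of passing the class name,
# locals(), and base classes follows the docstring of enhanced_doc_string.
class _Base(object):
    def __init__(self, enable_ca=None):
        """
        Parameters
        ----------
        enable_ca : list or None
          Conditional attributes to enable.
        """
        self._enable_ca = enable_ca


class ExampleMeasure(_Base):
    """Toy measure used only to illustrate the docstring helper."""

    def __init__(self, scale=1.0, **kwargs):
        """Initialize ExampleMeasure

        Parameters
        ----------
        scale : float
          Hypothetical scaling applied to the result.
        """
        _Base.__init__(self, **kwargs)
        self.__scale = scale

    # merge own and parent constructor parameters into the class docs
    __doc__ = enhanced_doc_string('ExampleMeasure', locals(), _Base)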
def exists(dep, force=False, raiseException=False, issueWarning=None):
    """
    Test whether a known dependency is installed on the system.

    This method allows us to test for individual dependencies without
    testing all known dependencies. It also ensures that we only test
    for a dependency once.

    :Parameters:
      dep : string or list of string
        The dependency key(s) to test.
      force : boolean
        Whether to force the test even if it has already been performed.
      raiseException : boolean
        Whether to raise RuntimeError if dependency is missing.
      issueWarning : string or None or True
        If string, warning with given message would be thrown.
        If True, standard message would be used for the warning text.
    """
    # if we are provided with a list of deps - go through all of them
    if isinstance(dep, list) or isinstance(dep, tuple):
        results = [exists(dep_, force, raiseException) for dep_ in dep]
        return bool(reduce(lambda x, y: x and y, results, True))

    # where to look in cfg
    cfgid = 'have ' + dep

    # prevent unnecessary testing
    if cfg.has_option('externals', cfgid) \
       and not cfg.getboolean('externals', 'retest', default='no') \
       and not force:
        if __debug__:
            debug('EXT', "Skip retesting for '%s'." % dep)

        # check whether an exception should be raised, even though the
        # external was already tested previously
        if not cfg.getboolean('externals', cfgid) \
           and raiseException \
           and cfg.getboolean('externals', 'raise exception', True):
            raise RuntimeError, "Required external '%s' was not found" % dep
        return cfg.getboolean('externals', cfgid)

    # determine availability of external (non-cached)

    # default to 'not found'
    result = False

    if not _KNOWN.has_key(dep):
        raise ValueError, "%s is not a known dependency key." % (dep)
    else:
        # try and load the specific dependency
        if __debug__:
            debug('EXT', "Checking for the presence of %s" % dep)

        # Exceptions which are silently caught while running tests for externals
        _caught_exceptions = [ImportError, AttributeError, RuntimeError]

        # check whether RPy is involved and catch its exceptions as well.
        # however, try to determine whether this is really necessary, as
        # importing RPy also involves starting a full-blown R session, which
        # can take seconds and therefore is quite nasty...
        if dep.count('rpy') or _KNOWN[dep].count('rpy'):
            try:
                if dep == 'rpy':
                    __check_rpy()   # needed to be run to adjust options first
                else:
                    if exists('rpy'):
                        # otherwise no need to add anything -- test
                        # would fail since rpy isn't available
                        from rpy import RException
                        _caught_exceptions += [RException]
            except:
                pass

        estr = ''
        try:
            exec _KNOWN[dep]
            result = True
        except tuple(_caught_exceptions), e:
            estr = ". Caught exception was: " + str(e)

    if __debug__:
        debug('EXT', "Presence of %s is%s verified%s"
              % (dep, {True: '', False: ' NOT'}[result], estr))
def do_sweep(*args_, **kwargs_):
    """Perform sweeping over provided keyword arguments
    """
    def untrain_clf(argvalue):
        """Little helper"""
        if isinstance(argvalue, Classifier):
            # clear classifier after its use -- just to be sure ;-)
            argvalue.params.retrainable = False
            argvalue.untrain()

    failed_tests = {}
    skipped_tests = []

    for argname in kwargs.keys():
        for argvalue in kwargs[argname]:
            if isinstance(argvalue, Classifier):
                # clear classifier before its use
                argvalue.untrain()
            if isinstance(argvalue, ClassWithCollections):
                argvalue.ca.reset()
            # update kwargs_
            kwargs_[argname] = argvalue
            # do actual call
            try:
                if __debug__:
                    debug('TEST', 'Running %s on args=%r and kwargs=%r'
                          % (method.__name__, args_, kwargs_))
                method(*args_, **kwargs_)
            except SkipTest, e:
                skipped_tests += [e]
            except AssertionError, e:
                estr = str(e)
                etype, value, tb = sys.exc_info()
                # literal representation of exception tb, so
                # we could group them later on
                eidstr = ' '.join(
                    [l for l in tbm.format_exception(etype, value, tb)
                     if not ('do_sweep' in l
                             or 'unittest.py' in l
                             or 'AssertionError' in l
                             or 'Traceback (most' in l)])

                # Store exception information for later grouping
                if not eidstr in failed_tests:
                    failed_tests[eidstr] = []

                sargvalue = str(argvalue)
                if not (__debug__ and 'TEST' in debug.active):
                    # by default lets make it of sane length
                    if len(sargvalue) > 100:
                        sargvalue = sargvalue[:95] + ' ...'
                failed_tests[eidstr].append(
                    # skip top-most tb in sweep_args
                    (argname, sargvalue, tb.tb_next, estr))

                if __debug__:
                    msg = "%s on %s=%s" % (estr, argname, argvalue)
                    debug('TEST', 'Failed unittest: %s\n%s' % (eidstr, msg))
            untrain_clf(argvalue)
            # TODO: handle different levels of unittests properly
            if cfg.getboolean('tests', 'quick', False):
                # on TESTQUICK just run test for 1st entry in the list,
                # the rest are omitted
                # TODO: proper partitioning of unittests
                break
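# For context, a sketch of how a sweeping wrapper like do_sweep() above is
# typically applied to a test method.  The sweepargs decorator name, the
# clfswh warehouse lookup, and the test body are assumptions not shown in
# this excerpt; only datasets['uni2small'] appears elsewhere in this file.
import unittest


class ClassifierTests(unittest.TestCase):

    @sweepargs(clf=clfswh['linear', 'svm'])
    def test_trains_and_predicts(self, clf):
        ds = datasets['uni2small']
        clf.train(ds)
        # every classifier from the sweep list gets exercised here;
        # failures are grouped by traceback instead of aborting the sweep
        predictions = clf.predict(ds.samples)
        self.failUnless(len(predictions) == len(ds))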
fig = 1
for ds in datasets:
    for mname, mapper in mappers.iteritems():
        mapper.train(ds)

        dproj = mapper.forward(ds.samples)
        mproj = mapper.proj
        pl.subplot(ndatasets, nmappers, fig)
        if fig <= 3:
            pl.title(mname)
        pl.axis('equal')
        pl.scatter(ds.samples[:, 0] - center[0],
                   ds.samples[:, 1] - center[1],
                   s=30, c=(ds.sa.targets) * 200)
        plot_proj_dir(mproj)
        fig += 1

if cfg.getboolean('examples', 'interactive', True):
    pl.show()

"""
Output of the example:

.. image:: ../pics/ex_projections.*
   :align: center
   :alt: SVD/ICA/PCA projections

"""
def collectTestSuites():
    """Runs over all tests it knows and composes a dictionary with test suite
    instances as values and IDs as keys. IDs are the filenames of the unittest
    without '.py' extension and 'test_' prefix.

    During collection this function will run a full and verbose test for all
    known externals.
    """
    # list all test modules (without .py extension)
    tests = [
        # Basic data structures/manipulators
        'test_externals',
        'test_base',
        'test_dochelpers',
        'test_dataset',
        'test_arraymapper',
        'test_boxcarmapper',
        'test_som',
        'test_neighbor',
        'test_maskeddataset',
        'test_metadataset',
        'test_splitter',
        'test_state',
        'test_params',
        'test_eepdataset',
        # Misc supporting utilities
        'test_config',
        'test_stats',
        'test_support',
        'test_verbosity',
        'test_iohelpers',
        'test_report',
        'test_datasetfx',
        'test_cmdline',
        'test_args',
        'test_eepdataset',
        'test_meg',
        # Classifiers (longer tests)
        'test_kernel',
        'test_clf',
        'test_regr',
        'test_knn',
        'test_svm',
        'test_plr',
        'test_smlr',
        # Various algorithms
        'test_svdmapper',
        'test_procrust',
        'test_samplegroupmapper',
        'test_transformers',
        'test_transerror',
        'test_clfcrossval',
        'test_searchlight',
        'test_rfe',
        'test_ifs',
        'test_datameasure',
        'test_perturbsensana',
        'test_splitsensana',
        # And the suite (all-in-1)
        'test_suite',
        ]

    # provide people with a hint about the warnings that might show up in a
    # second
    warning('Testing for availability of external software packages. Test '
            'cases depending on missing packages will not be part of the test '
            'suite.')

    # So we could see all warnings about missing dependencies
    warning.maxcount = 1000

    # full test of externals
    externals.testAllDependencies()

    __optional_tests = [('scipy', 'ridge'),
                        ('scipy', 'stats_sp'),
                        ('scipy', 'datasetfx_sp'),
                        (['lars', 'scipy'], 'lars'),
                        ('nifti', 'niftidataset'),
                        ('mdp', 'icamapper'),
                        ('scipy', 'zscoremapper'),
                        ('pywt', 'waveletmapper'),
                        (['cPickle', 'gzip'], 'hamster'),
                        #('mdp', 'pcamapper'),
                        ]

    if not cfg.getboolean('tests', 'lowmem', default='no'):
        __optional_tests += [(['nifti', 'lxml'], 'atlases')]

    # and now for the optional tests
    optional_tests = []

    for external, testname in __optional_tests:
        if externals.exists(external):
            optional_tests.append('test_%s' % testname)

    # finally merge all of them
    tests += optional_tests

    # import all test modules
    for t in tests:
        exec 'import ' + t

    # instantiate all test suites and return dict of them (with ID as key)
    return dict([(t[5:], eval(t + '.suite()')) for t in tests])
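# A minimal driver sketch for the collector above (Python 2, matching the
# code's vintage).  Aggregating the returned dictionary of suites into a
# single unittest.TestSuite is an assumption about how it would be consumed;
# the runner API itself is standard unittest.
import unittest

suites = collectTestSuites()            # e.g. {'externals': <TestSuite>, ...}
everything = unittest.TestSuite(suites.values())
unittest.TextTestRunner(verbosity=2).run(everything)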