    def test_null_dist_prob(self, null):
        """Testing null dist probability

        Fits `null` with a OneWayAnova measure on the 'uni2small' dataset
        and checks the p-values it reports.

        Parameters
        ----------
        null
          Object under test.  Anything which is not a `NullDist`
          instance is skipped.
        """
        if not isinstance(null, NullDist):
            # The guard had no body (a syntax error); the only sensible
            # action for a non-NullDist argument is to skip the checks.
            return
        ds = datasets['uni2small']

        null.fit(OneWayAnova(), ds)

        # check reasonable output.
        # p-values for non-bogus features should be significantly different,
        # while bogus (0) not
        prob = null.p([20, 0, 0, 0, 0, np.nan])
        # XXX this is labile! it also needs checking since the F-scores
        # of the MCNullDists using normal distribution are apparently not
        # distributed that way, hence the test often (if not always) fails.
        if cfg.getboolean('tests', 'labile', default='yes'):
            # both labile assertions share a single configuration check
            self.failUnless(np.abs(prob[0]) < 0.05,
                            msg="Expected small p, got %g" % prob[0])
            self.failUnless((np.abs(prob[1:]) > 0.05).all(),
                            msg="Bogus features should have insignificant p."
                            " Got %s" % (np.abs(prob[1:]),))

        # has to have matching shape
        if not isinstance(null, FixedNullDist):
            # Fixed dist is univariate ATM so it doesn't care
            # about dimensionality and gives 1 output value
            self.failUnlessRaises(ValueError, null.p, [5, 3, 4])
    # Exercise SimpleSOMMapper on the eight corner colours of the RGB cube:
    # size accessors must raise RuntimeError on the fresh mapper, then
    # report in-size 3 / out-size (10, 5); forward mapping of the 8 colours
    # must have shape (8, 2); reverse mapping is compared to the input
    # (labile check only).
    # NOTE(review): no statement between the RuntimeError checks and the
    # size checks changes the mapper's state -- presumably a training call
    # was dropped there; confirm against the upstream file.
    # NOTE(review): the `for fm in fmapped:` loop below has no body, so
    # this block does not parse as shown.
    def testSimpleSOM(self):
        colors = [[0., 0., 0.], [0., 0., 1.], [0., 1., 0.],
                  [1., 0., 0.], [0., 1., 1.], [1., 0., 1.],
                  [1., 1., 0.], [1., 1., 1.]]
        ds = Dataset(samples=colors, labels=1)

        # only small SOM for speed reasons
        som = SimpleSOMMapper((10, 5), 200, learning_rate=0.05)

        # no access when nothing is there
        self.failUnlessRaises(RuntimeError, som._accessKohonen)
        self.failUnlessRaises(RuntimeError, som.getInSize)
        self.failUnlessRaises(RuntimeError, som.getOutSize)


        self.failUnless(som.getInSize() == 3)
        self.failUnless(som.getOutSize() == (10,5))

        fmapped = som(colors)
        self.failUnless(fmapped.shape == (8, 2))
        for fm in fmapped:

        # reverse mapping
        rmapped = som.reverse(fmapped)

        if cfg.getboolean('tests', 'labile', default='yes'):
            # should approximately restore the input, but could fail
            # with bad initialisation
            self.failUnless((N.round(rmapped) == ds.samples).all())
    # Compare plain OneWayAnova with CompoundOneWayAnova(combiner=None):
    # on 'uni2medium' the compound result must have one column per unique
    # label, both columns must be equal, and each must equal the plain
    # result.  On 'uni4large' the per-label results must differ.
    # NOTE(review): the docstring below is never closed in the visible
    # text, and the labile assertion near the end is truncated (only its
    # last line survives), so this block does not parse as shown.
    def testAnova(self):
        """Do some extended testing of OneWayAnova

        in particular -- compound estimation

        m = OneWayAnova()               # default must be not compound ?
        mc = CompoundOneWayAnova(combiner=None)
        ds = datasets['uni2medium']

        # For 2 labels it must be identical for both and equal to
        # simple OneWayAnova
        a, ac = m(ds), mc(ds)

        self.failUnless(a.shape == (ds.nfeatures,))
        self.failUnless(ac.shape == (ds.nfeatures, len(ds.uniquelabels)))

        self.failUnless((ac[:, 0] == ac[:, 1]).all())
        self.failUnless((a == ac[:, 1]).all())

        ds = datasets['uni4large']
        ac = mc(ds)

        if cfg.getboolean('tests', 'labile', default='yes'):
            # All non-bogus features must be high for a corresponding feature
                                 N.arange(4))] >= 1).all())
        # All features should have slightly different CompoundAnova
        # values. I really doubt that there will be a case when this
        # test would fail just due to being 'labile'
        self.failUnless(N.max(N.std(ac, axis=1))>0,
                        msg='In compound anova, we should get different'
                        ' results for different labels. Got %s' % ac)
# Build and return the list of nose-based test module names; 'test_atlases'
# is appended unless the 'tests'/'lowmem' configuration flag is set.
# NOTE(review): `verbosity` is not used anywhere in the visible body.
# NOTE(review): the docstring is never closed and the `tests = [` literal
# contains only category comments and no closing bracket -- the entries
# appear to have been dropped; this block does not parse as shown.
def collect_nose_tests(verbosity=1):
    """Return list of tests which are pure nose-based
    tests = [
        # Basic data structures/manipulators

        # Datasets

        # Misc supporting

        # Mappers

        # Learners

        # Algorithms

        # IO

        # Measures

    # memory-hungry test is only added when 'lowmem' is not requested
    if not cfg.getboolean('tests', 'lowmem', default='no'):
        tests += ['test_atlases']

    return tests
        # Run `method` once per candidate value: for every argument name in
        # `kwargs` (taken from the enclosing scope, not visible here) each
        # value is stored into kwargs_ and `method(*args_, **kwargs_)` is
        # called.  AssertionErrors are caught and grouped in `failed_tests`
        # under a key built from the filtered traceback text.
        # NOTE(review): several lines appear to be missing -- the bodies of
        # the two isinstance() checks, the `try:` matching the `except`,
        # the call that appends to failed_tests[eidstr], and the body of
        # the 'quick' branch.  The block does not parse as shown.
        def do_sweep(*args_, **kwargs_):
            def untrain_clf(argvalue):
                """Little helper"""
                if isinstance(argvalue, Classifier):
                    # clear classifier after its use -- just to be sure ;-)
                    argvalue.retrainable = False

            # maps traceback-derived id string -> list of failure records
            failed_tests = {}
            for argname in kwargs.keys():
                for argvalue in kwargs[argname]:
                    if isinstance(argvalue, Classifier):
                        # clear classifier before its use
                    if isinstance(argvalue, ClassWithCollections):
                    # update kwargs_
                    kwargs_[argname] = argvalue
                    # do actual call
                        if __debug__:
                            debug('TEST', 'Running %s on args=%s and kwargs=%s' %
                                  (method.__name__, `args_`, `kwargs_`))
                        method(*args_, **kwargs_)
                    except AssertionError, e:
                        estr = str(e)
                        etype, value, tb = sys.exc_info()
                        # literal representation of exception tb, so
                        # we could group them later on
                        eidstr = '  '.join(
                            [l for l in traceback.format_exception(etype, value, tb)
                             if not ('do_sweep' in l or 'unittest.py' in l
                                     or 'AssertionError' in l or 'Traceback (most' in l)])

                        # Store exception information for later grouping
                        if not eidstr in failed_tests:
                            failed_tests[eidstr] = []

                            # skip top-most tb in sweep_args
                            (argname, `argvalue`, tb.tb_next, estr))

                        if __debug__:
                            msg = "%s on %s=%s" % (estr, argname, `argvalue`)
                            debug('TEST', 'Failed unittest: %s\n%s' % (eidstr, msg))
                    # TODO: handle different levels of unittests properly
                    if cfg.getboolean('tests', 'quick', False):
                        # on TESTQUICK just run test for 1st entry in the list,
                        # the rest are omitted
                        # TODO: proper partitioning of unittests
    def test_enet(self):
        """Train ENET on 'chirp_linear' and check its predictions.

        The predictions for the training samples must correlate with the
        targets (Pearson r > 0.8); the threshold is only enforced when
        labile tests are enabled in the configuration.
        """
        # not the perfect dataset with which to test, but
        # it will do for now.
        # (datasets['dumb2'] was tried, but for some reason the R code
        # fails with the dumb data)
        data = datasets['chirp_linear']

        clf = ENET()

        # the model has to be fitted before it can predict anything;
        # without this step predict() runs on an untrained instance
        clf.train(data)

        # prediction has to be almost perfect
        # test with a correlation
        pre = clf.predict(data.samples)
        cor = pearsonr(pre, data.targets)
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(cor[0] > .8)
# Build and return the list of nose-based test module names; 'test_atlases'
# is appended unless the 'tests'/'lowmem' configuration flag is set.
# NOTE(review): the docstring is never closed and the `tests = [` literal
# is cut off after its first entry, so this block does not parse as shown.
def collect_nose_tests():
    """Return list of tests which are pure nose-based
    tests = [ 'test_collections',

    # memory-hungry test is only added when 'lowmem' is not requested
    if not cfg.getboolean('tests', 'lowmem', default='no'):
        tests += ['test_atlases']

    # The per-external conditionals below are intentionally disabled:
    ## SkipTest will take care about marking those as S
    ## if externals.exists('scipy'):
    ##     tests += ['test_mapper_sp']
    ## if externals.exists('glmnet'):
    ##     tests += ['test_glmnet']
    ## if externals.exists('nifti'):
    ##     tests += ['test_niftidataset']
    ## if externals.exists('mdp'):
    ##     tests += ['test_mdp']
    ## if externals.exists('h5py'):
    ##     tests += ['test_hdf5']

    return tests
# Import rpy and, when the 'rpy'/'interactive' option is false and rpy's
# input callback is still rpy.rpy_io.rpy_input, define a replacement
# callback that always answers "1".
# NOTE(review): the docstring is never closed, and `input1` is defined but
# not installed anywhere in the visible lines -- the statement registering
# it with rpy appears to be missing.
def __check_rpy():
    """Check either rpy is available and also set it for the sane execution
    #import rpy_options
    #rpy_options.set_options(VERBOSE=False, SETUP_READ_CONSOLE=False) # SETUP_WRITE_CONSOLE=False)
    #rpy_options.set_options(VERBOSE=False, SETUP_WRITE_CONSOLE=False) # SETUP_WRITE_CONSOLE=False)
    #    if not cfg.get('rpy', 'read_console', default=False):
    #        print "no read"
    #        rpy_options.set_options(SETUP_READ_CONSOLE=False)
    #    if not cfg.get('rpy', 'write_console', default=False):
    #        print "no write"
    #        rpy_options.set_options(SETUP_WRITE_CONSOLE=False)
    import rpy
    # only override the input handler if nobody replaced the stock one
    if not cfg.getboolean('rpy', 'interactive', default=True) \
           and (rpy.get_rpy_input() is rpy.rpy_io.rpy_input):
        if __debug__:
            debug('EXT_', "RPy: providing dummy callback for input to return '1'")
        def input1(*args): return "1"      # which is "1: abort (with core dump, if enabled)"
# Call exists() for every key in _KNOWN, warn about each one that is not
# available, and (in debug mode) log the 'externals' options whose name
# starts with 'have' and whose value is true.
# NOTE(review): the docstring delimiters are missing, so the description
# lines below are bare text and the block does not parse as shown.
def testAllDependencies(force=False):
    Test for all known dependencies.

      force : boolean
        Whether to force the test even if it has already been

    # loop over all known dependencies
    for dep in _KNOWN:
        if not exists(dep, force):
            warning("%s is not available." % dep)

    if __debug__:
        # k[5:] strips the leading 'have ' from the option name
        debug('EXT', 'The following optional externals are present: %s' \
                     % [ k[5:] for k in cfg.options('externals')
                            if k.startswith('have') \
                            and cfg.getboolean('externals', k) == True ])
    # Compare plain OneWayAnova with CompoundOneWayAnova: on 'uni2medium'
    # the plain result has shape (1, nfeatures), the compound result has
    # one row per entry of ds.UT, both rows must be equal and must equal
    # the plain result.  On 'uni4large' the per-label results must differ.
    # NOTE(review): the docstring below is never closed in the visible
    # text, and the labile assertion near the end is truncated (only its
    # last line survives), so this block does not parse as shown.
    def test_anova(self):
        """Do some extended testing of OneWayAnova

        in particular -- compound estimation

        m = OneWayAnova()  # default must be not compound ?
        mc = CompoundOneWayAnova()
        ds = datasets['uni2medium']

        # For 2 labels it must be identical for both and equal to
        # simple OneWayAnova
        a, ac = m(ds), mc(ds)

        self.failUnless(a.shape == (1, ds.nfeatures))
        self.failUnless(ac.shape == (len(ds.UT), ds.nfeatures))

        assert_array_equal(ac[0], ac[1])
        assert_array_equal(a, ac[1])

        # check for p-value attrs
        if externals.exists('scipy'):
            assert_true('fprob' in a.fa.keys())
            assert_equal(len(ac.fa), len(ac))

        ds = datasets['uni4large']
        ac = mc(ds)
        if cfg.getboolean('tests', 'labile', default='yes'):
            # All non-bogus features must be high for a corresponding feature
                            np.array(ds.a.nonbogus_features)] >= 1).all())
        # All features should have slightly different CompoundAnova
        # values. I really doubt that there will be a case when this
        # test would fail just due to being 'labile'
        self.failUnless(np.max(np.std(ac, axis=1)) > 0,
                        msg='In compound anova, we should get different'
                        ' results for different labels. Got %s' % ac)
# Report whether the external dependency `dep` is available, caching the
# answer in the 'externals' section of cfg under the key 'have <dep>'.
# A list/tuple of keys is checked element-wise and the results are and-ed.
# NOTE(review): this block is incomplete as shown and does not parse --
#   * the docstring delimiters are missing;
#   * the `else:` after the unknown-key check and the `try:` before the
#     `exec` are missing;
#   * the handler for R-related exceptions appears twice, each copy with
#     an unclosed list literal;
#   * the string case of `issueWarning` is not handled in the visible code;
#   * `if not cfg.has_section('externals'):` has no body and no 'no' value
#     is ever stored; the function has no visible `return result`.
def exists(dep, force=False, raise_=False, issueWarning=None):
    Test whether a known dependency is installed on the system.

    This method allows us to test for individual dependencies without
    testing all known dependencies. It also ensures that we only test
    for a dependency once.

    dep : string or list of string
      The dependency key(s) to test.
    force : boolean
      Whether to force the test even if it has already been
    raise_ : boolean
      Whether to raise RuntimeError if dependency is missing.
    issueWarning : string or None or True
      If string, warning with given message would be thrown.
      If True, standard message would be used for the warning
    # if we are provided with a list of deps - go through all of them
    # (note: issueWarning is not forwarded to the recursive calls)
    if isinstance(dep, list) or isinstance(dep, tuple):
        results = [exists(dep_, force, raise_) for dep_ in dep]
        return bool(reduce(lambda x, y: x and y, results, True))

    # where to look in cfg
    cfgid = 'have ' + dep

    # prevent unnecessary testing
    if cfg.has_option('externals', cfgid) \
       and not cfg.getboolean('externals', 'retest', default='no') \
       and not force:
        if __debug__:
            debug('EXT', "Skip retesting for '%s'." % dep)

        # check whether an exception should be raised, even though the external
        # was already tested previously
        if not cfg.getboolean('externals', cfgid) \
               and raise_ \
               and cfg.getboolean('externals', 'raise exception', True):
            raise RuntimeError, "Required external '%s' was not found" % dep
        return cfg.getboolean('externals', cfgid)

    # determine availability of external (non-cached)

    # default to 'not found'
    result = False

    if not _KNOWN.has_key(dep):
        raise ValueError, "%s is not a known dependency key." % (dep)
        # try and load the specific dependency
        if __debug__:
            debug('EXT', "Checking for the presence of %s" % dep)

        # Exceptions which are silently caught while running tests for externals
        _caught_exceptions = [ImportError, AttributeError, RuntimeError]

        estr = ''
            # _KNOWN[dep] holds the code snippet that probes the dependency
            exec _KNOWN[dep]
            result = True
        except tuple(_caught_exceptions), e:
            estr = ". Caught exception was: " + str(e)
        except Exception, e:
            # Add known ones by their names so we don't need to
            # actually import anything manually to get those classes
            if e.__class__.__name__ in [
                    'RPy_Exception', 'RRuntimeError', 'RPy_RException'
                _caught_exceptions += [e.__class__]
                estr = ". Caught exception was: " + str(e)
            # Add known ones by their names so we don't need to
            # actually import anything manually to get those classes
            if e.__class__.__name__ in ['RPy_Exception', 'RRuntimeError',
                _caught_exceptions += [e.__class__]
                estr = ". Caught exception was: " + str(e)

        if __debug__:
            debug('EXT', "Presence of %s is%s verified%s" %
                  (dep, {True:'', False:' NOT'}[result], estr))

    if not result:
        if raise_ \
               and cfg.getboolean('externals', 'raise exception', True):
            raise RuntimeError, "Required external '%s' was not found" % dep
        if issueWarning is not None \
               and cfg.getboolean('externals', 'issue warning', True):
            if issueWarning is True:
                warning("Required external '%s' was not found" % dep)

    # store result in config manager
    if not cfg.has_section('externals'):
    if result:
        cfg.set('externals', 'have ' + dep, 'yes')
# Draw one subplot per (dataset, mapper) pair; `fig` is the running
# subplot index and is advanced after every scatter plot.
# NOTE(review): `if fig <= 3:` has no body, and `center`, `fig`,
# `ndatasets` and `nmappers` are not defined in the visible lines --
# statements appear to be missing, so the loop does not parse as shown.
for ds in datasets:
    for mname, mapper in mappers.iteritems():

        # forward-project the samples and fetch the mapper's projection
        dproj = mapper.forward(ds.samples)
        mproj = mapper.proj
        pl.subplot(ndatasets, nmappers, fig)
        if fig <= 3:

        # scatter of the first two sample columns, shifted by `center`
        pl.scatter(ds.samples[:, 0] - center[0],
                  ds.samples[:, 1] - center[1],
                  s=30, c=(ds.sa.targets) * 200)
        fig += 1

if cfg.getboolean('examples', 'interactive', True):

Output of the example:

.. image:: ../pics/ex_projections.*
   :align: center
   :alt: SVD/ICA/PCA projections

                    estr = ". Caught exception was: " + str(e)
            # And restore warnings

        if __debug__:
            debug('EXT', "Presence of %s is%s verified%s" %
                  (dep, {True:'', False:' NOT'}[result], estr))

    if not result:
        if raise_:
            raise RuntimeError, "Required external '%s' was not found" % dep
        if issueWarning is not None \
               and cfg.getboolean('externals', 'issue warning', True):
            if issueWarning is True:
                warning("Required external '%s' was not found" % dep)

    # store result in config manager
    if not cfg.has_section('externals'):
    if result:
        cfg.set('externals', 'have ' + dep, 'yes')
        cfg.set('externals', 'have ' + dep, 'no')

    return result
      Dimensionality of target space
    data : array, optional
      Some data (should have rank high enough) to derive
    if nt is None:
        nt = ns
    # figure out some "random" rotation
    d = max(ns, nt)
    if data is None:
        data = np.random.normal(size=(d*10, d))
    _u, _s, _vh = np.linalg.svd(data[:, :d])
    R = _vh[:ns, :nt]
    if ns == nt:
        # Test if it is indeed a rotation matrix ;)
        # Lets flip first axis if necessary
        if np.linalg.det(R) < 0:
            R[:, 0] *= -1.0
    return R

datasets = generate_testing_datasets(specs)

if cfg.getboolean('tests', 'use hdf datasets', False):
    if not externals.exists('h5py'):
        raise RuntimeError(
            "Cannot perform HDF5 dump of all datasets in the warehouse, "
            "because 'h5py' is not available")

    datasets = saveload_warehouse()
    print "Replaced all dataset warehouse for HDF5 loaded alternative."
# Assemble an extended docstring for `item` (a class, or a name plus its
# locals dict passed as args[0]): the __init__ docstring is split into
# text / parameters / suffix, optionally extended with parameters taken
# from the base classes given in *args, written back to __init__.__doc__,
# and the combined class documentation is returned as a string.
# NOTE(review): the kwargs validation raises "unknown keyword arguments"
# when the given keys ARE a subset of {'force_extend'}, i.e. exactly for
# the supported call; the condition looks inverted, and 'skip_params'
# (read right after it) is not in the accepted set either.
# NOTE(review): in the `hasattr(item, "__name__")` branch the ValueError
# is raised unconditionally as shown -- an `else:` appears to be missing.
# NOTE(review): many other lines are missing (unterminated docstring,
# unclosed brackets, `if`/`else` headers without bodies); the block does
# not parse as shown and is therefore left byte-identical.
def enhanced_doc_string(item, *args, **kwargs):
    """Generate enhanced doc strings for various items.

    item : str or class
      What object requires enhancing of documentation
    *args : list
      Includes base classes to look for parameters, as well, first item
      must be a dictionary of locals if item is given by a string
    force_extend : bool
      Either to force looking for the documentation in the parents.
      By default force_extend = False, and lookup happens only if kwargs
      is one of the arguments to the respective function (e.g. item.__init__)
    skip_params : list of str
      List of parameters (in addition to [kwargs]) which should not
      be added to the documentation of the class.

    It is to be used from a collector, ie whenever class is already created
    # Handling of arguments
    if len(kwargs):
        if set(kwargs.keys()).issubset(set(['force_extend'])):
            raise ValueError, "Got unknown keyword arguments (smth among %s)" \
                  " in enhanced_doc_string." % kwargs
    force_extend = kwargs.get('force_extend', False)
    skip_params = kwargs.get('skip_params', [])

    # XXX make it work also not only with classes but with methods as well
    if isinstance(item, basestring):
        if len(args) < 1 or not isinstance(args[0], dict):
            raise ValueError, \
                  "Please provide locals for enhanced_doc_string of %s" % item
        name = item
        lcl = args[0]
        args = args[1:]
    elif hasattr(item, "im_class"):
        # bound method
        raise NotImplementedError, \
              "enhanced_doc_string is not yet implemented for methods"
    elif hasattr(item, "__name__"):
        name = item.__name__
        lcl = item.__dict__
        raise ValueError, "Don't know how to extend docstring for %s" % item

    # check whether docstring magic is requested or not
    if not cfg.getboolean('doc', 'pimp docstrings', True):
        return lcl['__doc__']

    if __debug__:
        debug('DOCH', 'Processing docstrings of %s' % name)

    #return lcl['__doc__']
    rst_lvlmarkup = ["=", "-", "_"]

    # would then be called for any child... ok - ad hoc for SVM???
    if hasattr(item, '_customize_doc') and name == 'SVM':

    initdoc = ""
    if lcl.has_key('__init__'):
        func = lcl['__init__']
        initdoc = func.__doc__

        # either to extend arguments
        # do only if kwargs is one of the arguments
        # in python 2.5 args are no longer in co_names but in varnames
        extend_args = force_extend or \
                      'kwargs' in (func.func_code.co_names +

        if __debug__ and not extend_args:
                  'Not extending parameters for __init__ of  %s' % name)

        if initdoc is None:
            initdoc = "Initialize instance of %s" % name

        initdoc, params, suffix = _split_out_parameters(initdoc)
        params_list = _parse_parameters(params)

        known_params = set([i[0] for i in params_list])

        # If there are additional ones:
        if lcl.has_key('_paramsdoc'):
            params_list += [
                i for i in lcl['_paramsdoc'] if not (i[0] in known_params)
            known_params = set([i[0] for i in params_list])

        # no need for placeholders
        skip_params = set(skip_params + ['kwargs', '**kwargs'])

        # XXX we do evil check here, refactor code to separate
        #     regressions out of the classifiers, and making
        #     retrainable flag not available for those classes which
        #     can't actually do retraining. Although it is not
        #     actually that obvious for Meta Classifiers
        if hasattr(item, '__tags__'):
            clf_internals = item.__tags__
                [i for i in ('retrainable', ) if not (i in clf_internals)])

        if extend_args:
            # go through all the parents and obtain their init parameters
            parent_params_list = []
            for i in args:
                if hasattr(i, '__init__'):
                    # XXX just assign within a class to don't redo without need
                    initdoc_ = i.__init__.__doc__
                    if initdoc_ is None:
                    splits_ = _split_out_parameters(initdoc_)
                    params_ = splits_[1]
                    parent_params_list += _parse_parameters(params_.lstrip())

            # extend with ones which are not known to current init
            for i, v in parent_params_list:
                if not (i in known_params):
                    params_list += [(i, v)]

        # if there are parameters -- populate the list
        if len(params_list):
            params_ = '\n'.join([
                i[1].rstrip() for i in params_list if not i[0] in skip_params
            initdoc += "\n\n%s\n" \
                       % _rst_section('Parameters') + _indent(params_)

        if suffix != "":
            initdoc += "\n\n" + suffix

        initdoc = handle_docstring(initdoc)

        # Finally assign generated doc to the constructor
        lcl['__init__'].__doc__ = initdoc

    docs = [handle_docstring(lcl['__doc__'])]

    # Optionally populate the class documentation with it
    if __add_init2doc and initdoc != "":
        docs += [
            _rst_underline('Constructor information for `%s` class' % name,
                           rst_lvlmarkup[2]), initdoc

    # Add information about the ca if available
    if lcl.has_key('_cadoc') and len(item._cadoc):
        # to don't conflict with Notes section if such was already
        # present
        lcldoc = lcl['__doc__'] or ''
        if not 'Notes' in lcldoc:
            section_name = _rst_section('Notes')
            section_name = '\n'  # just an additional newline
        # no indent is necessary since ca list must be already indented
        docs += [
            '%s\nAvailable conditional attributes:' % section_name,

    # Deprecated -- but actually we might like to have it in ipython
    # mode may be?
    if False:  #len(args):
        bc_intro = _rst('  ') + 'Please refer to the documentation of the ' \
                   'base %s for more information:' \
                   % (single_or_plural('class', 'classes', len(args)))

        docs += [
            '\n' + _rst_section('See Also'), bc_intro, '  ' + ',\n  '.join([
                '%s%s.%s%s%s' % (_rst(':class:`~'), i.__module__, i.__name__,
                                 _rst('`'), _rst_sep) for i in args

    itemdoc = '\n\n'.join(docs)
    # remove some bogus new lines -- never 3 empty lines in doc are useful
    result = re.sub("\s*\n\s*\n\s*\n", "\n\n", itemdoc)

    return result
                _caught_exceptions += [e.__class__]
                estr = ". Caught exception was: " + str(e)

        if __debug__:
                'EXT', "Presence of %s is%s verified%s" % (dep, {
                    True: '',
                    False: ' NOT'
                }[result], estr))

    if not result:
        if raise_ \
               and cfg.getboolean('externals', 'raise exception', True):
            raise RuntimeError, "Required external '%s' was not found" % dep
        if issueWarning is not None \
               and cfg.getboolean('externals', 'issue warning', True):
            if issueWarning is True:
                warning("Required external '%s' was not found" % dep)

    # store result in config manager
    if not cfg.has_section('externals'):
    if result:
        cfg.set('externals', 'have ' + dep, 'yes')
        cfg.set('externals', 'have ' + dep, 'no')
# Draw one subplot per (dataset, mapper) pair; `fig` is the running
# subplot index, starting at 1 and advanced after every scatter plot.
# NOTE(review): `if fig <= 3:` has no body, and `center`, `ndatasets` and
# `nmappers` are not defined in the visible lines -- statements appear to
# be missing, so the loop does not parse as shown.
fig = 1

for ds in datasets:
    for mname, mapper in mappers.iteritems():

        # forward-project the samples and fetch the mapper's projection
        dproj = mapper.forward(ds.samples)
        mproj = mapper.proj
        pl.subplot(ndatasets, nmappers, fig)
        if fig <= 3:

        # scatter of the first two sample columns, shifted by `center`
        pl.scatter(ds.samples[:, 0] - center[0],
                   ds.samples[:, 1] - center[1],
                   c=(ds.sa.targets) * 200)
        fig += 1

if cfg.getboolean('examples', 'interactive', True):
Output of the example:

.. image:: ../pics/ex_projections.*
   :align: center
   :alt: SVD/ICA/PCA projections

    nt : int, optional
      Dimensionality of target space
    data : array, optional
      Some data (should have rank high enough) to derive
    if nt is None:
        nt = ns
    # figure out some "random" rotation
    d = max(ns, nt)
    if data is None:
        data = np.random.normal(size=(d * 10, d))
    _u, _s, _vh = np.linalg.svd(data[:, :d])
    R = _vh[:ns, :nt]
    if ns == nt:
        # Test if it is indeed a rotation matrix ;)
        # Lets flip first axis if necessary
        if np.linalg.det(R) < 0:
            R[:, 0] *= -1.0
    return R

if cfg.getboolean('tests', 'use hdf datasets', False):
    if not externals.exists('h5py'):
        raise RuntimeError(
            "Cannot perform HDF5 dump of all datasets in the warehouse, "
            "because 'h5py' is not available")

    datasets = saveload_warehouse()
    print "Replaced all dataset warehouse for HDF5 loaded alternative."
# Collect test suites: start from a base list of test module names, add
# 'test_<name>' for every optional test whose external(s) exist, import
# each module and return a dict mapping the module name without its
# 'test_' prefix to the result of that module's suite().
# NOTE(review): the docstring is never closed, the `tests = [` and
# `__optional_tests = [` literals and the warning(...) call are not
# closed, and the base list holds only category comments -- entries
# appear to have been dropped; the block does not parse as shown.
# NOTE(review): modules are imported via `exec` and suites obtained via
# `eval` on strings built from the names in `tests`.
def collectTestSuites():
    """Runs over all tests it knows and composes a dictionary with test suite
    instances as values and IDs as keys. IDs are the filenames of the unittest
    without '.py' extension and 'test_' prefix.

    During collection this function will run a full and verbose test for all
    known externals.
    # list all test modules (without .py extension)
    tests = [
        # Basic data structures/manipulators
        # Misc supporting utilities
        # Classifiers (longer tests)
        # Various algorithms
        # And the suite (all-in-1)

    # provide people with a hint about the warnings that might show up in a
    # second
    warning('Testing for availability of external software packages. Test '
            'cases depending on missing packages will not be part of the test '

    # So we could see all warnings about missing dependencies
    warning.maxcount = 1000
    # fully test of externals

    # pairs of (required external(s), test name without 'test_' prefix)
    __optional_tests = [ ('scipy', 'ridge'),
                         ('scipy', 'stats_sp'),
                         ('scipy', 'datasetfx_sp'),
                         (['lars','scipy'], 'lars'),
                         ('nifti', 'niftidataset'),
                         ('mdp', 'icamapper'),
                         ('scipy', 'zscoremapper'),
                         ('pywt', 'waveletmapper'),
                         (['cPickle', 'gzip'], 'hamster'),
    #                     ('mdp', 'pcamapper'),

    if not cfg.getboolean('tests', 'lowmem', default='no'):
        __optional_tests += [(['nifti', 'lxml'], 'atlases')]

    # and now for the optional tests
    optional_tests = []

    for external, testname in __optional_tests:
        if externals.exists(external):
            optional_tests.append('test_%s' % testname)

    # finally merge all of them
    tests += optional_tests

    # import all test modules
    for t in tests:
        exec 'import ' + t

    # instantiate all tests suites and return dict of them (with ID as key)
    return dict([(t[5:], eval(t + '.suite()')) for t in tests ])
