Esempio n. 1
0
    def test_match_distribution_semifrozen(self):
        """Check that match_distribution copes with frozen parameters.

        ``np.arange(10)`` is matched against a free 'uniform' and against a
        'uniform' whose ``loc`` is frozen at 0.  The last item of every
        returned match is then inspected, reading its first two entries as
        the fitted loc and scale.
        """
        candidates = ['uniform', ('uniform', {'loc': 0})]
        # p=-1 so we get all matches
        matches = match_distribution(np.arange(10),
                                     distributions=candidates,
                                     p=-1)
        # we must get some match
        self.assertEqual(len(matches), 2)

        # full fit should get close to true loc
        free_loc = matches[0][-1][0]
        self.assertTrue(abs(free_loc) < 4e-1)
        # frozen should maintain the loc
        frozen_loc = matches[1][-1][0]
        self.assertEqual(frozen_loc, 0)

        # known to work on 0.10 and fail on 0.7.3
        if not (externals.versions['scipy'] >= '0.10'):
            raise SkipTest("KnownFailure to fit uniform on older scipy")
        # full fit should get close to true scale
        free_scale = matches[0][-1][1]
        self.assertTrue(abs(free_scale - 9) < 1e-1)

        # actually it fails ATM to fit uniform with frozen loc=0
        # nicely -- sets scale = 1 :-/   TODO
        raise SkipTest("TODO: Known failure to fit uniform with frozen loc")
        # Never reached while the skip above is in place: frozen fit of scale
        self.assertTrue(abs(matches[1][-1][1] - 9) < 1e-1)
Esempio n. 2
0
    def test_match_distribution(self):
        """Some really basic testing for match_distribution and rv_semifrozen.

        First checks that ``rv_semifrozen`` keeps the frozen ``loc``/``scale``
        values when fitting, and that every fit returns two parameters.
        Then runs ``match_distribution`` for both tests ('p-roc', 'kstest')
        with and without an explicit ``loc``, and, when pylab is usable,
        exercises ``plot_distribution_matches`` once.
        """
        ds = datasets['uni2medium']  # large to get stable stats
        # choose bogus feature, which should have close to normal distribution
        data = ds.samples[:, ds.a.bogus_features[0]]

        # Lets test ad-hoc rv_semifrozen.  assertEqual is used so that a
        # failure reports the actual fitted value.
        floc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data)
        self.assertEqual(floc[0], 0)

        fscale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data)
        self.assertEqual(fscale[1], 1)

        flocscale = rv_semifrozen(scipy.stats.norm, loc=0, scale=1.0).fit(data)
        # checked separately so a failure tells which parameter drifted
        self.assertEqual(flocscale[1], 1)
        self.assertEqual(flocscale[0], 0)

        full = scipy.stats.norm.fit(data)
        for res in [floc, fscale, flocscale, full]:
            self.assertEqual(len(res), 2)

        data_mean = np.mean(data)
        for loc in [None, data_mean]:
            for test in ['p-roc', 'kstest']:
                # some really basic testing
                matched = match_distribution(data=data,
                                             distributions=[
                                                 'scipy',
                                                 ('norm', {
                                                     'name': 'norm_fixed',
                                                     'loc': 0.2,
                                                     'scale': 0.3
                                                 })
                                             ],
                                             test=test,
                                             loc=loc,
                                             p=0.05)
                # at least norm should be in there
                names = [m[2] for m in matched]
                if test != 'p-roc':
                    continue

                if cfg.getboolean('tests', 'labile', default='yes'):
                    # we can guarantee that only for norm_fixed
                    self.assertTrue('norm' in names)
                    self.assertTrue('norm_fixed' in names)
                    inorm = names.index('norm_fixed')
                    # and it should be at least in the first
                    # 30 best matching ;-)
                    self.assertTrue(inorm <= 30)

                # Test plotting only once
                if loc is None and externals.exists("pylab plottable"):
                    import pylab as pl
                    from mvpa2.clfs.stats import plot_distribution_matches
                    fig = pl.figure()
                    try:
                        plot_distribution_matches(data,
                                                  matched,
                                                  legend=1,
                                                  nbest=5)
                    finally:
                        # close the figure even if plotting raised, so it
                        # does not leak into subsequent tests
                        pl.close(fig)
Esempio n. 3
0
    def test_match_distribution_semifrozen(self):
        """Exercise match_distribution with a partially frozen distribution.

        Matches ``np.arange(10)`` against an unconstrained 'uniform' and a
        'uniform' with ``loc`` frozen at 0, then looks at the last item of
        each match, treating its entries 0 and 1 as loc and scale.
        """
        matches = match_distribution(
            np.arange(10),
            distributions=['uniform', ('uniform', {'loc': 0})],
            p=-1)  # so we get all matches
        # we must get some match
        self.assertEqual(len(matches), 2)

        full_params = matches[0][-1]
        # full fit should get close to true loc
        self.assertTrue(abs(full_params[0]) < 4e-1)

        frozen_params = matches[1][-1]
        # frozen should maintain the loc
        self.assertEqual(frozen_params[0], 0)

        scipy_is_recent = externals.versions['scipy'] >= '0.10'
        if not scipy_is_recent:
            raise SkipTest("KnownFailure to fit uniform on older scipy")
        # known to work on 0.10 and fail on 0.7.3:
        # full fit should get close to true scale
        self.assertTrue(abs(full_params[1] - 9) < 1e-1)

        # actually it fails ATM to fit uniform with frozen loc=0
        # nicely -- sets scale = 1 :-/   TODO
        raise SkipTest("TODO: Known failure to fit uniform with frozen loc")
        # Not executed while the skip above remains: frozen fit of scale
        self.assertTrue(abs(frozen_params[1] - 9) < 1e-1)
Esempio n. 4
0
    def test_match_distribution(self):
        """Some really basic testing for match_distribution and rv_semifrozen.

        Verifies that ``rv_semifrozen`` preserves frozen ``loc``/``scale``
        values on ``fit`` and that each fit yields two parameters, then calls
        ``match_distribution`` for the 'p-roc' and 'kstest' tests, with and
        without an explicit ``loc``.  If pylab is usable,
        ``plot_distribution_matches`` is exercised once.
        """
        ds = datasets['uni2medium']      # large to get stable stats
        # choose bogus feature, which should have close to normal distribution
        data = ds.samples[:, ds.a.bogus_features[0]]

        # Lets test ad-hoc rv_semifrozen.  assertEqual (rather than
        # assertTrue(a == b)) shows the fitted value when a check fails.
        floc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data)
        self.assertEqual(floc[0], 0)

        fscale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data)
        self.assertEqual(fscale[1], 1)

        flocscale = rv_semifrozen(scipy.stats.norm, loc=0, scale=1.0).fit(data)
        # two assertions so a failure identifies the parameter at fault
        self.assertEqual(flocscale[1], 1)
        self.assertEqual(flocscale[0], 0)

        full = scipy.stats.norm.fit(data)
        for res in [floc, fscale, flocscale, full]:
            self.assertEqual(len(res), 2)

        data_mean = np.mean(data)
        for loc in [None, data_mean]:
            for test in ['p-roc', 'kstest']:
                # some really basic testing
                matched = match_distribution(
                    data=data,
                    distributions=['scipy',
                                   ('norm',
                                    {'name': 'norm_fixed',
                                     'loc': 0.2,
                                     'scale': 0.3})],
                    test=test, loc=loc, p=0.05)
                # at least norm should be in there
                names = [m[2] for m in matched]
                if test != 'p-roc':
                    continue

                if cfg.getboolean('tests', 'labile', default='yes'):
                    # we can guarantee that only for norm_fixed
                    self.assertTrue('norm' in names)
                    self.assertTrue('norm_fixed' in names)
                    inorm = names.index('norm_fixed')
                    # and it should be at least in the first
                    # 30 best matching ;-)
                    self.assertTrue(inorm <= 30)

                # Test plotting only once
                if loc is None and externals.exists("pylab plottable"):
                    import pylab as pl
                    from mvpa2.clfs.stats import plot_distribution_matches
                    fig = pl.figure()
                    try:
                        plot_distribution_matches(data, matched,
                                                  legend=1, nbest=5)
                    finally:
                        # always release the figure, even when plotting
                        # fails, so it cannot leak into later tests
                        pl.close(fig)
Esempio n. 5
0
    def test_match_distribution_semifrozen(self):
        """Verify match_distribution when one distribution has a frozen loc.

        ``np.arange(10)`` is matched against a free 'uniform' and against a
        'uniform' with ``loc`` frozen at 0; the last item of each match is
        read as the fitted parameters (entry 0 as loc, entry 1 as scale).
        """
        specs = ['uniform', ('uniform', {'loc': 0})]
        # p=-1 so we get all matches
        matches = match_distribution(np.arange(10), distributions=specs, p=-1)
        # we must get some match
        self.assertEqual(len(matches), 2)

        free_fit = matches[0][-1]
        # full fit should get close to true loc
        self.assertTrue(abs(free_fit[0]) < 4e-1)
        # full fit should get close to true scale
        self.assertTrue(abs(free_fit[1] - 9) < 1e-1)

        frozen_fit = matches[1][-1]
        # frozen should maintain the loc
        self.assertEqual(frozen_fit[0], 0)

        # actually it fails ATM to fit uniform with frozen loc=0
        # nicely -- sets scale = 1 :-/   TODO
        raise SkipTest("TODO: Known failure to fit uniform with frozen loc")
        # Unreachable until the skip above is lifted: frozen fit of scale
        self.assertTrue(abs(frozen_fit[1] - 9) < 1e-1)