def test_index():
    quants = np.arange(1, 20).astype('d')
    # draw 100 random samples in the half-open interval [0, 20)
    samples = np.random.rand(100) * 20
    idx = su.index(samples, quants)
    # quants are the integers 1..19, so each sample's bin index
    # should equal the floor of its value
    yield (np.testing.assert_array_equal, idx, np.floor(samples))
    yield assert_true, idx.min() >= 0, 'negative index calculated'
    yield assert_true, idx.max() <= len(quants), 'too high index calculated'

    # su.index should agree with numpy's own searchsorted
    expected = quants.searchsorted(samples)
    yield assert_true, (idx == expected).all()
# --- Esempio n. 2 (Example 2) ---
    def p_score_from_maximal_statistics(self, tail, t=None,
                                        pooled_dims=(),
                                        corrected_dims=()):
        """Returns family-wise corrected p values for the requested tail
        based on the distribution of the most extreme statistics from
        each permuted test. Optionally pool the distributions across
        dimensions, or further correct for multiple comparisons across
        dimensions (IE, duplicate maximal stats across those dims)

        Parameters
        ----------
        tail : str
          {'pos', 'neg'} -- return p values for whether the test statistic
          is significantly large or small, respectively
        t : ndarray, optional
          Optionally, score these statistics (rather than this object's
          statistics) against the maximal stat distribution. Must have the
          same shape as this object's statistics.
        pooled_dims : tuple, optional (can not intersect with corrected_dims)
          If not empty, pool the extreme statistics across the given dims
          to form a new maximal distribution.
        corrected_dims : tuple, optional (can not intersect with pooled_dims)
          If not empty, replace the distribution along this dimension with
          the single maximal value.

        Returns
        -------
        p_vals : ndarray
          Empirical p values, same shape as the scored statistics.
        """
        if t is None:
            # default to scoring this object's own test statistics
            t = self.t
        else:
            assert t.shape == self.t.shape, 'shape mismatch, cannot map'

        # Empirical maximal-stat distribution along axis 0, after any
        # pooling/correction of the extra dims (presumably sorted
        # ascending, as su.index requires -- see _fix_dist).
        dist = self._fix_dist(tail, corrected_dims, pooled_dims)
        # p_table[k] is the p value assigned to a statistic that lands
        # in slot k of the distribution (dist.shape[0]+1 possible slots)
        p_table = np.linspace(0,1,dist.shape[0]+1,endpoint=True)
        if tail=='pos':
            # for the positive tail, LARGE statistics are significant:
            # walk the distribution and its p table from the top down
            dist = dist[::-1]
            p_table = p_table[::-1]

        # XYZ: maybe this can be smarter
        # NOTE(review): assumes t is 3D -- (permutations?, nt, nf);
        # confirm against self.t's construction.
        nt, nf = t.shape[1:]
        p_vals = np.empty_like(t)
        for tp in xrange(nt):
            for fp in xrange(nf):
                # a singleton dist dimension is shared (broadcast)
                # across all points of the corresponding t dimension
                dt = 0 if dist.shape[1]==1 else tp
                df = 0 if dist.shape[2]==1 else fp
                # locate each statistic within the null distribution...
                t_indexed = su.index(t[:,tp,fp], dist[:,dt,df])
                # ...and look up the corresponding p value in place
                np.take(p_table, t_indexed, out=p_vals[:,tp,fp])
                        
        
        return p_vals
# --- Esempio n. 3 (Example 3) ---
import numpy as np
import nutmeg.stats.stats_utils as su
import matplotlib.pyplot as pp

# Demonstration: score t statistics against an empirical null distribution
# with su.index, then recover the null's edges from the (t, p) pairs.

# simulate some maximal null distribution from 100 permutations
null = np.sort(np.random.normal(size=(100,), loc=4, scale=1.2))
# create test scores from our experiment
# (but for ease of demonstration, clip the values to be within the null dist)
t_scores = np.clip(
    np.random.normal(size=(20,), loc=5, scale=0.8),
    null.min(), null.max()*.99
    )

# Score the t_scores based on the empirical null distribution
index = su.index(t_scores, null)
p_table = np.linspace(1,0,len(null)+1,endpoint=True)
p_scores = np.take(p_table, index)

# Now, try to recover the null distribution, based on these few
# T-score and P-score pairs -- use the negative edges
edges, pbins = su.map_t(-t_scores, p_scores, 1/100.)

# map_t worked on negated scores, so negate and reverse to restore order
edges = -edges[::-1]

# For each t score, count how many recovered edges exceed it: that count
# is the rank (out of 100) encoded by its p score. Broadcasting replaces
# the original per-score Python (xrange) loop with one vectorized pass.
p_recovered = (edges[np.newaxis, :] > t_scores[:, np.newaxis]).sum(axis=1)
assert (np.round(p_scores*100) == p_recovered).all()

probability = np.arange(100)/100.