Beispiel #1
def ks_test(x, y=None, alt="two sided", exact = None, warn_for_ties = True):
    """Returns the statistic and probability from the Kolmogorov-Smirnov test.

    Arguments:
        - x, y: vectors of numbers whose distributions are to be compared.
        - alt: the alternative hypothesis, default is 2-sided. Accepted
          spellings are listed in the lo, hi and two lists below.
        - exact: whether to compute the exact probability. If None, the
          exact computation is chosen when len(x) * len(y) < 1e4. It is only
          used for the 2-sided alternative when there are no ties.
        - warn_for_ties: warns when values are tied. This should be left at
          True unless a monte carlo variant, like ks_boot, is being used.

    Returns the tuple (stat, Pval).

    Raises NotImplementedError if y is None (the 1-sample cases are not
    implemented) and RuntimeError if alt is not a recognised alternative.
    """
    # translation from R 2.4
    num_x = len(x)
    if y is None:
        # in anticipation of actually implementing the 1-sample cases
        raise NotImplementedError
    num_y = len(y)

    lo = ["less", "lo", "lower", "l", "lt"]
    hi = ["greater", "hi", "high", "h", "g", "gt"]
    two = ["two sided", "2", 2, "two tailed", "two", "two.sided"]

    # float() keeps this a true division even without
    # "from __future__ import division" on Python 2
    n = num_x * num_y / float(num_x + num_y)

    # tag every observation with the sample it came from (0 -> x, 1 -> y)
    pairs = [(value, 0) for value in x] + [(value, 1) for value in y]

    # ties are a property of the observed values alone, so the sample tag
    # must not take part in the uniqueness check
    ties = len(set([value for value, _ in pairs])) < num_x + num_y

    combined = array(pairs, dtype=[('stat', float), ('sample', int)])
    # the empirical cdf's are only comparable once the pooled observations
    # are ordered by value
    combined.sort(order='stat')

    # walking the pooled, sorted observations: step up by 1/num_x for an x
    # value and down by 1/num_y for a y value, giving the difference
    # between the two empirical cdf's at each observation
    scales = array([1.0 / num_x, -1.0 / num_y])
    cumsum = scales.take(combined['sample']).cumsum()

    if exact is None:
        exact = num_x * num_y < 1e4

    if alt in two:
        stat = fabs(cumsum).max()
    elif alt in lo:
        stat = -cumsum.min()
    elif alt in hi:
        stat = cumsum.max()
    else:
        raise RuntimeError("Unknown alt: %s" % alt)

    Pval = None
    if exact and alt in two and not ties:
        Pval = 1 - psmirnov2x(stat, num_x, num_y)

    if Pval is None:
        # fall back to the asymptotic distributions
        if alt in two:
            Pval = 1 - pkstwo(sqrt(n) * stat)
        else:
            Pval = exp(-2 * n * stat**2)

    if ties and warn_for_ties:
        warnings.warn("Cannot compute correct KS probability with ties")

    try: # if numpy arrays were input, the Pval can be an array of len==1
        Pval = Pval[0]
    except (TypeError, IndexError):
        pass
    return stat, Pval
Beispiel #2
 def test_pkstwo(self):
     """Kolmogorov asymptotic (pkstwo) results should match answers from R."""
Beispiel #3
 def test_pkstwo(self):
     """Kolmogorov asymptotic (pkstwo) results should match answers from R."""