def ks_test(x, y=None, alt="two sided", exact=None, warn_for_ties=True):
    """Returns the statistic and probability from the Kolmogorov-Smirnov test.

    Arguments:
        - x, y: vectors of numbers whose distributions are to be compared.
        - alt: the alternative hypothesis, default is 2-sided. Accepted
          spellings are listed in the lo/hi/two tuples below.
        - exact: whether to compute the exact probability. When None, the
          exact computation is chosen if len(x) * len(y) < 1e4. The exact
          form is only used for the 2-sided alternative without ties.
        - warn_for_ties: warns when values are tied. This should be left at
          True unless a monte carlo variant, like ks_boot, is being used.

    Returns the tuple (stat, Pval).

    Raises:
        - NotImplementedError if y is None (1-sample case).
        - RuntimeError if alt is not a recognised alternative.
        - ValueError if x or y is empty.

    Note the 1-sample cases are not implemented, although their cdf's are
    implemented in ks.py"""
    # translation from R 2.4
    lo = ("less", "lo", "lower", "l", "lt")
    hi = ("greater", "hi", "high", "h", "g", "gt")
    two = ("two sided", "2", 2, "two tailed", "two", "two.sided")

    num_x = len(x)
    if y is None:
        # in anticipation of actually implementing the 1-sample cases
        raise NotImplementedError
    num_y = len(y)

    if not (alt in two or alt in lo or alt in hi):
        raise RuntimeError("Unknown alt: %s" % alt)
    if num_x == 0 or num_y == 0:
        raise ValueError("x and y must each contain at least one value")

    # explicit float arithmetic so the result does not depend on
    # `from __future__ import division` being active
    n = num_x * num_y / float(num_x + num_y)

    # pool both samples; positions >= num_x in the pooled vector belong to y
    values = array(list(x) + list(y), dtype=float)
    order = values.argsort(kind="mergesort")
    values = values[order]
    from_y = (order >= num_x).astype(int)

    # running difference between the two empirical cdf's: each x observation
    # steps up by 1/num_x, each y observation steps down by 1/num_y
    steps = array([1.0 / num_x, -1.0 / num_y]).take(from_y)
    ecdf_diff = steps.cumsum()

    # Ties are judged on the values alone (regardless of which sample they
    # came from). The cdf difference is only meaningful once every member of
    # a run of equal values has been counted, so keep just the last position
    # of each run, as R's ks.test does.
    last_of_run = ones(num_x + num_y, bool)
    last_of_run[:-1] = values[1:] != values[:-1]
    ties = not last_of_run.all()
    ecdf_diff = ecdf_diff[last_of_run]

    if alt in two:
        stat = fabs(ecdf_diff).max()
    elif alt in lo:
        stat = -ecdf_diff.min()
    else:
        stat = ecdf_diff.max()

    if exact is None:
        exact = num_x * num_y < 1e4

    if exact and alt in two and not ties:
        Pval = 1 - psmirnov2x(stat, num_x, num_y)
    elif alt in two:
        Pval = 1 - pkstwo(sqrt(n) * stat)
    else:
        Pval = exp(-2 * n * stat**2)

    if ties and warn_for_ties:
        warnings.warn("Cannot compute correct KS probability with ties")

    try:  # if numpy arrays were input, the Pval can be an array of len==1
        Pval = Pval[0]
    except (TypeError, IndexError):
        pass
    return stat, Pval
def test_pkstwo(self):
    """pkstwo (asymptotic Kolmogorov cdf) should agree with the R answer"""
    observed = pkstwo(2.3)
    # expected value is expressed as the complement of 5.084e-05
    expected = [1-5.084e-05]
    self.assertFloatEqual(observed, expected, eps=1e-5)