def test_hash_chars(self):
    """For one table, all chars map to different chars.

    Hashes each of the 256 single-character strings ``chr(0)`` ..
    ``chr(255)`` with ``distinct.hash`` and asserts that 256 distinct hash
    values come back, i.e. no two single characters collide.
    """
    results = set()
    for c in range(256):
        # Re-seed before every call, as the original test does.
        # NOTE(review): presumably distinct.hash derives its lookup table
        # from the `random` module, so a fixed seed pins "one table" --
        # confirm against the distinct module.
        random.seed(1)
        results.add(distinct.hash(chr(c)))
    # assertEqual rather than assertEquals: the latter is a deprecated alias
    # that was removed from unittest in Python 3.12.
    self.assertEqual(len(results), 256)
def test_chi_square(self):
    """Check that ``distinct.hash`` spreads sequential keys over buckets.

    Hashes ``str(i)`` for ``i`` in ``range(N)`` and bins the results in
    several ways: once by scaling the whole hash value into 1024 buckets
    (``bits == -1``) and once per byte of the hash value (shifts of 24, 16,
    8 and 0 bits, 256 buckets each).  For every binning it computes
    Pearson's chi-square statistic against a uniform expectation of
    ``N / buckets`` hits per bucket and asserts that ``chi2.cdf`` of that
    statistic stays below 0.5.

    Raises:
        SkipTest: if ``scipy.stats.chi2`` cannot be imported.
    """
    try:
        from scipy.stats import chi2
    except ImportError:
        raise SkipTest("Missing chi2, skipping")

    N = 10000
    # Size of the hash value space.
    # NOTE(review): assumes distinct.hash returns an unsigned 32-bit integer,
    # as the 0xFFFFFFFF constant in the original divisor implies -- confirm.
    hash_space = 0xFFFFFFFF + 1

    for bits, buckets in [(-1, 1024), (24, 256), (16, 256), (8, 256), (0, 256)]:
        bins = [0] * buckets
        for i in range(N):
            v = distinct.hash(str(i))
            if bits < 0:
                # Floor division keeps the index an int on Python 3, and
                # dividing by hash_space // buckets (not 0xFFFFFFFF // buckets)
                # keeps the largest hash values inside the last bucket
                # instead of indexing one past the end of `bins`.
                index = v // (hash_space // buckets)
            else:
                # Select one byte of the hash value.
                index = (v >> bits) & 0xFF
            bins[index] += 1

        # Float expectation: integer division would truncate it on Python 2
        # (e.g. 10000 / 1024 == 9) and skew the statistic.
        expected = float(N) / buckets
        value = sum((count - expected) ** 2 / expected for count in bins)

        # NOTE(review): the degrees of freedom passed here is N, whereas a
        # goodness-of-fit test over `buckets` bins would normally use
        # buckets - 1.  Left unchanged because correcting it also requires
        # choosing a new acceptance threshold -- confirm the intent.
        pval = chi2.cdf(value, N)
        if pval > 0.5:
            # Dump the histogram to help diagnose the failure below.
            print(bins, pval)
        self.assertTrue(pval < 0.5, "bits %s, pval == %s" % (bits, pval))