def test_hash_chars(self):
    """For one table, all chars map to different chars.

    Hashes each of the 256 single-character strings ``chr(0)`` ..
    ``chr(255)`` and checks that 256 distinct hash values come back.
    """
    results = set()
    for c in range(256):
        # Reseed before every call so each hash is computed from the same
        # random state (presumably distinct.hash derives its table from the
        # `random` module -- TODO confirm).
        random.seed(1)
        results.add(distinct.hash(chr(c)))
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(len(results), 256)
Example #2
0
 def test_hash_chars(self):
     """For one table, all chars map to different chars.

     Hashes each of the 256 single-character strings ``chr(0)`` ..
     ``chr(255)`` and checks that 256 distinct hash values come back.
     """
     results = set()
     for c in range(256):
         # Reseed before every call so each hash is computed from the same
         # random state (presumably distinct.hash derives its table from the
         # `random` module -- TODO confirm).
         random.seed(1)
         results.add(distinct.hash(chr(c)))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(len(results), 256)
    def test_chi_square(self):
        """Check that hash values spread evenly over buckets.

        Hashes the decimal strings of ``0 .. N-1`` and, for several ways of
        turning a hash into a bucket index, accumulates a chi-square
        statistic of the bucket counts against a uniform expectation.

        ``bits < 0`` buckets by scaling the whole hash value into
        ``buckets`` bins; otherwise one byte of the hash, starting at bit
        ``bits``, selects the bucket.

        Skipped when ``scipy.stats.chi2`` cannot be imported.
        """
        try:
            from scipy.stats import chi2
        except ImportError:
            raise SkipTest("Missing chi2, skipping")

        N = 10000

        for (bits, buckets) in [(-1, 1024), (24, 256), (16, 256), (8, 256),
                                (0, 256)]:
            bins = [0] * buckets
            for i in range(N):
                v = distinct.hash(str(i))
                if bits < 0:
                    # Scale the hash into [0, buckets) with integer
                    # arithmetic.  The previous `v / (0xFFFFFFFF / buckets)`
                    # produced a float index on Python 3 (TypeError) and
                    # could reach `buckets` itself for v == 0xFFFFFFFF.
                    # Assumes distinct.hash returns an unsigned 32-bit
                    # integer, as the original 0xFFFFFFFF divisor implies
                    # -- TODO confirm.
                    index = (v * buckets) // 0x100000000
                else:
                    index = (v >> bits) & 0xFF
                bins[index] += 1
            # Use the exact (fractional) expected count per bucket rather
            # than a truncated integer.
            expected = N / float(buckets)
            value = sum(((x - expected) ** 2) / expected for x in bins)
            # NOTE(review): a chi-square goodness-of-fit test over `buckets`
            # bins normally uses buckets - 1 degrees of freedom; N is kept
            # here as in the original because the 0.5 threshold below
            # depends on it -- confirm intent before changing.
            pval = chi2.cdf(value, N)
            if pval > 0.5:
                print(bins, pval)
            self.assertTrue(pval < 0.5, "bits %s, pval == %s" % (bits, pval))
Example #4
0
    def test_chi_square(self):
        """Check that hash values spread evenly over buckets.

        Hashes the decimal strings of ``0 .. N-1`` and, for several ways of
        turning a hash into a bucket index, accumulates a chi-square
        statistic of the bucket counts against a uniform expectation.

        ``bits < 0`` buckets by scaling the whole hash value into
        ``buckets`` bins; otherwise one byte of the hash, starting at bit
        ``bits``, selects the bucket.

        Skipped when ``scipy.stats.chi2`` cannot be imported.
        """
        try:
            from scipy.stats import chi2
        except ImportError:
            raise SkipTest("Missing chi2, skipping")

        N = 10000

        for (bits, buckets) in [(-1, 1024), (24, 256),
                                (16, 256), (8, 256), (0, 256)]:
            bins = [0] * buckets
            for i in range(N):
                v = distinct.hash(str(i))
                if bits < 0:
                    # Scale the hash into [0, buckets) with integer
                    # arithmetic.  The previous `v / (0xFFFFFFFF / buckets)`
                    # produced a float index on Python 3 (TypeError) and
                    # could reach `buckets` itself for v == 0xFFFFFFFF.
                    # Assumes distinct.hash returns an unsigned 32-bit
                    # integer, as the original 0xFFFFFFFF divisor implies
                    # -- TODO confirm.
                    index = (v * buckets) // 0x100000000
                else:
                    index = (v >> bits) & 0xFF
                bins[index] += 1
            # Use the exact (fractional) expected count per bucket rather
            # than a truncated integer.
            expected = N / float(buckets)
            value = sum(((x - expected) ** 2) / expected for x in bins)
            # NOTE(review): a chi-square goodness-of-fit test over `buckets`
            # bins normally uses buckets - 1 degrees of freedom; N is kept
            # here as in the original because the 0.5 threshold below
            # depends on it -- confirm intent before changing.
            pval = chi2.cdf(value, N)
            if pval > 0.5:
                print(bins, pval)
            self.assertTrue(pval < 0.5, "bits %s, pval == %s" % (bits, pval))