Ejemplo n.º 1
0
    def test_legacy_hash_ascii_data(self):
        """Check the legacy hash and its bucketing against recorded results.

        Each non-empty line of ``files/sample-data.jsonl`` (located next to
        this test module) is decoded as JSON and unpacked into
        ``seed, key, hashed, bucket``. For every such record the legacy hash
        of ``key`` with ``seed`` must equal ``hashed``, and the splitter must
        place the key into ``bucket`` when using the LEGACY algorithm.
        """
        splitter = splitters.Splitter()
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'files', 'sample-data.jsonl')
        with open(fixture_path, 'r') as handle:
            # Drop empty entries up front (e.g. the one produced by a
            # trailing newline) so the loop below only sees real records.
            records = [row for row in handle.read().split('\n') if row]

        for record in records:
            seed, key, expected_hash, expected_bucket = json.loads(record)
            actual_hash = hashfns.legacy.legacy_hash(key, seed)
            assert actual_hash == expected_hash
            actual_bucket = splitter.get_bucket(
                key, seed, splits.HashAlgorithm.LEGACY)
            assert actual_bucket == expected_bucket
Ejemplo n.º 2
0
    def test_murmur_more_ascii_data(self):
        """Check the murmur hash and its bucketing against recorded results.

        Each non-empty line of ``files/murmur3-custom-uuids.csv`` (located
        next to this test module) is split on commas into
        ``seed, key, hashed, bucket``; all fields except ``key`` are
        converted to ``int``. For every record the murmur hash of ``key``
        with ``seed`` must equal ``hashed``, and the splitter must place the
        key into ``bucket`` when using the MURMUR algorithm.
        """
        splitter = splitters.Splitter()
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'files', 'murmur3-custom-uuids.csv')
        with open(fixture_path, 'r') as handle:
            # Drop empty entries up front (e.g. the one produced by a
            # trailing newline) so the loop below only sees real records.
            records = [row for row in handle.read().split('\n') if row]

        for record in records:
            seed_text, key, hash_text, bucket_text = record.split(',')
            seed = int(seed_text)
            expected_bucket = int(bucket_text)
            expected_hash = int(hash_text)
            actual_hash = hashfns._murmur_hash(key, seed)
            assert actual_hash == expected_hash
            actual_bucket = splitter.get_bucket(
                key, seed, splits.HashAlgorithm.MURMUR)
            assert actual_bucket == expected_bucket
Ejemplo n.º 3
0
    def test_murmur_hash_non_ascii_data(self):
        """Check the murmur hash and its bucketing on UTF-8 encoded fixtures.

        Each non-empty line of
        ``files/murmur3-sample-data-non-alpha-numeric-v2.csv`` (located next
        to this test module, read as UTF-8) is split on commas into
        ``seed, key, hashed, bucket``; all fields except ``key`` are
        converted to ``int``. For every record the murmur hash of ``key``
        with ``seed`` must equal ``hashed``, and the splitter must place the
        key into ``bucket`` when using the MURMUR algorithm.
        """
        splitter = splitters.Splitter()
        fixture_dir = os.path.join(os.path.dirname(__file__), 'files')
        fixture_path = os.path.join(
            fixture_dir, 'murmur3-sample-data-non-alpha-numeric-v2.csv')
        with io.open(fixture_path, 'r', encoding='utf-8') as handle:
            # Split strictly on '\n' and drop empty entries (e.g. the one
            # produced by a trailing newline).
            records = [row for row in handle.read().split('\n') if row]

        for record in records:
            seed_text, key, hash_text, bucket_text = record.split(',')
            seed = int(seed_text)
            expected_bucket = int(bucket_text)
            expected_hash = int(hash_text)
            actual_hash = hashfns._murmur_hash(key, seed)
            assert actual_hash == expected_hash
            actual_bucket = splitter.get_bucket(
                key, seed, splits.HashAlgorithm.MURMUR)
            assert actual_bucket == expected_bucket