def test_legacy_hash_ascii_data(self):
    """Check the legacy hash and its bucketing against recorded fixtures.

    Each non-empty line of ``files/sample-data.jsonl`` is decoded with
    ``json.loads`` and unpacked into ``seed, key, hashed, bucket``.  The
    legacy hash of ``key`` under ``seed`` must equal ``hashed`` and the
    splitter must place the key into ``bucket``.
    """
    splitter = splitters.Splitter()
    fixture_path = os.path.join(
        os.path.dirname(__file__), 'files', 'sample-data.jsonl')
    with open(fixture_path, 'r') as handle:
        records = handle.read().split('\n')

    for record in records:
        # Splitting on '\n' leaves empty strings for blank lines; skip them.
        if not record:
            continue
        seed, key, hashed, bucket = json.loads(record)
        assert hashfns.legacy.legacy_hash(key, seed) == hashed
        assert splitter.get_bucket(
            key, seed, splits.HashAlgorithm.LEGACY) == bucket
def test_murmur_more_ascii_data(self):
    """Test murmur hash function against known results.

    Each non-empty line of ``files/murmur3-custom-uuids.csv`` is split on
    commas into ``seed, key, hashed, bucket``; ``seed``, ``hashed`` and
    ``bucket`` are converted to ``int``.  The murmur hash of ``key`` under
    ``seed`` must equal ``hashed`` and the splitter must place the key into
    ``bucket`` when using ``HashAlgorithm.MURMUR``.
    """
    splitter = splitters.Splitter()
    file_name = os.path.join(
        os.path.dirname(__file__), 'files', 'murmur3-custom-uuids.csv')
    with open(file_name, 'r') as flo:
        lines = flo.read().split('\n')

    for line in lines:
        # str.split never yields None, so only empty strings (blank lines,
        # e.g. after a trailing newline) need to be skipped.
        if not line:
            continue
        seed, key, hashed, bucket = line.split(',')
        seed = int(seed)
        hashed = int(hashed)
        bucket = int(bucket)
        assert hashfns._murmur_hash(key, seed) == hashed
        assert splitter.get_bucket(
            key, seed, splits.HashAlgorithm.MURMUR) == bucket
def test_murmur_hash_non_ascii_data(self):
    """Test murmur hash function against known non-ASCII results.

    Each non-empty line of
    ``files/murmur3-sample-data-non-alpha-numeric-v2.csv`` (read as UTF-8)
    is split on commas into ``seed, key, hashed, bucket``; ``seed``,
    ``hashed`` and ``bucket`` are converted to ``int``.  The murmur hash of
    ``key`` under ``seed`` must equal ``hashed`` and the splitter must place
    the key into ``bucket`` when using ``HashAlgorithm.MURMUR``.
    """
    splitter = splitters.Splitter()
    file_name = os.path.join(
        os.path.dirname(__file__),
        'files',
        'murmur3-sample-data-non-alpha-numeric-v2.csv')
    # io.open is used so the fixture is decoded explicitly as UTF-8.
    with io.open(file_name, 'r', encoding='utf-8') as flo:
        lines = flo.read().split('\n')

    for line in lines:
        # str.split never yields None, so only empty strings (blank lines,
        # e.g. after a trailing newline) need to be skipped.
        if not line:
            continue
        seed, key, hashed, bucket = line.split(',')
        seed = int(seed)
        hashed = int(hashed)
        bucket = int(bucket)
        assert hashfns._murmur_hash(key, seed) == hashed
        assert splitter.get_bucket(
            key, seed, splits.HashAlgorithm.MURMUR) == bucket