def test_invalid_algo(algo):
    """An unrecognised algorithm name must make generate_hash raise ValueError."""
    if algo in cmph._ALGOS:
        pytest.skip("Random algo is a known algo !")
    test_data = _words
    with pytest.raises(ValueError), open(test_data) as test_input:
        cmph.generate_hash(test_input, algorithm=algo)
def test_invalid_hash_fn(hash_fns):
    """At least one unknown hash fn in the list must raise ValueError."""
    assume(len(hash_fns) > 1)
    assume(any(fn not in cmph._HASH_FNS for fn in hash_fns))
    test_data = _words
    with pytest.raises(ValueError), open(test_data) as test_input:
        cmph.generate_hash(test_input, hash_fns=hash_fns)
def test_each_algo_defaults(tmpdir, algo):
    """Round-trip (generate -> save -> load) an MPH for each algorithm."""
    if algo == 'brz':
        pytest.skip("brz is known to segfault on some machines")
    # bmz8 only supports small key sets, so it gets the smaller word list.
    test_data = _words8 if algo == 'bmz8' else _words
    with open(test_data) as test_input:
        mph = cmph.generate_hash(test_input, algorithm=algo)
    out = tmpdir.ensure('%s.mph' % algo)
    with out.open('w') as test_output:
        mph.save(test_output)
    with out.open() as saved_mph:
        mph2 = cmph.load_hash(saved_mph)
    # The reloaded hash must agree with the original on every key.
    with open(test_data) as test_input:
        for word in test_input:
            assert mph(word) == mph2(word)
    # Check that nothing untoward happens in __del__
    del mph
    del mph2
def test_each_algo_defaults(tmpdir, algo):
    """Each algorithm builds, persists, reloads, and reproduces its hash."""
    if algo == "brz":
        pytest.skip("brz is known to segfault on some machines")
    test_data = _words
    if algo == "bmz8":
        # bmz8 is limited to small key sets.
        test_data = _words8
    with open(test_data) as keys:
        mph = cmph.generate_hash(keys, algorithm=algo)
    saved = tmpdir.ensure("%s.mph" % algo)
    with saved.open("w") as sink:
        mph.save(sink)
    with saved.open() as source:
        reloaded = cmph.load_hash(source)
    with open(test_data) as keys:
        for key in keys:
            assert mph(key) == reloaded(key)
    # Check that nothing untoward happens in __del__
    del mph
    del reloaded
def _storedata(self, path, keys):
    """Build a fresh VarArray at *path* indexed by an MPH over *keys*.

    Returns the (VarArray, mph) pair.  Slots whose index is not produced
    by any key are filled with None; occupied slots hold the tuple
    (original enumeration index of the key, key).
    """
    # Start from a clean directory.
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)
    mph = cmph.generate_hash(keys)
    # NOTE(review): the MPH is persisted at self._mph_path, not under
    # *path* — presumably set up by the caller/constructor.
    mph.save(self._mph_path)
    indices = [mph(k) for k in keys]
    _max = max(indices)
    keyindices = {k: i for i, k in enumerate(keys)}
    slot_to_key = dict(zip(indices, keys))
    d = VarArray(path)
    slots = []
    for slot in range(_max + 1):
        key = slot_to_key.get(slot)
        slots.append(None if key is None else (keyindices[key], key))
    d.extend(slots)
    d.flush()
    return d, mph
def test_filename_usage(tmpdir):
    """save/load accept plain file-system paths, not just file objects."""
    mph = cmph.generate_hash(_words)
    out = tmpdir.ensure('out.mph')
    mph.save(out.strpath)
    mph2 = cmph.load_hash(out.strpath)
    with open(_words) as test_input:
        for word in test_input:
            assert mph(word) == mph2(word)
def test_filename_usage(tmpdir):
    """Round-trip an MPH through path-based save/load."""
    mph = cmph.generate_hash(_words)
    target = tmpdir.ensure("out.mph")
    mph.save(target.strpath)
    reloaded = cmph.load_hash(target.strpath)
    with open(_words) as words:
        for word in words:
            assert mph(word) == reloaded(word)
def test_str_input2(tmpdir):
    """generate_hash accepts a pre-read list of key strings.

    Fix: the original did ``open(_words).readlines()`` without closing the
    file, leaking the handle (and warning under -W error::ResourceWarning);
    the read now happens inside a ``with`` block.
    """
    with open(_words) as words_file:
        data = words_file.readlines()
    mph = cmph.generate_hash(data)
    out = tmpdir.ensure("out.mph")
    with out.open("w") as test_output:
        mph.save(test_output)
    with out.open() as saved_mph:
        mph2 = cmph.load_hash(saved_mph)
    # The reloaded hash must agree with the original on every key.
    for word in data:
        assert mph(word) == mph2(word)
def test_str_input(tmpdir):
    """generate_hash works on an in-memory list of plain strings."""
    data = 'This is a string list test'.split()
    mph = cmph.generate_hash(data)
    out = tmpdir.ensure('out.mph')
    with out.open('w') as test_output:
        mph.save(test_output)
    with out.open() as saved_mph:
        mph2 = cmph.load_hash(saved_mph)
    for word in data:
        assert mph(word) == mph2(word)
def test_str_input(tmpdir):
    """An MPH built from a small in-memory string list survives save/load."""
    keys = "This is a string list test".split()
    mph = cmph.generate_hash(keys)
    saved = tmpdir.ensure("out.mph")
    with saved.open("w") as sink:
        mph.save(sink)
    with saved.open() as source:
        reloaded = cmph.load_hash(source)
    for key in keys:
        assert mph(key) == reloaded(key)
def test_str_input2(tmpdir):
    """generate_hash accepts a list of lines read from the word file.

    Fix: the original leaked a file handle via ``open(_words).readlines()``;
    the file is now read inside a ``with`` block so it is always closed.
    """
    with open(_words) as words_file:
        data = words_file.readlines()
    mph = cmph.generate_hash(data)
    out = tmpdir.ensure('out.mph')
    with out.open('w') as test_output:
        mph.save(test_output)
    with out.open() as saved_mph:
        mph2 = cmph.load_hash(saved_mph)
    # The reloaded hash must agree with the original on every key.
    for word in data:
        assert mph(word) == mph2(word)
def test_simple_usage(tmpdir):
    """Basic happy path: build from a file object, save, reload, compare."""
    with open(_words) as test_input:
        mph = cmph.generate_hash(test_input)
    out = tmpdir.ensure('out.mph')
    with out.open('w') as test_output:
        mph.save(test_output)
    with out.open() as saved_mph:
        mph2 = cmph.load_hash(saved_mph)
    with open(_words) as test_input:
        for word in test_input:
            assert mph(word) == mph2(word)
def test_simple_usage(tmpdir):
    """The default-configuration round trip: generate, persist, reload."""
    with open(_words) as keys:
        mph = cmph.generate_hash(keys)
    saved = tmpdir.ensure("out.mph")
    with saved.open("w") as sink:
        mph.save(sink)
    with saved.open() as source:
        reloaded = cmph.load_hash(source)
    with open(_words) as keys:
        for key in keys:
            assert mph(key) == reloaded(key)
def test_unicode_input(unicrud):
    """Hashing a unicode key and its byte-encoded form gives the same index."""
    unicrud = list(set(unicrud))
    assume(len(unicrud) > 5)
    # MPH is an entropy game, hence things with low-entropy will
    # confuse the hash algorithms preventing convergence on a
    # solution, making this test fail
    assume(_entropy(unicrud) == -0.0)
    mph = cmph.generate_hash(unicrud)
    # ... break the encapsulation, knowing that we
    # do this under the hood
    encoded = [convert_to_bytes(s) for s in unicrud]
    for original, escaped in zip(unicrud, encoded):
        assert mph(escaped) == mph(original)
if len(places) < r: placeref_t = t break transref_t = None for (r, t) in range2type: if len(transs) < r: transref_t = t break if placeref_t is None or transref_t is None: sys.stderr.write( 'FATAL: Unable to fit places and transitions into 64 bit integers\n') sys.exit(1) place_mph = cmph.generate_hash(places, algorithm='bdz') trans_mph = cmph.generate_hash(transs, algorithm='bdz') place_mph.save(pidx_fn) trans_mph.save(tidx_fn) place_idx = {} trans_idx = {} place_list = [''] * place_num trans_list = [''] * trans_num for p in places: place_idx[p] = place_mph.lookup(p) place_list[place_idx[p]] = p for t in transs: trans_idx[t] = trans_mph.lookup(t)