def read(self):
    """Step to the next entry of ``self.data`` and return it.

    While ``self.cursor`` is inside ``self.data``, the cursor is advanced
    by one and the entry it pointed at is passed through
    ``convert_to_bytes`` and stored in ``self.current``.  Once the cursor
    has reached the end, ``self.current`` is set to ``None`` and the
    cursor is left untouched.

    Returns
    -------
    The new value of ``self.current``: the converted entry, or ``None``
    when there is nothing left to read.
    """
    position = self.cursor

    # Exhausted: clear the current item and report it.
    if position >= len(self.data):
        self.current = None
        return self.current

    # The cursor moves on before the conversion runs, so a failing
    # conversion still consumes the entry.
    self.cursor = position + 1
    self.current = convert_to_bytes(self.data[position])
    return self.current
def test_unicode_input(unicrud):
    """Check that a key and its byte form hash to the same code.

    The de-duplicated ``unicrud`` keys are fed to ``cmph.generate_hash``;
    for every key the resulting hash is then called both with the
    original key and with its ``convert_to_bytes`` form, and the two
    codes must match.

    Parameters
    ----------
    unicrud : iterable
        Candidate keys (presumably supplied by a hypothesis strategy
        declared outside this block -- confirm against the decorator).
    """
    unicrud = list(set(unicrud))
    assume(len(unicrud) > 5)

    # MPH is an entropy game, hence things with low-entropy will
    # confuse the hash algorithms preventing convergence on a
    # solution, making this test fail.  Zero-entropy inputs are
    # therefore discarded (``-0.0 == 0.0`` in Python, so either signed
    # zero is rejected here).
    assume(_entropy(unicrud) != -0.0)

    mph = cmph.generate_hash(unicrud)

    # ... break the encapsulation, knowing that we
    # do this under the hood
    test_strs = [convert_to_bytes(s) for s in unicrud]
    for original, escaped in zip(unicrud, test_strs):
        assert mph(escaped) == mph(original)
def lookup(self, key):
    """
    Generate hash code for a key from the Minimal Perfect Hash (MPH)

    The key is converted with ``convert_to_bytes``, copied into a
    ``char[]`` buffer allocated through ``ffi.new`` and handed to
    ``_cmph.cmph_search`` together with the length of the converted key.

    Parameters
    ----------
    key : object
        The item to generate a code for, this works best for keys that
        are strings, or can be transformed fairly directly into bytes

    Returns
    -------
    int
        The code for the given item, as returned by
        ``_cmph.cmph_search``

    Raises
    ------
    AssertionError
        If ``self._mph`` is falsy.
    """
    assert self._mph

    raw_key = convert_to_bytes(key)
    key_buffer = ffi.new('char[]', raw_key)
    try:
        return _cmph.cmph_search(self._mph, key_buffer, len(raw_key))
    finally:
        # Drop our reference to the buffer as soon as the search is done.
        del key_buffer
def test_unicode_bytes(unicrud):
    """Check that ``convert_to_bytes`` output round-trips to the input.

    The input is converted with ``convert_to_bytes``, rebuilt by calling
    ``unicrud_type`` on the result with the ``'utf8'`` argument, and the
    rebuilt value must equal the original ``unicrud``.
    """
    encoded = convert_to_bytes(unicrud)
    rebuilt = unicrud_type(encoded, 'utf8')
    assert rebuilt == unicrud