def test_128bit_x64_basic_string(self):
    """Check pymmh3.hash128 with x64arch=True against the stored solutions.

    Each line of ``pg1260.txt`` is read as bytes, hashed, and compared with
    the entry stored under that same line in the table returned by
    ``self._load_solutions('solution_hash128_x64_seed0.txt')``.
    """
    expected_by_line = self._load_solutions('solution_hash128_x64_seed0.txt')
    corpus_path = os.path.join(file_dir, 'pg1260.txt')
    with open(corpus_path, 'rb') as corpus:
        for line in corpus:
            expected = expected_by_line[line]
            actual = pymmh3.hash128(line, x64arch=True)
            # Build the failure text up front so it names the offending line.
            failure_note = (
                'different hash for line: "%s"\n0x%08X != 0x%08X'
                % (line, expected, actual)
            )
            self.assertEqual(expected, actual, failure_note)
def syntactic_identifier_int(self) -> int:
    """Return ``mmh3.hash128`` of this object's identifying tuple.

    The tuple is ``(check_id, path, syntactic_context)`` where every line of
    ``syntactic_context`` has had its leading and trailing whitespace
    stripped; the tuple's ``str()`` form is what gets hashed.
    """
    # todo: move to library
    # Strip each line so that surrounding whitespace does not affect the hash.
    stripped_lines = [
        line.strip() for line in self.syntactic_context.split("\n")
    ]
    normalized_context = "\n".join(stripped_lines)
    identity = (self.check_id, self.path, normalized_context)
    # Use murmur3 hash to minimize collisions
    return mmh3.hash128(str(identity))
def test_128bit_x86_custom_seed_bytearray(self):
    """Check pymmh3.hash128 on bytearray input with a custom seed (x86 mode).

    Each line of ``pg1260.txt`` is read as bytes, wrapped in a ``bytearray``,
    hashed with ``seed=0x1234ABCD`` and ``x64arch=False``, and compared with
    the entry stored under that line in the table returned by
    ``self._load_solutions('solution_hash128_x86_seed1234ABCD.txt')``.
    """
    expected_by_line = self._load_solutions(
        'solution_hash128_x86_seed1234ABCD.txt'
    )
    corpus_path = os.path.join(file_dir, 'pg1260.txt')
    with open(corpus_path, 'rb') as corpus:
        for line in corpus:
            expected = expected_by_line[line]
            actual = pymmh3.hash128(
                bytearray(line), seed=0x1234ABCD, x64arch=False
            )
            # Build the failure text up front so it names the offending line.
            failure_note = (
                'different hash for line: "%s"\n0x%08X != 0x%08X'
                % (line, expected, actual)
            )
            self.assertEqual(expected, actual, failure_note)
def get_file_information(f, inpath, include_checksum):
    """Collect the relative path, size and (optionally) checksum of a file.

    Args:
        f: A scandir-style entry; only ``f.path`` and ``f.stat()`` are used.
        inpath: Directory that the returned path is made relative to.
        include_checksum: When true, the file is read in text mode and the
            string form of ``mmh3.hash128(contents, 2017)`` is appended.

    Returns:
        ``[relative_path, size]`` or ``[relative_path, size, checksum]``;
        every element is a ``str``.
    """
    f_path = os.path.relpath(f.path, inpath).strip()
    f_size = str(f.stat().st_size)
    f_info = [f_path, f_size]
    if include_checksum:
        # The 'U' mode flag was removed in Python 3.11 (open() raises
        # ValueError).  Plain text mode already applies universal-newline
        # translation on Python 3, so the hashed content is unchanged there.
        with open(f.path, 'r') as infile:
            f_checksum = str(mmh3.hash128(infile.read(), 2017))
        f_info.append(f_checksum)
    return f_info
def _modified_hash(self, paths: Iterable[Path]) -> str:
    """Fold the modification times of ``paths`` into one hex string.

    For every path that exists, ``"<path>:<st_mtime_ns>"`` is hashed with
    ``mmh3.hash128`` and the results are XOR-ed together, so the outcome does
    not depend on iteration order. Paths that do not exist are skipped.
    The accumulated integer is rendered with ``format(..., "x")``.
    """
    combined = 0
    for path in paths:
        if not path.exists():
            continue
        path_text = str(path)
        mtime_ns = path.stat().st_mtime_ns
        combined ^= mmh3.hash128(f"{path_text}:{mtime_ns}")
    return format(combined, "x")
def _modified_hash(self) -> str:
    """Fold the modification times of all tracked entries into a hex string.

    Walks ``self.file_ignore.entries()``, keeping entries whose ``survives``
    flag is truthy and whose base name is not one of the Bento bookkeeping
    files. For each kept entry, ``"<path>:<st_mtime_ns>"`` (stat taken
    without following symlinks) is hashed with ``mmh3.hash128`` and XOR-ed
    into the running value, which is finally rendered with
    ``format(..., "x")``.
    """
    # No matter the settings of .bentoignore, these are always excluded.
    always_skipped = {".bento", ".bento-whitelist.yml", ".bento.yml"}
    entries = self.file_ignore.entries()

    digest = 0
    for entry in entries:
        if not entry.survives:
            continue
        if os.path.basename(entry.path) in always_skipped:
            continue
        tracked_path = entry.path
        mtime_ns = entry.dir_entry.stat(follow_symlinks=False).st_mtime_ns
        digest ^= mmh3.hash128(f"{tracked_path}:{mtime_ns}")
    return format(digest, "x")
def syntactic_identifier_int(self) -> int:
    """Return ``pymmh3.hash128`` of this object's identifying tuple.

    The hashed text is the ``str()`` form of
    ``(check_id, path, syntactic_context)``.
    """
    identity = (self.check_id, self.path, self.syntactic_context)
    # Use murmur3 hash to minimize collisions
    return pymmh3.hash128(str(identity))
#!/usr/bin/python
#encoding:utf-8
# Small demo: hash a few feature names with pymmh3.hash128 and print each
# hash next to its value reduced modulo `base`, separated by a tab.
import pymmh3 as mmh

arr = ['brand_id@1007', 'category_id@9990', 'item_discount', "item_discountx", "item_dis213count", "item_discount7"]
base = 10000000
for s in arr:
    v = mmh.hash128(s)
    # A parenthesised single argument prints identically on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print("%s\t%s" % (v, v % base))