def __init__(self, filename):
    """Map the .midx file `filename` and locate the tables inside it.

    Layout, as read here: bytes 0-3 hold the magic 'MIDX', bytes 4-7 a
    network-order 32-bit version, bytes 8-11 feed the fanout bit count,
    then come the fanout table (entries * 4 bytes), the sha table
    (nsha * 20 bytes), the "which" list (nsha * 4 bytes) and finally
    the NUL-separated idx names.

    If the magic or the version is unacceptable, a warning is logged,
    force_keep is set, and the result of self._init_failed() is
    returned without parsing any further.
    """
    self.name = filename
    self.force_keep = False
    self.map = None
    assert filename.endswith('.midx')
    midx = self.map = mmap_read(open(filename))

    if str(midx[0:4]) != 'MIDX':
        log('Warning: skipping: invalid MIDX header in %r\n' % filename)
        self.force_keep = True
        return self._init_failed()

    version, = struct.unpack('!I', midx[4:8])
    if version != MIDX_VERSION:
        if version < MIDX_VERSION:
            log('Warning: ignoring old-style (v%d) midx %r\n'
                % (version, filename))
            self.force_keep = False  # old stuff is boring
        else:
            log('Warning: ignoring too-new (v%d) midx %r\n'
                % (version, filename))
            self.force_keep = True  # new stuff is exciting
        return self._init_failed()

    self.bits = _helpers.firstword(midx[8:12])
    self.entries = 2**self.bits
    fanout_len = self.entries * 4
    self.fanout = buffer(midx, 12, fanout_len)
    self.sha_ofs = 12 + fanout_len
    # nsha is whatever _fanget() reports for the last fanout index;
    # self.fanout is already in place before the call.
    self.nsha = count = self._fanget(self.entries - 1)
    self.shatable = buffer(midx, self.sha_ofs, count * 20)
    self.which_ofs = self.sha_ofs + 20 * count
    self.whichlist = buffer(midx, self.which_ofs, count * 4)
    names_ofs = self.which_ofs + 4 * count
    self.idxnames = str(midx[names_ofs:]).split('\0')
def __init__(self, filename):
    """Open the .midx file `filename` and set up views onto its tables.

    The header is checked first: a 4-byte 'MIDX' magic followed by a
    big-endian 32-bit version that must equal MIDX_VERSION.  On any
    mismatch a warning is logged, force_keep is updated and the value
    of self._init_failed() is returned.

    On success the instance gets bits/entries, the fanout, shatable and
    whichlist buffers, their offsets, and the list of idx names stored
    (NUL-separated) at the end of the file.
    """
    self.name = filename
    self.map = None
    self.force_keep = False
    assert filename.endswith('.midx')
    self.map = mmap_read(open(filename))

    magic = str(self.map[0:4])
    if magic != 'MIDX':
        log('Warning: skipping: invalid MIDX header in %r\n' % filename)
        self.force_keep = True
        return self._init_failed()

    (found,) = struct.unpack('!I', self.map[4:8])
    if found < MIDX_VERSION:
        log('Warning: ignoring old-style (v%d) midx %r\n' % (found, filename))
        self.force_keep = False  # old stuff is boring
        return self._init_failed()
    elif found > MIDX_VERSION:
        log('Warning: ignoring too-new (v%d) midx %r\n' % (found, filename))
        self.force_keep = True  # new stuff is exciting
        return self._init_failed()

    # Fixed 12-byte header, then the fanout table.
    self.bits = _helpers.firstword(self.map[8:12])
    self.entries = 2**self.bits
    fanout_bytes = self.entries * 4
    self.fanout = buffer(self.map, 12, fanout_bytes)
    self.sha_ofs = 12 + fanout_bytes

    # Sha table (20 bytes each) followed by the "which" list (4 bytes each).
    self.nsha = total = self._fanget(self.entries - 1)
    sha_bytes = total * 20
    which_bytes = total * 4
    self.shatable = buffer(self.map, self.sha_ofs, sha_bytes)
    self.which_ofs = self.sha_ofs + sha_bytes
    self.whichlist = buffer(self.map, self.which_ofs, which_bytes)

    # Everything after the "which" list is the NUL-separated name list.
    trailer = str(self.map[self.which_ofs + which_bytes:])
    self.idxnames = trailer.split('\0')
def __init__(self, filename):
    """Map the .midx file named by the bytes path `filename`.

    Validates the b'MIDX' magic (bytes 0-3) and the big-endian 32-bit
    version (bytes 4-7) against MIDX_VERSION.  A failed check logs a
    warning, sets force_keep and returns self._init_failed().

    Otherwise only offsets are recorded (no table views are created):
    fanout_ofs, sha_ofs and which_ofs, plus bits, entries, nsha and the
    idx names split on NUL from the tail of the map.
    """
    self.name = filename
    self.map = None
    self.force_keep = False
    assert filename.endswith(b'.midx')
    self.map = mmap_read(open(filename))

    magic = self.map[0:4]
    if magic != b'MIDX':
        log('Warning: skipping: invalid MIDX header in %r\n'
            % path_msg(filename))
        self.force_keep = True
        return self._init_failed()

    found = struct.unpack('!I', self.map[4:8])[0]
    too_old = found < MIDX_VERSION
    too_new = found > MIDX_VERSION
    if too_old:
        log('Warning: ignoring old-style (v%d) midx %r\n'
            % (found, path_msg(filename)))
        self.force_keep = False  # old stuff is boring
        return self._init_failed()
    if too_new:
        log('Warning: ignoring too-new (v%d) midx %r\n'
            % (found, path_msg(filename)))
        self.force_keep = True  # new stuff is exciting
        return self._init_failed()

    self.bits = _helpers.firstword(self.map[8:12])
    self.entries = 2**self.bits

    # The fanout table starts right after the 12-byte header and is
    # self.entries * 4 bytes long.
    self.fanout_ofs = 12
    self.sha_ofs = self.fanout_ofs + self.entries * 4

    self.nsha = self._fanget(self.entries - 1)
    sha_len = 20 * self.nsha    # sha table length
    which_len = 4 * self.nsha   # "which" list length
    self.which_ofs = self.sha_ofs + sha_len

    names_start = self.which_ofs + which_len
    self.idxnames = self.map[names_start:].split(b'\0')
def __init__(self, filename, f=None, readwrite=False, expected=-1):
    """Map the .bloom file `filename` and read its header.

    filename  -- bytes path; must end in b'.bloom'.
    f         -- optional already-open file to use instead of opening
                 `filename`.
    readwrite -- map the file writable (requires expected > 0).
    expected  -- used only when readwrite is true, to choose between an
                 immediate and a delayed-write mapping.

    If the b'BLOM' magic or the version check fails, a warning is
    logged and self._init_failed() is called; nothing further is parsed.
    """
    self.closed = False
    self.name = filename
    self.readwrite = readwrite
    self.file = None
    self.map = None
    assert filename.endswith(b'.bloom')

    if not readwrite:
        self.file = f or open(filename, 'rb')
        self.map = mmap_read(self.file)
    else:
        assert expected > 0
        self.file = f = f or open(filename, 'r+b')
        f.seek(0)

        # Choose between a shared writable mapping ("immediate" write)
        # and a private one whose contents are written back later
        # ("delayed" write).
        #
        # Adding even a few entries to a bloom table dirties nearly all
        # of its pages.  The table is large enough that dirtying every
        # page often exceeds Linux's default
        # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
        # so the kernel starts flushing the table before we are done,
        # even though it would fit in RAM comfortably.
        #
        # So when the estimate says we will touch the whole table anyway
        # (at least one bit flipped per memory page), use a "private"
        # mmap, which keeps Linux from flushing it to disk; it is then
        # flushed as one big lump during close().
        size = os.fstat(f.fileno()).st_size
        pages = (size // 4096) * 5  # assume k=5
        self.delaywrite = expected > pages
        debug1('bloom: delaywrite=%r\n' % self.delaywrite)
        mapper = mmap_readwrite_private if self.delaywrite else mmap_readwrite
        self.map = mapper(self.file, close=False)

    got = self.map[0:4]
    if got != b'BLOM':
        log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
        self._init_failed()
        return

    (version,) = struct.unpack('!I', self.map[4:8])
    if version != BLOOM_VERSION:
        if version < BLOOM_VERSION:
            log('Warning: ignoring old-style (v%d) bloom %r\n'
                % (version, filename))
        else:
            log('Warning: ignoring too-new (v%d) bloom %r\n'
                % (version, filename))
        self._init_failed()
        return

    # Bytes 8-15: two 16-bit fields and one 32-bit field, big-endian.
    self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])

    # Whatever follows the 16-byte header plus 2**bits bytes is the
    # NUL-separated idx name list; it may be empty.
    tail = self.map[16 + 2**self.bits:]
    self.idxnames = tail.split(b'\0') if tail else []
def __init__(self, filename, f=None, readwrite=False, expected=-1):
    """Map the .bloom file `filename` and read its header.

    filename  -- path; must end in '.bloom'.
    f         -- optional already-open file to use instead of opening
                 `filename`.
    readwrite -- map the file writable and remember it in self.rwfile
                 (requires expected > 0).
    expected  -- used only when readwrite is true, to choose between an
                 immediate and a delayed-write mapping.

    If the 'BLOM' magic or the version check fails, a warning is logged
    and the result of self._init_failed() is returned.
    """
    self.name = filename
    self.rwfile = None
    self.map = None
    assert filename.endswith('.bloom')

    if not readwrite:
        # Read-only: self.rwfile stays None.
        f = f or open(filename, 'rb')
        self.map = mmap_read(f)
    else:
        assert expected > 0
        self.rwfile = f = f or open(filename, 'r+b')
        f.seek(0)

        # Two ways to map the pages: writable, so changes reach the file
        # directly ('immediate' write), or private, to be written back
        # to the file later ('delayed' write).
        #
        # A bloom table's write pattern dirties almost all of its pages
        # after very few entries are added.  The table is big enough
        # that dirtying *all* the pages often exceeds Linux's default
        # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
        # which makes it start flushing the table before we're done,
        # even though the table would easily fit in RAM.
        #
        # As a workaround, if we work out that we'll probably touch the
        # whole table anyway (at least one bit flipped per memory page),
        # use a "private" mmap, which defeats Linux's ability to flush
        # it to disk, and flush it as one big lump during close().
        file_size = os.fstat(f.fileno()).st_size
        pages = (file_size / 4096) * 5  # assume k=5
        self.delaywrite = expected > pages
        debug1('bloom: delaywrite=%r\n' % self.delaywrite)
        if self.delaywrite:
            map_file = mmap_readwrite_private
        else:
            map_file = mmap_readwrite
        self.map = map_file(self.rwfile, close=False)

    got = str(self.map[0:4])
    if got != 'BLOM':
        log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
        return self._init_failed()

    (version,) = struct.unpack('!I', self.map[4:8])
    if version < BLOOM_VERSION:
        log('Warning: ignoring old-style (v%d) bloom %r\n'
            % (version, filename))
        return self._init_failed()
    elif version > BLOOM_VERSION:
        log('Warning: ignoring too-new (v%d) bloom %r\n'
            % (version, filename))
        return self._init_failed()

    # Bytes 8-15: two 16-bit fields and one 32-bit field, big-endian.
    header_fields = struct.unpack('!HHI', self.map[8:16])
    self.bits, self.k, self.entries = header_fields

    # The idx name list (NUL-separated, possibly empty) follows the
    # 16-byte header plus 2**bits bytes.
    names = str(self.map[16 + 2**self.bits:])
    self.idxnames = names.split('\0') if names else []
def __init__(self, filename, f):
    """Map the index file `f` (named `filename`) and locate its tables.

    The file is read as 256 big-endian 32-bit fanout counters followed
    directly by a table of 24-byte entries; the last counter gives the
    number of entries.
    """
    self.name = filename
    self.idxnames = [filename]
    self.map = mmap_read(f)

    fanout_bytes = 256 * 4
    counters = list(struct.unpack('!256I',
                                  buffer(self.map, 0, fanout_bytes)))
    # entry "-1": with the extra 0, self.fanout[-1] evaluates to 0.
    counters.append(0)
    self.fanout = counters

    entry_count = counters[255]
    self.sha_ofs = fanout_bytes
    self.shatable = buffer(self.map, self.sha_ofs, entry_count * 24)
def __init__(self, filename, f):
    """Map the index file `f` (named `filename`) and locate its tables.

    The first 1024 bytes are copied out and decoded as 256 big-endian
    32-bit fanout counters; a table of 24-byte entries follows, its
    length taken from the last counter.
    """
    self.name = filename
    self.idxnames = [filename]
    self.map = mmap_read(f)

    # Copy the fanout bytes into a str before handing them to struct.
    raw_fanout = str(buffer(self.map, 0, 256 * 4))
    self.fanout = list(struct.unpack('!256I', raw_fanout))
    self.fanout.append(0)  # entry "-1"

    self.sha_ofs = 256 * 4
    table_len = self.fanout[255] * 24
    self.shatable = buffer(self.map, self.sha_ofs, table_len)
def __init__(self, filename, f):
    """Map the index file `f` (named `filename`) and locate its tables.

    The file is read as 256 big-endian 32-bit fanout counters followed
    directly by a table of 24-byte entries; the last counter gives the
    number of entries (self.nsha).

    Raises struct.error if the mapping is shorter than the 1024-byte
    fanout table.
    """
    self.name = filename
    self.idxnames = [self.name]
    self.map = mmap_read(f)
    # struct.unpack() insists that the buffer be exactly as long as the
    # format (1024 bytes), but self.map spans the whole file -- the
    # entry table below is read from the same mapping -- so decode just
    # the leading fanout with unpack_from(), which only needs the
    # buffer to be at least that long.
    # Min size for 'L' is 4, which is sufficient for struct's '!I'
    self.fanout = array('L', struct.unpack_from('!256I', self.map))
    self.fanout.append(0)  # entry "-1"
    self.nsha = self.fanout[255]
    self.sha_ofs = 256 * 4
    # Avoid slicing shatable for individual hashes (very high overhead)
    self.shatable = buffer(self.map, self.sha_ofs, self.nsha * 24)
def __init__(self, filename, f):
    """Map the index file `f` (named `filename`) and record table offsets.

    Layout, as read here: an 8-byte signature, 256 big-endian 32-bit
    fanout counters, nsha 20-byte hashes, a table of nsha 4-byte
    entries that is skipped, then the offset table (nsha * 4 bytes) and
    whatever follows it (ofs64table_ofs).
    """
    self.name = filename
    self.idxnames = [filename]
    self.map = mmap_read(f)

    header_len = 8
    assert self.map[0:header_len] == b'\377tOc\0\0\0\2'

    # Min size for 'L' is 4, which is sufficient for struct's '!I'
    counters = struct.unpack_from('!256I', self.map, header_len)
    self.fanout = array('L', counters)
    self.fanout.append(0)
    self.nsha = self.fanout[255]

    sha_bytes = self.nsha * 20
    word_bytes = self.nsha * 4  # size of each 4-byte-per-entry table
    self.sha_ofs = header_len + 256 * 4
    # Step over the hashes and the 4-byte-per-entry table that follows
    # them (presumably the CRC table of git's v2 idx format -- confirm).
    self.ofstable_ofs = self.sha_ofs + sha_bytes + word_bytes
    self.ofs64table_ofs = self.ofstable_ofs + word_bytes

    # Avoid slicing this for individual hashes (very high overhead)
    self.shatable = buffer(self.map, self.sha_ofs, sha_bytes)
def __init__(self, filename, f):
    """Map the index file `f` (named `filename`) and set up table views.

    Layout, as read here: an 8-byte signature, 256 big-endian 32-bit
    fanout counters, nsha 20-byte hashes, a table of nsha 4-byte
    entries that gets no view, the offset table (nsha * 4 bytes), and
    finally the 64-bit offset table running to the end of the map.
    """
    self.name = filename
    self.idxnames = [filename]
    self.map = idx = mmap_read(f)
    assert str(idx[0:8]) == '\377tOc\0\0\0\2'

    fanout_raw = str(buffer(idx, 8, 256 * 4))
    self.fanout = list(struct.unpack('!256I', fanout_raw))
    self.fanout.append(0)  # entry "-1"
    count = self.fanout[255]

    # Walk the tables in file order, carrying the running offset.
    self.sha_ofs = 8 + 256 * 4
    self.shatable = buffer(idx, self.sha_ofs, count * 20)
    # 4-byte-per-entry table with no view of its own (presumably the
    # CRC table of git's v2 idx format -- confirm).
    skipped_ofs = self.sha_ofs + count * 20
    ofs_start = skipped_ofs + count * 4
    self.ofstable = buffer(idx, ofs_start, count * 4)
    self.ofs64table = buffer(idx, ofs_start + count * 4)
def __init__(self, filename, f):
    """Map the index file `f` (named `filename`) and set up table views.

    After checking the 8-byte signature, decodes the 256-entry fanout
    and creates buffers over the hash table (20 bytes per entry), the
    offset table (4 bytes per entry) and the 64-bit offset table, which
    extends to the end of the map.
    """
    self.name = filename
    self.idxnames = [filename]
    self.map = mmap_read(f)
    signature = str(self.map[0:8])
    assert signature == '\377tOc\0\0\0\2'

    header_len = 8
    fanout_len = 256 * 4
    fanout_str = str(buffer(self.map, header_len, fanout_len))
    (counters) = struct.unpack('!256I', fanout_str)
    self.fanout = list(counters)
    self.fanout.append(0)  # entry "-1"

    n = self.fanout[255]
    sha_len = n * 20
    word_len = n * 4  # size of each 4-byte-per-entry table

    self.sha_ofs = header_len + fanout_len
    self.shatable = buffer(self.map, self.sha_ofs, sha_len)
    # One 4-byte-per-entry table sits between the hashes and the offset
    # table, hence the extra word_len in both offsets below.
    self.ofstable = buffer(self.map,
                           self.sha_ofs + sha_len + word_len,
                           word_len)
    self.ofs64table = buffer(self.map,
                             header_len + fanout_len + sha_len
                             + word_len + word_len)