def __init__(self, filename): self.filename = filename self.m = b'' self.writable = False self.count = 0 f = None try: f = open(filename, 'rb+') except IOError as e: if e.errno == errno.ENOENT: pass else: raise if f: b = f.read(len(INDEX_HDR)) if b != INDEX_HDR: log('warning: %s: header: expected %r, got %r\n' % (filename, INDEX_HDR, b)) else: st = os.fstat(f.fileno()) if st.st_size: self.m = mmap_readwrite(f) self.writable = True self.count = struct.unpack( FOOTER_SIG, self.m[st.st_size - FOOTLEN:st.st_size])[0]
def __init__(self, filename): self.filename = filename self.m = '' self.writable = False self.count = 0 f = None try: f = open(filename, 'r+') except IOError as e: if e.errno == errno.ENOENT: pass else: raise if f: b = f.read(len(INDEX_HDR)) if b != INDEX_HDR: log('warning: %s: header: expected %r, got %r\n' % (filename, INDEX_HDR, b)) else: st = os.fstat(f.fileno()) if st.st_size: self.m = mmap_readwrite(f) self.writable = True self.count = struct.unpack(FOOTER_SIG, str(buffer(self.m, st.st_size-FOOTLEN, FOOTLEN)))[0]
def __init__(self, filename, f=None, readwrite=False, expected=-1): self.closed = False self.name = filename self.readwrite = readwrite self.file = None self.map = None assert (filename.endswith(b'.bloom')) if readwrite: assert (expected > 0) self.file = f = f or open(filename, 'r+b') f.seek(0) # Decide if we want to mmap() the pages as writable ('immediate' # write) or else map them privately for later writing back to # the file ('delayed' write). A bloom table's write access # pattern is such that we dirty almost all the pages after adding # very few entries. But the table is so big that dirtying # *all* the pages often exceeds Linux's default # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio, # thus causing it to start flushing the table before we're # finished... even though there's more than enough space to # store the bloom table in RAM. # # To work around that behaviour, if we calculate that we'll # probably end up touching the whole table anyway (at least # one bit flipped per memory page), let's use a "private" mmap, # which defeats Linux's ability to flush it to disk. Then we'll # flush it as one big lump during close(). pages = os.fstat(f.fileno()).st_size // 4096 * 5 # assume k=5 self.delaywrite = expected > pages debug1('bloom: delaywrite=%r\n' % self.delaywrite) if self.delaywrite: self.map = mmap_readwrite_private(self.file, close=False) else: self.map = mmap_readwrite(self.file, close=False) else: self.file = f or open(filename, 'rb') self.map = mmap_read(self.file) got = self.map[0:4] if got != b'BLOM': log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename)) self._init_failed() return ver = struct.unpack('!I', self.map[4:8])[0] if ver < BLOOM_VERSION: log('Warning: ignoring old-style (v%d) bloom %r\n' % (ver, filename)) self._init_failed() return if ver > BLOOM_VERSION: log('Warning: ignoring too-new (v%d) bloom %r\n' % (ver, filename)) self._init_failed() return self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16]) idxnamestr = self.map[16 + 2**self.bits:] if idxnamestr: self.idxnames = idxnamestr.split(b'\0') else: self.idxnames = []
def __init__(self, filename, f=None, readwrite=False, expected=-1): self.name = filename self.rwfile = None self.map = None assert(filename.endswith('.bloom')) if readwrite: assert(expected > 0) self.rwfile = f = f or open(filename, 'r+b') f.seek(0) # Decide if we want to mmap() the pages as writable ('immediate' # write) or else map them privately for later writing back to # the file ('delayed' write). A bloom table's write access # pattern is such that we dirty almost all the pages after adding # very few entries. But the table is so big that dirtying # *all* the pages often exceeds Linux's default # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio, # thus causing it to start flushing the table before we're # finished... even though there's more than enough space to # store the bloom table in RAM. # # To work around that behaviour, if we calculate that we'll # probably end up touching the whole table anyway (at least # one bit flipped per memory page), let's use a "private" mmap, # which defeats Linux's ability to flush it to disk. Then we'll # flush it as one big lump during close(). pages = os.fstat(f.fileno()).st_size / 4096 * 5 # assume k=5 self.delaywrite = expected > pages debug1('bloom: delaywrite=%r\n' % self.delaywrite) if self.delaywrite: self.map = mmap_readwrite_private(self.rwfile, close=False) else: self.map = mmap_readwrite(self.rwfile, close=False) else: self.rwfile = None f = f or open(filename, 'rb') self.map = mmap_read(f) got = str(self.map[0:4]) if got != 'BLOM': log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename)) return self._init_failed() ver = struct.unpack('!I', self.map[4:8])[0] if ver < BLOOM_VERSION: log('Warning: ignoring old-style (v%d) bloom %r\n' % (ver, filename)) return self._init_failed() if ver > BLOOM_VERSION: log('Warning: ignoring too-new (v%d) bloom %r\n' % (ver, filename)) return self._init_failed() self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16]) idxnamestr = str(self.map[16 + 2**self.bits:]) if idxnamestr: self.idxnames = idxnamestr.split('\0') else: self.idxnames = []
def write(self, filename, packbin): ofs64_count = 0 for section in self.idx: for entry in section: if entry[2] >= 2**31: ofs64_count += 1 # Length: header + fan-out + shas-and-crcs + overflow-offsets index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count) idx_map = None idx_f = open(filename, 'w+b') try: idx_f.truncate(index_len) fdatasync(idx_f.fileno()) idx_map = mmap_readwrite(idx_f, close=False) try: count = _helpers.write_idx(filename, idx_map, self.idx, self.count) assert (count == self.count) idx_map.flush() finally: idx_map.close() finally: idx_f.close() idx_f = open(filename, 'a+b') try: idx_f.write(packbin) idx_f.seek(0) idx_sum = Sha1() b = idx_f.read(8 + 4 * 256) idx_sum.update(b) obj_list_sum = Sha1() for b in chunkyreader(idx_f, 20 * self.count): idx_sum.update(b) obj_list_sum.update(b) namebase = hexlify(obj_list_sum.digest()) for b in chunkyreader(idx_f): idx_sum.update(b) idx_f.write(idx_sum.digest()) fdatasync(idx_f.fileno()) return namebase finally: idx_f.close()
def _write_pack_idx_v2(self, filename, idx, packbin): ofs64_count = 0 for section in idx: for entry in section: if entry[2] >= 2**31: ofs64_count += 1 # Length: header + fan-out + shas-and-crcs + overflow-offsets index_len = 8 + (4 * 256) + (28 * self.count) + (8 * ofs64_count) idx_map = None idx_f = open(filename, 'w+b') try: idx_f.truncate(index_len) fdatasync(idx_f.fileno()) idx_map = mmap_readwrite(idx_f, close=False) try: count = _helpers.write_idx(filename, idx_map, idx, self.count) assert(count == self.count) idx_map.flush() finally: idx_map.close() finally: idx_f.close() idx_f = open(filename, 'a+b') try: idx_f.write(packbin) idx_f.seek(0) idx_sum = Sha1() b = idx_f.read(8 + 4*256) idx_sum.update(b) obj_list_sum = Sha1() for b in chunkyreader(idx_f, 20*self.count): idx_sum.update(b) obj_list_sum.update(b) namebase = obj_list_sum.hexdigest() for b in chunkyreader(idx_f): idx_sum.update(b) idx_f.write(idx_sum.digest()) fdatasync(idx_f.fileno()) return namebase finally: idx_f.close()
def _do_midx(outdir, outfilename, infilenames, prefixstr, auto=False, force=False): global _first if not outfilename: assert(outdir) sum = hexlify(Sha1(b'\0'.join(infilenames)).digest()) outfilename = b'%s/midx-%s.midx' % (outdir, sum) inp = [] total = 0 allfilenames = [] midxs = [] try: for name in infilenames: ix = git.open_idx(name) midxs.append(ix) inp.append(( ix.map, len(ix), ix.sha_ofs, isinstance(ix, midx.PackMidx) and ix.which_ofs or 0, len(allfilenames), )) for n in ix.idxnames: allfilenames.append(os.path.basename(n)) total += len(ix) inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20]) if not _first: _first = outdir dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b'' debug1('midx: %s%screating from %d files (%d objects).\n' % (dirprefix, prefixstr, len(infilenames), total)) if (auto and (total < 1024 and len(infilenames) < 3)) \ or ((auto or force) and len(infilenames) < 2) \ or (force and not total): debug1('midx: nothing to do.\n') return pages = int(total/SHA_PER_PAGE) or 1 bits = int(math.ceil(math.log(pages, 2))) entries = 2**bits debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits)) unlink(outfilename) with atomically_replaced_file(outfilename, 'wb') as f: f.write(b'MIDX') f.write(struct.pack('!II', midx.MIDX_VERSION, bits)) assert(f.tell() == 12) f.truncate(12 + 4*entries + 20*total + 4*total) f.flush() fdatasync(f.fileno()) fmap = mmap_readwrite(f, close=False) count = merge_into(fmap, bits, total, inp) del fmap # Assume this calls msync() now. f.seek(0, os.SEEK_END) f.write(b'\0'.join(allfilenames)) finally: for ix in midxs: if isinstance(ix, midx.PackMidx): ix.close() midxs = None inp = None # This is just for testing (if you enable this, don't clear inp above) if 0: p = midx.PackMidx(outfilename) assert(len(p.idxnames) == len(infilenames)) log(repr(p.idxnames) + '\n') assert(len(p) == total) for pe, e in p, git.idxmerge(inp, final_progress=False): pin = next(pi) assert(i == pin) assert(p.exists(i)) return total, outfilename
def _do_midx(outdir, outfilename, infilenames, prefixstr): global _first if not outfilename: assert(outdir) sum = Sha1('\0'.join(infilenames)).hexdigest() outfilename = '%s/midx-%s.midx' % (outdir, sum) inp = [] total = 0 allfilenames = [] midxs = [] try: for name in infilenames: ix = git.open_idx(name) midxs.append(ix) inp.append(( ix.map, len(ix), ix.sha_ofs, isinstance(ix, midx.PackMidx) and ix.which_ofs or 0, len(allfilenames), )) for n in ix.idxnames: allfilenames.append(os.path.basename(n)) total += len(ix) inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20]))) if not _first: _first = outdir dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or '' debug1('midx: %s%screating from %d files (%d objects).\n' % (dirprefix, prefixstr, len(infilenames), total)) if (opt.auto and (total < 1024 and len(infilenames) < 3)) \ or ((opt.auto or opt.force) and len(infilenames) < 2) \ or (opt.force and not total): debug1('midx: nothing to do.\n') return pages = int(total/SHA_PER_PAGE) or 1 bits = int(math.ceil(math.log(pages, 2))) entries = 2**bits debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits)) unlink(outfilename) with atomically_replaced_file(outfilename, 'wb') as f: f.write('MIDX') f.write(struct.pack('!II', midx.MIDX_VERSION, bits)) assert(f.tell() == 12) f.truncate(12 + 4*entries + 20*total + 4*total) f.flush() fdatasync(f.fileno()) fmap = mmap_readwrite(f, close=False) count = merge_into(fmap, bits, total, inp) del fmap # Assume this calls msync() now. f.seek(0, os.SEEK_END) f.write('\0'.join(allfilenames)) finally: for ix in midxs: if isinstance(ix, midx.PackMidx): ix.close() midxs = None inp = None # This is just for testing (if you enable this, don't clear inp above) if 0: p = midx.PackMidx(outfilename) assert(len(p.idxnames) == len(infilenames)) print p.idxnames assert(len(p) == total) for pe, e in p, git.idxmerge(inp, final_progress=False): pin = pi.next() assert(i == pin) assert(p.exists(i)) return total, outfilename