def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if extra:
        o.fatal('no arguments expected')

    git.check_repo_or_die()
    m = git.PackIdxList(git.repo(b'objects/pack'),
                        ignore_midx=opt.ignore_midx)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    report(-1, out)
    _helpers.random_sha()
    report(0, out)

    if opt.existing:
        def foreverit(mi):
            while 1:
                for e in mi:
                    yield e
        objit = iter(foreverit(m))

    for c in range(opt.cycles):
        for n in range(opt.number):
            if opt.existing:
                bin = next(objit)
                assert m.exists(bin)
            else:
                bin = _helpers.random_sha()
                # technically, a randomly generated object id might exist.
                # but the likelihood of that is the likelihood of finding
                # a collision in sha-1 by accident, which is so unlikely that
                # we don't care.
                assert not m.exists(bin)
        report((c + 1) * opt.number, out)

    if bloom._total_searches:
        out.write(b'bloom: %d objects searched in %d steps: avg %.3f steps/object\n'
                  % (bloom._total_searches, bloom._total_steps,
                     bloom._total_steps * 1.0 / bloom._total_searches))
    if midx._total_searches:
        out.write(b'midx: %d objects searched in %d steps: avg %.3f steps/object\n'
                  % (midx._total_searches, midx._total_steps,
                     midx._total_steps * 1.0 / midx._total_searches))
    if git._total_searches:
        out.write(b'idx: %d objects searched in %d steps: avg %.3f steps/object\n'
                  % (git._total_searches, git._total_steps,
                     git._total_steps * 1.0 / git._total_searches))
    out.write(b'Total time: %.3fs\n' % (time.time() - start))
def test_midx_close():
    fddir = b'/proc/self/fd'
    try:
        os.listdir(fddir)
    except Exception:
        # not supported, not Linux, I guess
        return

    def openfiles():
        for fd in os.listdir(fddir):
            try:
                yield os.readlink(os.path.join(fddir, fd))
            except OSError:
                pass

    def force_midx(objdir):
        args = [path.exe(), b'midx', b'--auto', b'--dir', objdir]
        check_call(args)

    with no_lingering_errors(), \
         test_tempdir(b'bup-tgit-') as tmpdir:
        environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
        git.init_repo(bupdir)
        # create a few dummy idxes
        for i in range(10):
            _create_idx(tmpdir, i)
        git.auto_midx(tmpdir)
        l = git.PackIdxList(tmpdir)
        # this doesn't exist (yet)
        WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
        for i in range(10, 15):
            _create_idx(tmpdir, i)
        # delete the midx ...
        # TODO: why do we need to? git.auto_midx() below doesn't?!
        for fn in os.listdir(tmpdir):
            if fn.endswith(b'.midx'):
                os.unlink(os.path.join(tmpdir, fn))
        # and make a new one
        git.auto_midx(tmpdir)
        # check it still doesn't exist - we haven't refreshed
        WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
        # check that we still have the midx open, this really
        # just checks more for the kernel API ('deleted' string)
        for fn in openfiles():
            if not b'midx-' in fn:
                continue
            WVPASSEQ(True, b'deleted' in fn)
        # refresh the PackIdxList
        l.refresh()
        # and check that an object in pack 10 exists now
        WVPASSEQ(True, l.exists(struct.pack('18xBB', 10, 0)))
        for fn in openfiles():
            if not b'midx-' in fn:
                continue
            # check that we don't have it open anymore
            WVPASSEQ(False, b'deleted' in fn)
def test_duplicate_save_dates():
    with no_lingering_errors():
        with test_tempdir(b'bup-tvfs-') as tmpdir:
            bup_dir = tmpdir + b'/bup'
            environ[b'GIT_DIR'] = bup_dir
            environ[b'BUP_DIR'] = bup_dir
            environ[b'TZ'] = b'UTC'
            tzset()
            git.repodir = bup_dir
            data_path = tmpdir + b'/src'
            os.mkdir(data_path)
            with open(data_path + b'/file', 'wb+') as tmpfile:
                tmpfile.write(b'canary\n')
            ex((b'env',))
            ex((bup_path, b'init'))
            ex((bup_path, b'index', b'-v', data_path))
            for i in range(11):
                ex((bup_path, b'save', b'-d', b'100000', b'-n', b'test',
                    data_path))
            repo = LocalRepo()
            res = vfs.resolve(repo, b'/test')
            wvpasseq(2, len(res))
            name, revlist = res[-1]
            wvpasseq(b'test', name)
            wvpasseq((b'.',
                      b'1970-01-02-034640-00',
                      b'1970-01-02-034640-01',
                      b'1970-01-02-034640-02',
                      b'1970-01-02-034640-03',
                      b'1970-01-02-034640-04',
                      b'1970-01-02-034640-05',
                      b'1970-01-02-034640-06',
                      b'1970-01-02-034640-07',
                      b'1970-01-02-034640-08',
                      b'1970-01-02-034640-09',
                      b'1970-01-02-034640-10',
                      b'latest'),
                     tuple(sorted(x[0] for x in vfs.contents(repo, revlist))))
def test_apply_to_path_restricted_access(tmpdir):
    if is_superuser() or detect_fakeroot():
        return
    if sys.platform.startswith('cygwin'):
        return  # chmod 000 isn't effective.
    try:
        parent = tmpdir + b'/foo'
        path = parent + b'/bar'
        os.mkdir(parent)
        os.mkdir(path)
        clear_errors()
        if metadata.xattr:
            try:
                metadata.xattr.set(path, b'user.buptest', b'bup')
            except:
                print("failed to set test xattr")
                # ignore any failures here - maybe FS cannot do it
                pass
        m = metadata.from_path(path, archive_path=path, save_symlinks=True)
        WVPASSEQ(m.path, path)
        os.chmod(parent, 0o000)
        m.apply_to_path(path)
        print(b'saved_errors:', helpers.saved_errors, file=sys.stderr)
        expected_errors = ['utime: ']
        if m.linux_attr and _linux_attr_supported(tmpdir):
            expected_errors.append('Linux chattr: ')
        if metadata.xattr and m.linux_xattr:
            expected_errors.append("xattr.set ")
        WVPASS(len(helpers.saved_errors) == len(expected_errors))
        for i in range(len(expected_errors)):
            assert str(helpers.saved_errors[i]).startswith(expected_errors[i])
    finally:
        clear_errors()
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if not extra:
        o.fatal('filenames expected')

    if opt.seed != None:
        random.seed(opt.seed)

    for name in extra:
        name = argv_bytes(name)
        log('Damaging "%s"...\n' % path_msg(name))
        with open(name, 'r+b') as f:
            st = os.fstat(f.fileno())
            size = st.st_size
            if opt.percent or opt.size:
                ms1 = int(float(opt.percent or 0) / 100.0 * size) or size
                ms2 = opt.size or size
                maxsize = min(ms1, ms2)
            else:
                maxsize = 1
            chunks = opt.num or 10
            chunksize = size // chunks
            for r in range(chunks):
                sz = random.randrange(1, maxsize + 1)
                if sz > size:
                    sz = size
                if opt.equal:
                    ofs = r * chunksize
                else:
                    ofs = random.randrange(0, size - sz + 1)
                log('  %6d bytes at %d\n' % (sz, ofs))
                f.seek(ofs)
                f.write(randblock(sz))
def fmincore(fd):
    """Return the mincore() data for fd as a bytearray whose values can be
    tested via MINCORE_INCORE, or None if fd does not fully
    support the operation."""
    st = os.fstat(fd)
    if st.st_size == 0:
        return bytearray(0)
    if not _fmincore_chunk_size:
        _set_fmincore_chunk_size()
    pages_per_chunk = _fmincore_chunk_size // sc_page_size
    page_count = (st.st_size + sc_page_size - 1) // sc_page_size
    chunk_count = page_count // _fmincore_chunk_size
    if chunk_count < 1:
        chunk_count = 1
    result = bytearray(page_count)
    for ci in compat.range(chunk_count):
        pos = _fmincore_chunk_size * ci
        msize = min(_fmincore_chunk_size, st.st_size - pos)
        try:
            m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
        except mmap.error as ex:
            if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
                # Perhaps the file was a pipe, i.e. "... | bup split ..."
                return None
            raise ex
        try:
            _mincore(m, msize, 0, result, ci * pages_per_chunk)
        except OSError as ex:
            if ex.errno == errno.ENOSYS:
                return None
            raise
    return result
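# A minimal usage sketch for fmincore() above, not part of the original
# code: it assumes fmincore() and a MINCORE_INCORE bit mask are available
# from bup's helpers module (an assumption based on the docstring, not
# verified here), and counts how many pages of a file are currently
# resident in the page cache.
import os

from bup.helpers import fmincore, MINCORE_INCORE  # assumed import path

def resident_page_count(filename):
    fd = os.open(filename, os.O_RDONLY)
    try:
        coremap = fmincore(fd)
    finally:
        os.close(fd)
    if coremap is None:  # e.g. a pipe, or mincore() not supported
        return None
    return sum(1 for b in coremap if b & MINCORE_INCORE)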
def test_apply_to_path_restricted_access():
    if is_superuser() or detect_fakeroot():
        return
    if sys.platform.startswith('cygwin'):
        return  # chmod 000 isn't effective.
    with no_lingering_errors():
        with test_tempdir(b'bup-tmetadata-') as tmpdir:
            parent = tmpdir + b'/foo'
            path = parent + b'/bar'
            os.mkdir(parent)
            os.mkdir(path)
            clear_errors()
            m = metadata.from_path(path, archive_path=path,
                                   save_symlinks=True)
            WVPASSEQ(m.path, path)
            os.chmod(parent, 0o000)
            m.apply_to_path(path)
            print('saved_errors:', helpers.saved_errors, file=sys.stderr)
            expected_errors = ['utime: ']
            if m.linux_attr and _linux_attr_supported(tmpdir):
                expected_errors.append('Linux chattr: ')
            if metadata.xattr and m.linux_xattr:
                expected_errors.append("xattr.set '")
            WVPASS(len(helpers.saved_errors) == len(expected_errors))
            for i in range(len(expected_errors)):
                WVPASS(str(helpers.saved_errors[i])
                       .startswith(expected_errors[i]))
            clear_errors()
def test_apply_to_path_restricted_access():
    if is_superuser() or detect_fakeroot():
        return
    if sys.platform.startswith('cygwin'):
        return  # chmod 000 isn't effective.
    with no_lingering_errors():
        with test_tempdir('bup-tmetadata-') as tmpdir:
            parent = tmpdir + '/foo'
            path = parent + '/bar'
            os.mkdir(parent)
            os.mkdir(path)
            clear_errors()
            m = metadata.from_path(path, archive_path=path,
                                   save_symlinks=True)
            WVPASSEQ(m.path, path)
            os.chmod(parent, 0o000)
            m.apply_to_path(path)
            print('saved_errors:', helpers.saved_errors, file=sys.stderr)
            expected_errors = ['utime: ']
            if m.linux_attr and _linux_attr_supported(tmpdir):
                expected_errors.append('Linux chattr: ')
            if metadata.xattr and m.linux_xattr:
                expected_errors.append("xattr.set '")
            WVPASS(len(helpers.saved_errors) == len(expected_errors))
            for i in range(len(expected_errors)):
                WVPASS(str(helpers.saved_errors[i])
                       .startswith(expected_errors[i]))
            clear_errors()
def test_encode():
    s = b'hello world'
    looseb = b''.join(git._encode_looseobj(b'blob', s))
    looset = b''.join(git._encode_looseobj(b'tree', s))
    loosec = b''.join(git._encode_looseobj(b'commit', s))
    packb = b''.join(git._encode_packobj(b'blob', s))
    packt = b''.join(git._encode_packobj(b'tree', s))
    packc = b''.join(git._encode_packobj(b'commit', s))
    packlb = b''.join(git._encode_packobj(b'blob', s * 200))
    WVPASSEQ(git._decode_looseobj(looseb), (b'blob', s))
    WVPASSEQ(git._decode_looseobj(looset), (b'tree', s))
    WVPASSEQ(git._decode_looseobj(loosec), (b'commit', s))
    WVPASSEQ(git._decode_packobj(packb), (b'blob', s))
    WVPASSEQ(git._decode_packobj(packt), (b'tree', s))
    WVPASSEQ(git._decode_packobj(packc), (b'commit', s))
    WVPASSEQ(git._decode_packobj(packlb), (b'blob', s * 200))
    for i in range(10):
        WVPASS(git._encode_looseobj(b'blob', s, compression_level=i))

    def encode_pobj(n):
        return b''.join(git._encode_packobj(b'blob', s,
                                            compression_level=n))

    WVEXCEPT(ValueError, encode_pobj, -1)
    WVEXCEPT(ValueError, encode_pobj, 10)
    WVEXCEPT(ValueError, encode_pobj, b'x')
def test_long_index():
    with no_lingering_errors():
        with test_tempdir(b'bup-tgit-') as tmpdir:
            environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
            git.init_repo(bupdir)
            w = git.PackWriter()
            obj_bin = struct.pack('!IIIII',
                                  0x00112233, 0x44556677, 0x88990011,
                                  0x22334455, 0x66778899)
            obj2_bin = struct.pack('!IIIII',
                                   0x11223344, 0x55667788, 0x99001122,
                                   0x33445566, 0x77889900)
            obj3_bin = struct.pack('!IIIII',
                                   0x22334455, 0x66778899, 0x00112233,
                                   0x44556677, 0x88990011)
            pack_bin = struct.pack('!IIIII',
                                   0x99887766, 0x55443322, 0x11009988,
                                   0x77665544, 0x33221100)
            idx = list(list() for i in range(256))
            idx[0].append((obj_bin, 1, 0xfffffffff))
            idx[0x11].append((obj2_bin, 2, 0xffffffffff))
            idx[0x22].append((obj3_bin, 3, 0xff))
            w.count = 3
            name = tmpdir + b'/tmp.idx'
            r = w._write_pack_idx_v2(name, idx, pack_bin)
            i = git.PackIdxV2(name, open(name, 'rb'))
            WVPASSEQ(i.find_offset(obj_bin), 0xfffffffff)
            WVPASSEQ(i.find_offset(obj2_bin), 0xffffffffff)
            WVPASSEQ(i.find_offset(obj3_bin), 0xff)
def columnate(l, prefix): """Format elements of 'l' in columns with 'prefix' leading each line. The number of columns is determined automatically based on the string lengths. """ binary = isinstance(prefix, bytes) nothing = b'' if binary else '' nl = b'\n' if binary else '\n' if not l: return nothing l = l[:] clen = max(len(s) for s in l) ncols = (tty_width() - len(prefix)) // (clen + 2) if ncols <= 1: ncols = 1 clen = 0 cols = [] while len(l) % ncols: l.append(nothing) rows = len(l) // ncols for s in compat.range(0, len(l), rows): cols.append(l[s:s+rows]) out = nothing fmt = b'%-*s' if binary else '%-*s' for row in zip(*cols): out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl return out
def _create_idx(d, i):
    idx = git.PackIdxV2Writer()
    # add 255 vaguely reasonable entries
    for s in range(255):
        idx.add(struct.pack('18xBB', i, s), s, 100 * s)
    packbin = struct.pack('B19x', i)
    packname = os.path.join(d, b'pack-%s.idx' % hexlify(packbin))
    idx.write(packname, packbin)
def test_read_and_seek():
    # Write a set of randomly sized files containing random data whose
    # names are their sizes, and then verify that what we get back
    # from the vfs when seeking and reading with various block sizes
    # matches the original content.
    with no_lingering_errors():
        with test_tempdir(b'bup-tvfs-read-') as tmpdir:
            resolve = vfs.resolve
            bup_dir = tmpdir + b'/bup'
            environ[b'GIT_DIR'] = bup_dir
            environ[b'BUP_DIR'] = bup_dir
            git.repodir = bup_dir
            repo = LocalRepo()
            data_path = tmpdir + b'/src'
            os.mkdir(data_path)
            seed = randint(-(1 << 31), (1 << 31) - 1)
            rand = Random()
            rand.seed(seed)
            print('test_read seed:', seed, file=sys.stderr)
            max_size = 2 * 1024 * 1024
            sizes = set((rand.randint(1, max_size) for _ in range(5)))
            sizes.add(1)
            sizes.add(max_size)
            for size in sizes:
                write_sized_random_content(data_path, size, seed)
            ex((bup_path, b'init'))
            ex((bup_path, b'index', b'-v', data_path))
            ex((bup_path, b'save', b'-d', b'100000', b'-tvvn', b'test',
                b'--strip', data_path))
            read_sizes = set((rand.randint(1, max_size) for _ in range(10)))
            sizes.add(1)
            sizes.add(max_size)
            print('test_read src sizes:', sizes, file=sys.stderr)
            print('test_read read sizes:', read_sizes, file=sys.stderr)
            for size in sizes:
                res = resolve(repo,
                              b'/test/latest/' + str(size).encode('ascii'))
                _, item = res[-1]
                wvpasseq(size, vfs.item_size(repo, res[-1][1]))
                validate_vfs_streaming_read(repo, item,
                                            b'%s/%d' % (data_path, size),
                                            read_sizes)
                validate_vfs_seeking_read(repo, item,
                                          b'%s/%d' % (data_path, size),
                                          read_sizes)
def rev_list(self, refs, count=None, parse=None, format=None):
    self._require_command('rev-list')
    assert (count is None) or (isinstance(count, Integral))
    if format:
        assert '\n' not in format
        assert parse
    for ref in refs:
        assert ref
        assert '\n' not in ref
    self.check_busy()
    self._busy = 'rev-list'
    conn = self.conn
    conn.write('rev-list\n')
    if count is not None:
        conn.write(str(count))
    conn.write('\n')
    if format:
        conn.write(format)
    conn.write('\n')
    for ref in refs:
        conn.write(ref)
        conn.write('\n')
    conn.write('\n')
    if not format:
        for _ in range(len(refs)):
            line = conn.readline()
            if not line:
                raise ClientError('unexpected EOF')
            line = line.strip()
            assert len(line) == 40
            yield line
    else:
        for _ in range(len(refs)):
            line = conn.readline()
            if not line:
                raise ClientError('unexpected EOF')
            if not line.startswith('commit '):
                raise ClientError('unexpected line ' + repr(line))
            yield line[7:].strip(), parse(conn)
    # FIXME: confusing
    not_ok = self.check_ok()
    if not_ok:
        raise not_ok
    self._not_busy()
def test_reverse_suffix_duplicates():
    suffix = lambda x: tuple(vfs._reverse_suffix_duplicates(x))
    wvpasseq((b'x',), suffix((b'x',)))
    wvpasseq((b'x', b'y'), suffix((b'x', b'y')))
    wvpasseq((b'x-1', b'x-0'), suffix((b'x',) * 2))
    wvpasseq([b'x-%02d' % n for n in reversed(range(11))],
             list(suffix((b'x',) * 11)))
    wvpasseq((b'x-1', b'x-0', b'y'), suffix((b'x', b'x', b'y')))
    wvpasseq((b'x', b'y-1', b'y-0'), suffix((b'x', b'y', b'y')))
    wvpasseq((b'x', b'y-1', b'y-0', b'z'), suffix((b'x', b'y', b'y', b'z')))
def read_resolution(port):
    n = read_vuint(port)
    result = []
    for i in range(n):
        name = read_bvec(port)
        have_item = ord(port.read(1))
        assert have_item in (0, 1)
        item = read_item(port) if have_item else None
        result.append((name, item))
    return tuple(result)
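# Hypothetical counterpart sketch for read_resolution() above, not taken
# from the original source: it assumes write_vuint()/write_bvec() and a
# write_item() helper exist with the obvious signatures implied by the
# read side.  It emits a count followed by (name, optional item) pairs,
# using a one-byte presence flag for the item.
def write_resolution(port, resolution):
    write_vuint(port, len(resolution))
    for name, item in resolution:
        write_bvec(port, name)
        if item:
            port.write(b'\x01')
            write_item(port, item)
        else:
            port.write(b'\x00')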
def testpacks():
    with no_lingering_errors():
        with test_tempdir('bup-tgit-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bup_exe
            os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
            git.init_repo(bupdir)
            git.verbose = 1

            w = git.PackWriter()
            w.new_blob(os.urandom(100))
            w.new_blob(os.urandom(100))
            w.abort()

            w = git.PackWriter()
            hashes = []
            nobj = 1000
            for i in range(nobj):
                hashes.append(w.new_blob(str(i)))
            log('\n')
            nameprefix = w.close()
            print repr(nameprefix)
            WVPASS(os.path.exists(nameprefix + '.pack'))
            WVPASS(os.path.exists(nameprefix + '.idx'))

            r = git.open_idx(nameprefix + '.idx')
            print repr(r.fanout)

            for i in range(nobj):
                WVPASS(r.find_offset(hashes[i]) > 0)
            WVPASS(r.exists(hashes[99]))
            WVFAIL(r.exists('\0' * 20))

            pi = iter(r)
            for h in sorted(hashes):
                WVPASSEQ(str(next(pi)).encode('hex'), h.encode('hex'))

            WVFAIL(r.find_offset('\0' * 20))

            r = git.PackIdxList(bupdir + '/objects/pack')
            WVPASS(r.exists(hashes[5]))
            WVPASS(r.exists(hashes[6]))
            WVFAIL(r.exists('\0' * 20))
def testpacks():
    with no_lingering_errors():
        with test_tempdir(b'bup-tgit-') as tmpdir:
            environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
            git.init_repo(bupdir)
            git.verbose = 1

            w = git.PackWriter()
            w.new_blob(os.urandom(100))
            w.new_blob(os.urandom(100))
            w.abort()

            w = git.PackWriter()
            hashes = []
            nobj = 1000
            for i in range(nobj):
                hashes.append(w.new_blob(b'%d' % i))
            log('\n')
            nameprefix = w.close()
            print(repr(nameprefix))
            WVPASS(os.path.exists(nameprefix + b'.pack'))
            WVPASS(os.path.exists(nameprefix + b'.idx'))

            r = git.open_idx(nameprefix + b'.idx')
            print(repr(r.fanout))

            for i in range(nobj):
                WVPASS(r.find_offset(hashes[i]) > 0)
            WVPASS(r.exists(hashes[99]))
            WVFAIL(r.exists(b'\0' * 20))

            pi = iter(r)
            for h in sorted(hashes):
                WVPASSEQ(hexlify(next(pi)), hexlify(h))

            WVFAIL(r.find_offset(b'\0' * 20))

            r = git.PackIdxList(bupdir + b'/objects/pack')
            WVPASS(r.exists(hashes[5]))
            WVPASS(r.exists(hashes[6]))
            WVFAIL(r.exists(b'\0' * 20))
def test_pack_name_lookup(tmpdir):
    environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
    git.init_repo(bupdir)
    git.verbose = 1
    packdir = git.repo(b'objects/pack')

    idxnames = []
    hashes = []

    for start in range(0, 28, 2):
        w = git.PackWriter()
        for i in range(start, start + 2):
            hashes.append(w.new_blob(b'%d' % i))
        log('\n')
        idxnames.append(os.path.basename(w.close() + b'.idx'))

    r = git.PackIdxList(packdir)
    WVPASSEQ(len(r.packs), 2)
    for e, idxname in enumerate(idxnames):
        for i in range(e * 2, (e + 1) * 2):
            WVPASSEQ(idxname, r.exists(hashes[i], want_source=True))
def test_pack_name_lookup():
    with no_lingering_errors():
        with test_tempdir('bup-tgit-') as tmpdir:
            os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
            git.init_repo(bupdir)
            git.verbose = 1
            packdir = git.repo('objects/pack')

            idxnames = []
            hashes = []

            for start in range(0, 28, 2):
                w = git.PackWriter()
                for i in range(start, start + 2):
                    hashes.append(w.new_blob(str(i)))
                log('\n')
                idxnames.append(os.path.basename(w.close() + '.idx'))

            r = git.PackIdxList(packdir)
            WVPASSEQ(len(r.packs), 2)
            for e, idxname in enumerate(idxnames):
                for i in range(e * 2, (e + 1) * 2):
                    WVPASSEQ(r.exists(hashes[i], want_source=True), idxname)
def _reverse_suffix_duplicates(strs):
    """Yields the elements of strs, with any runs of duplicate values
    suffixed with -N suffixes, where the zero padded integer N
    decreases to 0 by 1 (e.g. 10, 09, ..., 00).
    """
    for name, duplicates in groupby(strs):
        ndup = len(tuple(duplicates))
        if ndup == 1:
            yield name
        else:
            ndig = len(str(ndup - 1))
            fmt = '%s-' + '%0' + str(ndig) + 'd'
            for i in range(ndup - 1, -1, -1):
                yield fmt % (name, i)
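# Quick illustration of _reverse_suffix_duplicates() above (added here,
# using str names to match this particular variant of the function):
# runs of equal names get decreasing, zero-padded -N suffixes while
# unique names pass through unchanged.
print(list(_reverse_suffix_duplicates(['x', 'y', 'y', 'y'])))
# ['x', 'y-2', 'y-1', 'y-0']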
def _open(self):
    if not self.file:
        objdir = dir = os.path.join(self.repo_dir, b'objects')
        fd, name = tempfile.mkstemp(suffix=b'.pack', dir=objdir)
        try:
            self.file = os.fdopen(fd, 'w+b')
        except:
            os.close(fd)
            raise
        try:
            self.parentfd = os.open(objdir, os.O_RDONLY)
        except:
            f = self.file
            self.file = None
            f.close()
            raise
        assert name.endswith(b'.pack')
        self.filename = name[:-5]
        self.file.write(b'PACK\0\0\0\2\0\0\0\0')
        self.idx = list(list() for i in range(256))
def _raw_write_bwlimit(f, buf, bwcount, bwtime):
    if not bwlimit:
        f.write(buf)
        return (len(buf), time.time())
    else:
        # We want to write in reasonably large blocks, but not so large that
        # they're likely to overflow a router's queue.  So our bwlimit timing
        # has to be pretty granular.  Also, if it takes too long from one
        # transmit to the next, we can't just make up for lost time to bring
        # the average back up to bwlimit - that will risk overflowing the
        # outbound queue, which defeats the purpose.  So if we fall behind
        # by more than one block delay, we shouldn't ever try to catch up.
        for i in range(0, len(buf), 4096):
            now = time.time()
            next = max(now, bwtime + 1.0 * bwcount / bwlimit)
            time.sleep(next - now)
            sub = buf[i:i + 4096]
            f.write(sub)
            bwcount = len(sub)  # might be less than 4096
            bwtime = next
        return (bwcount, bwtime)
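# Hypothetical caller sketch for _raw_write_bwlimit() above (not from the
# original source): the returned (bwcount, bwtime) pair must be threaded
# back into the next call so the pacing calculation can see how much was
# just sent and when.  `bwlimit` is assumed to be a module-level
# bytes-per-second setting, as implied by the function body.
import time

def write_throttled(f, blocks):
    bwcount, bwtime = 0, time.time()
    for buf in blocks:
        bwcount, bwtime = _raw_write_bwlimit(f, buf, bwcount, bwtime)
    f.flush()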
def testencode():
    with no_lingering_errors():
        s = 'hello world'
        looseb = ''.join(git._encode_looseobj('blob', s))
        looset = ''.join(git._encode_looseobj('tree', s))
        loosec = ''.join(git._encode_looseobj('commit', s))
        packb = ''.join(git._encode_packobj('blob', s))
        packt = ''.join(git._encode_packobj('tree', s))
        packc = ''.join(git._encode_packobj('commit', s))
        WVPASSEQ(git._decode_looseobj(looseb), ('blob', s))
        WVPASSEQ(git._decode_looseobj(looset), ('tree', s))
        WVPASSEQ(git._decode_looseobj(loosec), ('commit', s))
        WVPASSEQ(git._decode_packobj(packb), ('blob', s))
        WVPASSEQ(git._decode_packobj(packt), ('tree', s))
        WVPASSEQ(git._decode_packobj(packc), ('commit', s))
        for i in range(10):
            WVPASS(git._encode_looseobj('blob', s, compression_level=i))

        def encode_pobj(n):
            return ''.join(git._encode_packobj('blob', s,
                                               compression_level=n))

        WVEXCEPT(ValueError, encode_pobj, -1)
        WVEXCEPT(ValueError, encode_pobj, 10)
        WVEXCEPT(ValueError, encode_pobj, 'x')
def exists(self, hash, want_source=False):
    """Return nonempty if the object exists in the index files."""
    global _total_searches
    _total_searches += 1
    if hash in self.also:
        return True
    if self.do_bloom and self.bloom:
        if self.bloom.exists(hash):
            self.do_bloom = False
        else:
            _total_searches -= 1  # was counted by bloom
            return None
    for i in range(len(self.packs)):
        p = self.packs[i]
        _total_searches -= 1  # will be incremented by sub-pack
        ix = p.exists(hash, want_source=want_source)
        if ix:
            # reorder so most recently used packs are searched first
            self.packs = [p] + self.packs[:i] + self.packs[i + 1:]
            return ix
    self.do_bloom = True
    return None
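# Small added illustration of the most-recently-used reordering performed
# in exists() above, using plain strings instead of real pack objects:
# the pack that produced a hit moves to the front so later lookups try
# it first.
packs = ['p0', 'p1', 'p2', 'p3']
i = 2  # index of the pack that produced a hit
packs = [packs[i]] + packs[:i] + packs[i + 1:]
print(packs)  # ['p2', 'p0', 'p1', 'p3']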
def iter(self, name=None, wantrecurse=None):
    dname = name
    if dname and not dname.endswith('/'):
        dname += '/'
    ofs = self.children_ofs
    assert ofs <= len(self._m)
    assert self.children_n <= UINT_MAX  # i.e. python struct 'I'
    for i in range(self.children_n):
        eon = self._m.find('\0', ofs)
        assert eon >= 0
        assert eon >= ofs
        assert eon > ofs
        basename = self._m[ofs:ofs + (eon - ofs)]
        child = ExistingEntry(self, basename, self.name + basename,
                              self._m, eon + 1)
        if (not dname
             or child.name.startswith(dname)
             or child.name.endswith('/') and dname.startswith(child.name)):
            if not wantrecurse or wantrecurse(child):
                for e in child.iter(name=name, wantrecurse=wantrecurse):
                    yield e
        if not name or child.name == name or child.name.startswith(dname):
            yield child
        ofs = eon + 1 + ENTLEN
def iter(self, name=None, wantrecurse=None):
    dname = name
    if dname and not dname.endswith('/'):
        dname += '/'
    ofs = self.children_ofs
    assert(ofs <= len(self._m))
    assert(self.children_n <= UINT_MAX)  # i.e. python struct 'I'
    for i in range(self.children_n):
        eon = self._m.find('\0', ofs)
        assert(eon >= 0)
        assert(eon >= ofs)
        assert(eon > ofs)
        basename = str(buffer(self._m, ofs, eon-ofs))
        child = ExistingEntry(self, basename, self.name + basename,
                              self._m, eon+1)
        if (not dname
             or child.name.startswith(dname)
             or child.name.endswith('/') and dname.startswith(child.name)):
            if not wantrecurse or wantrecurse(child):
                for e in child.iter(name=name, wantrecurse=wantrecurse):
                    yield e
        if not name or child.name == name or child.name.startswith(dname):
            yield child
        ofs = eon + 1 + ENTLEN
def columnate(l, prefix): """Format elements of 'l' in columns with 'prefix' leading each line. The number of columns is determined automatically based on the string lengths. """ if not l: return "" l = l[:] clen = max(len(s) for s in l) ncols = (tty_width() - len(prefix)) // (clen + 2) if ncols <= 1: ncols = 1 clen = 0 cols = [] while len(l) % ncols: l.append('') rows = len(l) // ncols for s in compat.range(0, len(l), rows): cols.append(l[s:s+rows]) out = '' for row in zip(*cols): out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n' return out
def columnate(l, prefix): """Format elements of 'l' in columns with 'prefix' leading each line. The number of columns is determined automatically based on the string lengths. """ if not l: return "" l = l[:] clen = max(len(s) for s in l) ncols = (tty_width() - len(prefix)) // (clen + 2) if ncols <= 1: ncols = 1 clen = 0 cols = [] while len(l) % ncols: l.append('') rows = len(l) // ncols for s in compat.range(0, len(l), rows): cols.append(l[s:s + rows]) out = '' for row in zip(*cols): out += prefix + ''.join(('%-*s' % (clen + 2, s)) for s in row) + '\n' return out
def __iter__(self):
    start = self.sha_ofs
    for ofs in range(start, start + 20 * self.nsha, 20):
        yield self.map[ofs:ofs + 20]
def _group(l, count):
    for i in range(0, len(l), count):
        yield l[i:i+count]
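# Tiny added illustration of _group() above: it yields successive
# fixed-size slices of a sequence, with the final slice possibly shorter.
print(list(_group([1, 2, 3, 4, 5], 2)))
# [[1, 2], [3, 4], [5]]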
def __iter__(self):
    for i in range(self._fanget(self.entries-1)):
        yield buffer(self.shatable, i*20, 20)
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if verbosity and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')):
        if verbosity:
            qprogress('preserving live data (%d%% complete)\r'
                      % ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        idx_live_count = 0
        for i in range(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            if verbosity:
                log('deleting %s\n' % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - threshold) / 100.0):
            if verbosity:
                log('keeping %s (%d%% live)\n'
                    % (git.repo_rel(basename(idx_name)), live_frac * 100))
            continue

        if verbosity:
            log('rewriting %s (%.2f%% live)\n'
                % (basename(idx_name), live_frac * 100))
        for i in range(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                _, typ, _ = next(item_it)
                writer.just_write(sha, typ, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if verbosity:
        progress('preserving live data (%d%% complete)\n'
                 % ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert(not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert(not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir, verbosity))
               / float(existing_count) * 100))
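# Worked example (added) of the keep/delete/rewrite decision made per
# pack in sweep() above, assuming a threshold of 10 (percent): a pack
# with no live objects is deleted, one that is more than 90% live is
# kept as-is, and anything in between is rewritten.
threshold = 10
for idx_live_count, idx_len in ((0, 1000), (950, 1000), (500, 1000)):
    if idx_live_count == 0:
        print(idx_live_count, '/', idx_len, 'live:', 'delete pack')
    elif idx_live_count / float(idx_len) > (100 - threshold) / 100.0:
        print(idx_live_count, '/', idx_len, 'live:', 'keep pack')
    else:
        print(idx_live_count, '/', idx_len, 'live:', 'rewrite pack')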
def __iter__(self):
    for i in range(self.fanout[255]):
        yield buffer(self.map, 256*4 + 24*i + 4, 20)
    o.fatal('filenames expected')

if opt.seed != None:
    random.seed(opt.seed)

for name in extra:
    log('Damaging "%s"...\n' % name)
    f = open(name, 'r+b')
    st = os.fstat(f.fileno())
    size = st.st_size
    if opt.percent or opt.size:
        ms1 = int(float(opt.percent or 0)/100.0*size) or size
        ms2 = opt.size or size
        maxsize = min(ms1, ms2)
    else:
        maxsize = 1
    chunks = opt.num or 10
    chunksize = size/chunks
    for r in range(chunks):
        sz = random.randrange(1, maxsize+1)
        if sz > size:
            sz = size
        if opt.equal:
            ofs = r*chunksize
        else:
            ofs = random.randrange(0, size - sz + 1)
        log('  %6d bytes at %d\n' % (sz, ofs))
        f.seek(ofs)
        f.write(randblock(sz))
    f.close()
def randblock(n):
    l = []
    for i in range(n):
        l.append(chr(random.randrange(0, 256)))
    return ''.join(l)
def randbytes(sz):
    s = b''
    for i in range(sz):
        s += bytes_from_uint(random.randrange(0, 256))
    return s
git.check_repo_or_die()
m = git.PackIdxList(git.repo('objects/pack'))

report(-1)
_helpers.random_sha()
report(0)

if opt.existing:
    def foreverit(mi):
        while 1:
            for e in mi:
                yield e
    objit = iter(foreverit(m))

for c in range(opt.cycles):
    for n in range(opt.number):
        if opt.existing:
            bin = next(objit)
            assert(m.exists(bin))
        else:
            bin = _helpers.random_sha()
            # technically, a randomly generated object id might exist.
            # but the likelihood of that is the likelihood of finding
            # a collision in sha-1 by accident, which is so unlikely that
            # we don't care.
            assert(not m.exists(bin))
    report((c+1)*opt.number)

if bloom._total_searches:
def __init__(self):
    self.idx = list(list() for i in range(256))
    self.count = 0
def __iter__(self):
    for i in range(self.fanout[255]):
        yield buffer(self.map, 8 + 256*4 + 20*i, 20)