def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    git.check_repo_or_die()

    tags = [t for sublist in git.tags().values() for t in sublist]

    if opt.delete:
        # git.delete_ref() doesn't complain if a ref doesn't exist.  We
        # could implement this verification but we'd need to read in the
        # contents of the tag file and pass the hash, and we already know
        # about the tag's existence via "tags".
        tag_name = argv_bytes(opt.delete)
        if not opt.force and tag_name not in tags:
            log("error: tag '%s' doesn't exist\n" % path_msg(tag_name))
            sys.exit(1)
        tag_file = b'refs/tags/%s' % tag_name
        git.delete_ref(tag_file)
        sys.exit(0)

    if not extra:
        for t in tags:
            sys.stdout.flush()
            out = byte_stream(sys.stdout)
            out.write(t)
            out.write(b'\n')
        sys.exit(0)
    elif len(extra) != 2:
        o.fatal('expected tag name and commit')

    tag_name, commit = map(argv_bytes, extra[:2])
    if not tag_name:
        o.fatal("tag name must not be empty.")
    debug1("args: tag name = %s; commit = %s\n"
           % (path_msg(tag_name), commit.decode('ascii')))

    if tag_name in tags and not opt.force:
        log("bup: error: tag '%s' already exists\n" % path_msg(tag_name))
        sys.exit(1)
    if tag_name.startswith(b'.'):
        o.fatal("'%s' is not a valid tag name." % path_msg(tag_name))

    try:
        hash = git.rev_parse(commit)
    except git.GitError as e:
        log("bup: error: %s" % e)
        sys.exit(2)

    if not hash:
        log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
        sys.exit(2)

    with git.PackIdxList(git.repo(b'objects/pack')) as pL:
        if not pL.exists(hash):
            log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
            sys.exit(2)

    git.update_ref(b'refs/tags/' + tag_name, hash, None, force=True)
def test_pack_name_lookup():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    git.verbose = 1
    packdir = git.repo('objects/pack')
    idxnames = []
    hashes = []

    for start in range(0, 28, 2):
        w = git.PackWriter()
        for i in range(start, start + 2):
            hashes.append(w.new_blob(str(i)))
        log('\n')
        idxnames.append(os.path.basename(w.close() + '.idx'))

    r = git.PackIdxList(packdir)
    WVPASSEQ(len(r.packs), 2)
    for e, idxname in enumerate(idxnames):
        for i in range(e * 2, (e + 1) * 2):
            WVPASSEQ(r.exists(hashes[i], want_source=True), idxname)
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if extra:
        o.fatal("no arguments expected")

    git.check_repo_or_die()

    with git.PackIdxList(git.repo(b'objects/pack'),
                         ignore_midx=opt.ignore_midx) as mi:

        def do_predict(ix, out):
            total = len(ix)
            maxdiff = 0
            for count, i in enumerate(ix):
                prefix = struct.unpack('!Q', i[:8])[0]
                expected = prefix * total // (1 << 64)
                diff = count - expected
                maxdiff = max(maxdiff, abs(diff))
            out.write(b'%d of %d (%.3f%%) '
                      % (maxdiff, len(ix), maxdiff * 100.0 / len(ix)))
            out.flush()
            assert count + 1 == len(ix)

        sys.stdout.flush()
        out = byte_stream(sys.stdout)

        if opt.predict:
            if opt.ignore_midx:
                for pack in mi.packs:
                    do_predict(pack, out)
            else:
                do_predict(mi, out)
        else:
            # default mode: find longest matching prefix
            last = b'\0' * 20
            longmatch = 0
            for i in mi:
                if i == last:
                    continue
                #assert(str(i) >= last)
                pm = _helpers.bitmatch(last, i)
                longmatch = max(longmatch, pm)
                last = i
            out.write(b'%d\n' % longmatch)
            log('%d matching prefix bits\n' % longmatch)
            doublings = math.log(len(mi), 2)
            bpd = longmatch / doublings
            log('%.2f bits per doubling\n' % bpd)
            remain = 160 - longmatch
            rdoublings = remain / bpd
            log('%d bits (%.2f doublings) remaining\n' % (remain, rdoublings))
            larger = 2 ** rdoublings
            log('%g times larger is possible\n' % larger)
            perperson = larger / POPULATION_OF_EARTH
            log('\nEveryone on earth could have %d data sets like yours, all in one\n'
                'repository, and we would expect 1 object collision.\n'
                % int(perperson))
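For reference, the collision estimate printed by the default branch above is plain arithmetic: with n objects, the longest shared prefix grows by roughly longmatch / log2(n) bits per doubling of the repository, so 160-bit ids leave (160 - longmatch) / bpd doublings of headroom. A minimal standalone sketch with hypothetical numbers (n_objects and longmatch are made up, and POPULATION_OF_EARTH is assumed to match the constant the command imports):

import math

POPULATION_OF_EARTH = 7.7e9  # assumption; the real command imports its own constant

n_objects = 2 ** 20   # hypothetical repository size
longmatch = 45        # hypothetical longest matching prefix, in bits

doublings = math.log(n_objects, 2)      # 20 doublings to reach n_objects
bpd = longmatch / doublings             # bits of prefix gained per doubling
rdoublings = (160 - longmatch) / bpd    # doublings left before an expected collision
larger = 2 ** rdoublings
print('%.2f bits per doubling' % bpd)
print('%g times larger is possible' % larger)
print('%d data sets per person on earth' % int(larger / POPULATION_OF_EARTH))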
def test_midx_refreshing(tmpdir):
    environ[b'BUP_DIR'] = bupdir = tmpdir
    git.init_repo(bupdir)
    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    rw.new_blob(s1)
    p1base = rw.breakpoint()
    p1name = os.path.join(c.cachedir, p1base)
    s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
    s2sha = rw.new_blob(s2)
    p2base = rw.close()
    p2name = os.path.join(c.cachedir, p2base)
    del rw

    pi = git.PackIdxList(bupdir + b'/objects/pack')
    assert len(pi.packs) == 2
    pi.refresh()
    assert len(pi.packs) == 2
    assert sorted([os.path.basename(i.name) for i in pi.packs]) \
        == sorted([p1base, p2base])

    p1 = git.open_idx(p1name)
    assert p1.exists(s1sha)
    p2 = git.open_idx(p2name)
    assert not p2.exists(s1sha)
    assert p2.exists(s2sha)

    subprocess.call([path.exe(), b'midx', b'-f'])
    pi.refresh()
    assert len(pi.packs) == 1
    pi.refresh(skip_midx=True)
    assert len(pi.packs) == 2
    pi.refresh(skip_midx=False)
    assert len(pi.packs) == 1
def __init__(self, cfg_file, create=False):
    super(EncryptedRepo, self).__init__(cfg_file, create)
    # init everything for __del__ in case we get an exception here
    self.storage = None
    self.data_writer = None
    self.meta_writer = None
    self.cfg_file = cfg_file
    self.ec_cache = {}

    if libnacl is None:
        raise Exception("Encrypted repositories require libnacl")

    if self.max_pack_size is None:
        self.max_pack_size = 1000 * 1000 * 1000
    self.cachedir = self.config(b'bup.cachedir', opttype='path')
    if self.cachedir is None:
        raise Exception("encrypted repositories need a 'cachedir'")
    if create:
        mkdirp(self.cachedir)
    if not os.path.isdir(self.cachedir):
        raise Exception("cachedir doesn't exist or isn't a directory "
                        "- may have to init the repo?")
    self.storage = get_storage(self, create=create)

    self.readkey = None
    self.repokey = None
    self.writekey = None
    self.refsname = self.config(b'bup.refsname')
    if self.refsname is None:
        self.refsname = b'refs'
    readkey = self.config(b'bup.readkey')
    if readkey is not None:
        self.readkey = libnacl.public.SecretKey(unhexlify(readkey))
    repokey = self.config(b'bup.repokey')
    if repokey is not None:
        self.repokey = unhexlify(repokey)
    writekey = self.config(b'bup.writekey')
    if writekey is not None:
        self.writekey = unhexlify(writekey)
        if self.readkey is not None:
            assert self.writekey == self.readkey.pk
    else:
        assert self.readkey is not None, \
            "at least one of 'readkey' or 'writekey' is required"
        self.writekey = self.readkey.pk

    self.compression = self.compression_level
    if self.compression is None:
        self.compression = -1
    self.separatemeta = self.config(b'bup.separatemeta', opttype='bool')
    self.data_written_objs = set()
    if self.separatemeta:
        self.meta_written_objs = set()
    else:
        self.meta_written_objs = self.data_written_objs

    self._synchronize_idxes()
    self.idxlist = git.PackIdxList(self.cachedir)
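The readkey/writekey pair above is a NaCl box keypair stored hex-encoded in the repo config, with writekey == readkey.pk as the asserted invariant. A sketch of generating such a pair, under the assumption that libnacl.public.SecretKey() creates a fresh keypair when called with no argument:

from binascii import hexlify

import libnacl.public

# Assumption: SecretKey() with no argument generates a new keypair; .sk is the
# secret (read) key and .pk the derived public (write) key, matching the
# writekey == readkey.pk invariant asserted in __init__ above.
key = libnacl.public.SecretKey()
print('bup.readkey  =', hexlify(key.sk).decode('ascii'))
print('bup.writekey =', hexlify(key.pk).decode('ascii'))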
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if extra:
        o.fatal('no arguments expected')

    git.check_repo_or_die()

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    report(-1, out)
    _helpers.random_sha()
    report(0, out)

    with git.PackIdxList(git.repo(b'objects/pack'),
                         ignore_midx=opt.ignore_midx) as m:

        if opt.existing:
            def foreverit(mi):
                while 1:
                    for e in mi:
                        yield e
            objit = iter(foreverit(m))

        for c in range(opt.cycles):
            for n in range(opt.number):
                if opt.existing:
                    bin = next(objit)
                    assert m.exists(bin)
                else:
                    bin = _helpers.random_sha()
                    # technically, a randomly generated object id might exist.
                    # but the likelihood of that is the likelihood of finding
                    # a collision in sha-1 by accident, which is so unlikely that
                    # we don't care.
                    assert not m.exists(bin)
            report((c + 1) * opt.number, out)

    if bloom._total_searches:
        out.write(b'bloom: %d objects searched in %d steps: avg %.3f steps/object\n'
                  % (bloom._total_searches, bloom._total_steps,
                     bloom._total_steps * 1.0 / bloom._total_searches))
    if midx._total_searches:
        out.write(b'midx: %d objects searched in %d steps: avg %.3f steps/object\n'
                  % (midx._total_searches, midx._total_steps,
                     midx._total_steps * 1.0 / midx._total_searches))
    if git._total_searches:
        out.write(b'idx: %d objects searched in %d steps: avg %.3f steps/object\n'
                  % (git._total_searches, git._total_steps,
                     git._total_steps * 1.0 / git._total_searches))
    out.write(b'Total time: %.3fs\n' % (time.time() - start))
def test_midx_close():
    fddir = b'/proc/self/fd'
    try:
        os.listdir(fddir)
    except Exception:
        # not supported, not Linux, I guess
        return

    def openfiles():
        for fd in os.listdir(fddir):
            try:
                yield os.readlink(os.path.join(fddir, fd))
            except OSError:
                pass

    def force_midx(objdir):
        args = [path.exe(), b'midx', b'--auto', b'--dir', objdir]
        check_call(args)

    with no_lingering_errors(), \
         test_tempdir(b'bup-tgit-') as tmpdir:
        environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
        git.init_repo(bupdir)
        # create a few dummy idxes
        for i in range(10):
            _create_idx(tmpdir, i)
        git.auto_midx(tmpdir)

        l = git.PackIdxList(tmpdir)
        # this doesn't exist (yet)
        WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
        for i in range(10, 15):
            _create_idx(tmpdir, i)

        # delete the midx ...
        # TODO: why do we need to?  git.auto_midx() below doesn't?!
        for fn in os.listdir(tmpdir):
            if fn.endswith(b'.midx'):
                os.unlink(os.path.join(tmpdir, fn))

        # and make a new one
        git.auto_midx(tmpdir)

        # check it still doesn't exist - we haven't refreshed
        WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))

        # check that we still have the midx open, this really
        # just checks more for the kernel API ('deleted' string)
        for fn in openfiles():
            if not b'midx-' in fn:
                continue
            WVPASSEQ(True, b'deleted' in fn)

        # refresh the PackIdxList
        l.refresh()

        # and check that an object in pack 10 exists now
        WVPASSEQ(True, l.exists(struct.pack('18xBB', 10, 0)))

        for fn in openfiles():
            if not b'midx-' in fn:
                continue
            # check that we don't have it open anymore
            WVPASSEQ(False, b'deleted' in fn)
def testpacks():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    git.verbose = 1

    w = git.PackWriter()
    w.new_blob(os.urandom(100))
    w.new_blob(os.urandom(100))
    w.abort()

    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.new_blob(str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))

    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)

    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0' * 20))

    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))

    WVFAIL(r.find_offset('\0' * 20))

    r = git.PackIdxList(bupdir + '/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0' * 20))
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
def testpacks():
    with no_lingering_errors():
        with test_tempdir('bup-tgit-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bup_exe
            os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
            git.init_repo(bupdir)
            git.verbose = 1

            w = git.PackWriter()
            w.new_blob(os.urandom(100))
            w.new_blob(os.urandom(100))
            w.abort()

            w = git.PackWriter()
            hashes = []
            nobj = 1000
            for i in range(nobj):
                hashes.append(w.new_blob(str(i)))
            log('\n')
            nameprefix = w.close()
            print repr(nameprefix)
            WVPASS(os.path.exists(nameprefix + '.pack'))
            WVPASS(os.path.exists(nameprefix + '.idx'))

            r = git.open_idx(nameprefix + '.idx')
            print repr(r.fanout)

            for i in range(nobj):
                WVPASS(r.find_offset(hashes[i]) > 0)
            WVPASS(r.exists(hashes[99]))
            WVFAIL(r.exists('\0' * 20))

            pi = iter(r)
            for h in sorted(hashes):
                WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))

            WVFAIL(r.find_offset('\0' * 20))

            r = git.PackIdxList(bupdir + '/objects/pack')
            WVPASS(r.exists(hashes[5]))
            WVPASS(r.exists(hashes[6]))
            WVFAIL(r.exists('\0' * 20))
def testpacks():
    with no_lingering_errors():
        with test_tempdir(b'bup-tgit-') as tmpdir:
            environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
            git.init_repo(bupdir)
            git.verbose = 1

            w = git.PackWriter()
            w.new_blob(os.urandom(100))
            w.new_blob(os.urandom(100))
            w.abort()

            w = git.PackWriter()
            hashes = []
            nobj = 1000
            for i in range(nobj):
                hashes.append(w.new_blob(b'%d' % i))
            log('\n')
            nameprefix = w.close()
            print(repr(nameprefix))
            WVPASS(os.path.exists(nameprefix + b'.pack'))
            WVPASS(os.path.exists(nameprefix + b'.idx'))

            r = git.open_idx(nameprefix + b'.idx')
            print(repr(r.fanout))

            for i in range(nobj):
                WVPASS(r.find_offset(hashes[i]) > 0)
            WVPASS(r.exists(hashes[99]))
            WVFAIL(r.exists(b'\0'*20))

            pi = iter(r)
            for h in sorted(hashes):
                WVPASSEQ(hexlify(next(pi)), hexlify(h))

            WVFAIL(r.find_offset(b'\0'*20))

            r = git.PackIdxList(bupdir + b'/objects/pack')
            WVPASS(r.exists(hashes[5]))
            WVPASS(r.exists(hashes[6]))
            WVFAIL(r.exists(b'\0'*20))
def testpacks():
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = 'pybuptest.tmp'
    subprocess.call(['rm', '-rf', bupdir])
    git.init_repo(bupdir)
    git.verbose = 1

    w = git.PackWriter()
    w.new_blob(os.urandom(100))
    w.new_blob(os.urandom(100))
    w.abort()

    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.new_blob(str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))

    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)

    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0' * 20))

    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))

    WVFAIL(r.find_offset('\0' * 20))

    r = git.PackIdxList('pybuptest.tmp/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0' * 20))
def test_pack_name_lookup(tmpdir):
    environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
    git.init_repo(bupdir)
    git.verbose = 1
    packdir = git.repo(b'objects/pack')
    idxnames = []
    hashes = []

    for start in range(0, 28, 2):
        w = git.PackWriter()
        for i in range(start, start + 2):
            hashes.append(w.new_blob(b'%d' % i))
        log('\n')
        idxnames.append(os.path.basename(w.close() + b'.idx'))

    r = git.PackIdxList(packdir)
    WVPASSEQ(len(r.packs), 2)
    for e, idxname in enumerate(idxnames):
        for i in range(e * 2, (e + 1) * 2):
            WVPASSEQ(idxname, r.exists(hashes[i], want_source=True))
def test_midx_refreshing():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tclient-')
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = tmpdir
    git.init_repo(bupdir)
    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    rw.new_blob(s1)
    p1base = rw.breakpoint()
    p1name = os.path.join(c.cachedir, p1base)
    s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
    s2sha = rw.new_blob(s2)
    p2base = rw.close()
    p2name = os.path.join(c.cachedir, p2base)
    del rw

    pi = git.PackIdxList(bupdir + '/objects/pack')
    WVPASSEQ(len(pi.packs), 2)
    pi.refresh()
    WVPASSEQ(len(pi.packs), 2)
    WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
             sorted([p1base, p2base]))

    p1 = git.open_idx(p1name)
    WVPASS(p1.exists(s1sha))
    p2 = git.open_idx(p2name)
    WVFAIL(p2.exists(s1sha))
    WVPASS(p2.exists(s2sha))

    subprocess.call([bupmain, 'midx', '-f'])
    pi.refresh()
    WVPASSEQ(len(pi.packs), 1)
    pi.refresh(skip_midx=True)
    WVPASSEQ(len(pi.packs), 2)
    pi.refresh(skip_midx=False)
    WVPASSEQ(len(pi.packs), 1)
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
def test_midx_refreshing():
    with no_lingering_errors():
        with test_tempdir('bup-tclient-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
            os.environ['BUP_DIR'] = bupdir = tmpdir
            git.init_repo(bupdir)
            c = client.Client(bupdir, create=True)
            rw = c.new_packwriter()
            rw.new_blob(s1)
            p1base = rw.breakpoint()
            p1name = os.path.join(c.cachedir, p1base)
            s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
            s2sha = rw.new_blob(s2)
            p2base = rw.close()
            p2name = os.path.join(c.cachedir, p2base)
            del rw

            pi = git.PackIdxList(bupdir + '/objects/pack')
            WVPASSEQ(len(pi.packs), 2)
            pi.refresh()
            WVPASSEQ(len(pi.packs), 2)
            WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
                     sorted([p1base, p2base]))

            p1 = git.open_idx(p1name)
            WVPASS(p1.exists(s1sha))
            p2 = git.open_idx(p2name)
            WVFAIL(p2.exists(s1sha))
            WVPASS(p2.exists(s2sha))

            subprocess.call([bupmain, 'midx', '-f'])
            pi.refresh()
            WVPASSEQ(len(pi.packs), 1)
            pi.refresh(skip_midx=True)
            WVPASSEQ(len(pi.packs), 2)
            pi.refresh(skip_midx=False)
            WVPASSEQ(len(pi.packs), 1)
def test_pack_name_lookup():
    with no_lingering_errors(), test_tempdir('bup-tgit-') as tmpdir:
        os.environ['BUP_MAIN_EXE'] = bup_exe
        os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
        git.init_repo(bupdir)
        git.verbose = 1
        packdir = git.repo('objects/pack')
        idxnames = []
        hashes = []

        for start in range(0, 28, 2):
            w = git.PackWriter()
            for i in range(start, start + 2):
                hashes.append(w.new_blob(str(i)))
            log('\n')
            idxnames.append(os.path.basename(w.close() + '.idx'))

        r = git.PackIdxList(packdir)
        WVPASSEQ(len(r.packs), 2)
        for e, idxname in enumerate(idxnames):
            for i in range(e * 2, (e + 1) * 2):
                WVPASSEQ(r.exists(hashes[i], want_source=True), idxname)
def test_pack_name_lookup():
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = 'pybuptest.tmp'
    subprocess.call(['rm', '-rf', bupdir])
    git.init_repo(bupdir)
    git.verbose = 1
    packdir = git.repo('objects/pack')
    idxnames = []
    hashes = []

    for start in range(0, 28, 2):
        w = git.PackWriter()
        for i in range(start, start + 2):
            hashes.append(w.new_blob(str(i)))
        log('\n')
        idxnames.append(os.path.basename(w.close() + '.idx'))

    r = git.PackIdxList(packdir)
    WVPASSEQ(len(r.packs), 2)
    for e, idxname in enumerate(idxnames):
        for i in range(e * 2, (e + 1) * 2):
            WVPASSEQ(r.exists(hashes[i], want_source=True), idxname)
def _make_objcache(self):
    return git.PackIdxList(self.cachedir)
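Every call site in this section uses the same small PackIdxList surface: construct it over a directory of pack indexes, exists() to test membership (optionally with want_source=True to get the owning .idx name), and refresh() to rescan for new .idx/.midx files. A minimal standalone sketch of that pattern, assuming BUP_DIR already points at an initialized repository (the context-manager form matches the newer main() snippets above):

from bup import git

git.check_repo_or_die()  # dies unless BUP_DIR names a valid repo

with git.PackIdxList(git.repo(b'objects/pack')) as objcache:
    sha = b'\0' * 20  # arbitrary 20-byte id; all-zeros is almost surely absent
    print(objcache.exists(sha))  # falsy (None) when no pack contains the object
    objcache.refresh()           # rescan objects/pack for newly written indexes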
log("bup: error: tag '%s' already exists\n" % tag_name) sys.exit(1) if tag_name.startswith('.'): o.fatal("'%s' is not a valid tag name." % tag_name) try: hash = git.rev_parse(commit) except git.GitError as e: log("bup: error: %s" % e) sys.exit(2) if not hash: log("bup: error: commit %s not found.\n" % commit) sys.exit(2) pL = git.PackIdxList(git.repo('objects/pack')) if not pL.exists(hash): log("bup: error: commit %s not found.\n" % commit) sys.exit(2) tag_file = git.repo('refs/tags/%s' % tag_name) try: tag = file(tag_file, 'w') except OSError as e: log("bup: error: could not create tag '%s': %s" % (tag_name, e)) sys.exit(3) tag.write(hash.encode('hex')) tag.close()
optspec = """ bup margin -- predict Guess object offsets and report the maximum deviation ignore-midx Don't use midx files; use only plain pack idx files. """ o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal("no arguments expected") git.check_repo_or_die() mi = git.PackIdxList(git.repo('objects/pack'), ignore_midx=opt.ignore_midx) def do_predict(ix): total = len(ix) maxdiff = 0 for count, i in enumerate(ix): prefix = struct.unpack('!Q', i[:8])[0] expected = prefix * total / (1 << 64) diff = count - expected maxdiff = max(maxdiff, abs(diff)) print('%d of %d (%.3f%%) ' % (maxdiff, len(ix), maxdiff * 100.0 / len(ix))) sys.stdout.flush() assert (count + 1 == len(ix))