def bup_gc(threshold=10, compression=1, verbosity=0):
    """Garbage-collect the packs of the current repository.

    Counts the objects under objects/pack and returns early when there
    are none.  Otherwise the live objects are collected, the midx
    files, the bloom filter and the reflog are cleared, and sweep() is
    called with threshold and compression.  The live object set is
    closed afterwards whether or not the sweep succeeds.

    If find_live_objects() raises MissingObject, the missing id is
    logged and the process exits with status 1.  Progress messages are
    logged only when verbosity is non-zero.
    """
    cat_pipe = git.cp()
    existing_count = count_objects(git.repo('objects/pack'), verbosity)
    if verbosity:
        log('found %d objects\n' % existing_count)

    # Nothing in the pack directory: there is nothing to do.
    if not existing_count:
        if verbosity:
            log('nothing to collect\n')
        return

    try:
        live_objects = find_live_objects(existing_count, cat_pipe,
                                         verbosity=verbosity)
    except MissingObject as ex:
        log('bup: missing object %r \n' % ex.id.encode('hex'))
        sys.exit(1)

    reflog_expire = ['git', 'reflog', 'expire', '--all', '--expire=all']
    try:
        # FIXME: just rename midxes and bloom, and restore them at the end if
        # we didn't change any packs?
        if verbosity:
            log('clearing midx files\n')
        midx.clear_midxes()

        if verbosity:
            log('clearing bloom filter\n')
        bloom.clear_bloom(git.repo('objects/pack'))

        if verbosity:
            log('clearing reflog\n')
        expirelog = subprocess.Popen(reflog_expire,
                                     preexec_fn=git._gitenv())
        git._git_wait(' '.join(reflog_expire), expirelog)

        if verbosity:
            log('removing unreachable data\n')
        sweep(live_objects, existing_count, cat_pipe,
              threshold, compression, verbosity)
    finally:
        live_objects.close()
def find_live_objects(existing_count, cat_pipe, opt):
    """Return a bloom filter holding the ids reachable from every ref.

    A temporary '.bloom' file is created in objects/pack and a bloom
    filter sized for existing_count is built on it.  Each ref returned
    by git.list_refs() is walked with walk_object(); the binary id of
    every item visited is added to the filter.  Trees that were already
    visited are used to stop the walk early.

    When opt.verbose is set, each item is reported, an approximate
    count of distinct live objects is maintained, and the filter's
    false positive rate is logged at the end.
    """
    prune_visited_trees = True  # In case we want a command line option later
    pack_dir = git.repo('objects/pack')
    tmp_fd, bloom_filename = tempfile.mkstemp('.bloom', 'tmp-gc-', pack_dir)
    os.close(tmp_fd)
    # FIXME: allow selection of k?
    # FIXME: support ephemeral bloom filters (i.e. *never* written to disk)
    live_objs = bloom.create(bloom_filename, expected=existing_count, k=None)

    stop_at = None
    trees_visited = None
    if prune_visited_trees:
        trees_visited = set()

        def stop_at(hex_id):
            # Looks trees_visited up at call time, like the closure it
            # replaces.
            return hex_id.decode('hex') in trees_visited

    approx_live_count = 0
    for ref_name, ref_id in git.list_refs():
        walker = walk_object(cat_pipe, ref_id.encode('hex'),
                             stop_at=stop_at,
                             include_data=None)
        for item in walker:
            # FIXME: batch ids
            if opt.verbose:
                report_live_item(approx_live_count, existing_count,
                                 ref_name, ref_id, item)
            bin_id = item.id.decode('hex')
            if trees_visited is not None and item.type == 'tree':
                trees_visited.add(bin_id)
            if not opt.verbose:
                live_objs.add(bin_id)
            elif not live_objs.exists(bin_id):
                # Only count ids the filter has not (apparently) seen yet.
                live_objs.add(bin_id)
                approx_live_count += 1

    trees_visited = None
    if opt.verbose:
        log('expecting to retain about %.2f%% unnecessary objects\n'
            % live_objs.pfalse_positive())
    return live_objs
def __init__(self, remote, create=False): self._busy = self.conn = None self.sock = self.p = self.pout = self.pin = None is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse: assert(not remote) remote = '%s:' % is_reverse (self.protocol, self.host, self.port, self.dir) = parse_remote(remote) self.cachedir = git.repo('index-cache/%s' % re.sub(r'[^@\w]', '_', "%s:%s" % (self.host, self.dir))) if is_reverse: self.pout = os.fdopen(3, 'rb') self.pin = os.fdopen(4, 'wb') self.conn = Conn(self.pout, self.pin) else: if self.protocol in ('ssh', 'file'): try: # FIXME: ssh and file shouldn't use the same module self.p = ssh.connect(self.host, self.port, 'server') self.pout = self.p.stdout self.pin = self.p.stdin self.conn = Conn(self.pout, self.pin) except OSError, e: raise ClientError, 'connect: %s' % e, sys.exc_info()[2] elif self.protocol == 'bup': self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.sock.connect((self.host, atoi(self.port) or 1982)) self.sockw = self.sock.makefile('wb') self.conn = DemuxConn(self.sock.fileno(), self.sockw)
def test_multiple_suggestions():
    """Exercise the client index cache when two local packs exist.

    Two packs (one blob each, s1 and s2) are written locally, then the
    same blobs plus s3 are written through a client pack writer.  The
    number of cached index files is checked before, during and after.
    """
    with no_lingering_errors(), test_tempdir('bup-tclient-') as tmpdir:
        os.environ['BUP_MAIN_EXE'] = '../../../bup'
        os.environ['BUP_DIR'] = bupdir = tmpdir
        git.init_repo(bupdir)

        # One local pack per blob.
        for blob in (s1, s2):
            lw = git.PackWriter()
            lw.new_blob(blob)
            lw.close()
        WVPASSEQ(len(glob.glob(git.repo('objects/pack'+IDX_PAT))), 2)

        c = client.Client(bupdir, create=True)
        WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 0)
        rw = c.new_packwriter()
        s1sha = rw.new_blob(s1)
        WVPASS(rw.exists(s1sha))
        s2sha = rw.new_blob(s2)

        # This is a little hacky, but ensures that we test the
        # code under test
        while True:
            if len(glob.glob(c.cachedir+IDX_PAT)) >= 2:
                break
            if c.conn.has_input():
                break

        rw.new_blob(s2)
        WVPASS(rw.objcache.exists(s1sha))
        WVPASS(rw.objcache.exists(s2sha))
        rw.new_blob(s3)
        WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 2)
        rw.close()
        WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 3)
def test_pack_name_lookup():
    """Check that PackIdxList.exists() reports the right source index.

    Fourteen packs of two blobs each are written, then every blob hash
    is looked up with want_source=True and the result is compared with
    the name of the index its pack produced.  The temporary directory
    is removed only when no new test failures were recorded.
    """
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    git.verbose = 1
    packdir = git.repo('objects/pack')

    idxnames = []
    hashes = []
    for first in range(0, 28, 2):
        writer = git.PackWriter()
        for value in (first, first + 1):
            hashes.append(writer.new_blob(str(value)))
        log('\n')
        idxnames.append(os.path.basename(writer.close() + '.idx'))

    packs = git.PackIdxList(packdir)
    WVPASSEQ(len(packs.packs), 2)

    # hashes[2k] and hashes[2k+1] were both written to pack number k.
    for pos, sha in enumerate(hashes):
        WVPASSEQ(packs.exists(sha, want_source=True), idxnames[pos // 2])

    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
def test_long_index():
    """Write a v2 pack index with large offsets and read them back.

    Three fake object ids are stored with offsets 0xfffffffff,
    0xffffffffff and 0xff; PackIdxV2.find_offset() must return the same
    values.  The index file and temporary directory are removed only
    when no new test failures were recorded.
    """
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    w = git.PackWriter()

    def twenty_bytes(*words):
        # Five 32-bit words in network byte order.
        return struct.pack('!IIIII', *words)

    obj_bin = twenty_bytes(0x00112233, 0x44556677, 0x88990011,
                           0x22334455, 0x66778899)
    obj2_bin = twenty_bytes(0x11223344, 0x55667788, 0x99001122,
                            0x33445566, 0x77889900)
    obj3_bin = twenty_bytes(0x22334455, 0x66778899, 0x00112233,
                            0x44556677, 0x88990011)
    pack_bin = twenty_bytes(0x99887766, 0x55443322, 0x11009988,
                            0x77665544, 0x33221100)

    # (bucket, object id, second tuple field, offset)
    entries = [
        (0, obj_bin, 1, 0xfffffffff),
        (0x11, obj2_bin, 2, 0xffffffffff),
        (0x22, obj3_bin, 3, 0xff),
    ]
    idx = [[] for _ in xrange(256)]
    for bucket, oid, second, ofs in entries:
        idx[bucket].append((oid, second, ofs))

    (fd, name) = tempfile.mkstemp(suffix='.idx', dir=git.repo('objects'))
    os.close(fd)
    w.count = 3
    w._write_pack_idx_v2(name, idx, pack_bin)

    reader = git.PackIdxV2(name, open(name, 'rb'))
    for bucket, oid, second, ofs in entries:
        WVPASSEQ(reader.find_offset(oid), ofs)

    if wvfailure_count() == initial_failures:
        os.remove(name)
        subprocess.call(['rm', '-rf', tmpdir])
def test_multiple_suggestions():
    """Exercise the client index cache when two local packs exist.

    Two packs (one blob each, s1 and s2) are written locally, then the
    same blobs plus s3 are written through a client pack writer while
    the number of cached index files is checked.  The temporary
    directory is removed only when no new failures were recorded.
    """
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tclient-')
    os.environ['BUP_MAIN_EXE'] = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = tmpdir
    git.init_repo(bupdir)

    # One local pack per blob.
    for blob in (s1, s2):
        lw = git.PackWriter()
        lw.new_blob(blob)
        lw.close()
    WVPASSEQ(len(glob.glob(git.repo('objects/pack'+IDX_PAT))), 2)

    c = client.Client(bupdir, create=True)
    cache_glob = c.cachedir+IDX_PAT
    WVPASSEQ(len(glob.glob(cache_glob)), 0)
    rw = c.new_packwriter()
    s1sha = rw.new_blob(s1)
    WVPASS(rw.exists(s1sha))
    s2sha = rw.new_blob(s2)

    # This is a little hacky, but ensures that we test the code under test
    waiting = True
    while waiting:
        waiting = (len(glob.glob(cache_glob)) < 2
                   and not c.conn.has_input())

    rw.new_blob(s2)
    WVPASS(rw.objcache.exists(s1sha))
    WVPASS(rw.objcache.exists(s2sha))
    rw.new_blob(s3)
    WVPASSEQ(len(glob.glob(cache_glob)), 2)
    rw.close()
    WVPASSEQ(len(glob.glob(cache_glob)), 3)

    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
def __init__(self, remote, create=False): self._busy = self.conn = self.p = self.pout = self.pin = None is_reverse = os.environ.get('BUP_SERVER_REVERSE') if is_reverse: assert(not remote) remote = '%s:' % is_reverse rs = remote.split(':', 1) if len(rs) == 1: (host, dir) = (None, remote) else: (host, dir) = rs (self.host, self.dir) = (host, dir) self.cachedir = git.repo('index-cache/%s' % re.sub(r'[^@\w]', '_', "%s:%s" % (host, dir))) try: if is_reverse: self.pout = os.fdopen(3, 'rb') self.pin = os.fdopen(4, 'wb') else: self.p = ssh.connect(host, 'server') self.pout = self.p.stdout self.pin = self.p.stdin except OSError, e: raise ClientError, 'exec %r: %s' % (argv[0], e), sys.exc_info()[2]
def send_index(conn, name):
    """Send the named pack index over conn.

    name must end in ".idx" and must not contain "/".  The length of
    the index map is written as a 4-byte network-order unsigned
    integer, followed by the map itself, and then conn.ok() is called.
    """
    _init_session()
    assert "/" not in name
    assert name.endswith(".idx")
    idx_path = git.repo("objects/pack/%s" % name)
    idx = git.open_idx(idx_path)
    size_header = struct.pack("!I", len(idx.map))
    conn.write(size_header)
    conn.write(idx.map)
    conn.ok()
def send_index(conn, name):
    """Send the named pack index over conn.

    name must end in '.idx' and must not contain '/'.  The length of
    the index map is written as a 4-byte network-order unsigned
    integer, followed by the map itself, and then conn.ok() is called.
    """
    git.check_repo_or_die()
    assert('/' not in name)
    assert(name.endswith('.idx'))
    idx = git.open_idx(git.repo('objects/pack/%s' % name))
    for chunk in (struct.pack('!I', len(idx.map)), idx.map):
        conn.write(chunk)
    conn.ok()
def list_indexes(conn, junk):
    """Write the name of every .idx file in objects/pack to conn.

    One name is written per line; ' load' is appended to each name when
    dumb_server_mode is set.  conn.ok() is called at the end.  The junk
    argument is ignored.
    """
    git.check_repo_or_die()
    if dumb_server_mode:
        suffix = ' load'
    else:
        suffix = ''
    for entry in os.listdir(git.repo('objects/pack')):
        if not entry.endswith('.idx'):
            continue
        conn.write('%s%s\n' % (entry, suffix))
    conn.ok()
def list_indexes(conn, junk):
    """Write the name of every .idx file in objects/pack to conn.

    One name is written per line; " load" is appended to each name when
    dumb_server_mode is set.  conn.ok() is called at the end.  The junk
    argument is ignored.
    """
    _init_session()
    suffix = " load" if dumb_server_mode else ""
    pack_dir = git.repo("objects/pack")
    for entry in os.listdir(pack_dir):
        if entry.endswith(".idx"):
            line = "%s%s\n" % (entry, suffix)
            conn.write(line)
    conn.ok()
def clear_index(indexfile):
    """Remove indexfile and its ".meta" and ".hlink" companions.

    Each name is resolved with git.repo() before removal.  Files that
    do not exist are skipped silently; any other OSError propagates.
    A message is logged for each removed file when opt.verbose is set.
    """
    candidates = (indexfile, indexfile + ".meta", indexfile + ".hlink")
    for candidate in candidates:
        path = git.repo(candidate)
        try:
            os.remove(path)
            if opt.verbose:
                log("clear: removed %s\n" % path)
        except OSError as e:
            if e.errno == errno.ENOENT:
                continue  # already gone
            raise
def clear_index(indexfile):
    """Remove indexfile and its '.meta' and '.hlink' companions.

    Each name is resolved with git.repo() before removal.  Files that
    do not exist are skipped silently; any other OSError propagates.
    A message is logged for each removed file when opt.verbose is set.
    """
    # Built up front so the parameter is not rebound by the loop below.
    candidates = [indexfile, indexfile + '.meta', indexfile + '.hlink']
    for candidate in candidates:
        path = git.repo(candidate)
        try:
            os.remove(path)
        except OSError as e:
            # "except X as e" works on Python 2.6+ and Python 3, unlike
            # the comma form.
            if e.errno != errno.ENOENT:
                raise
        else:
            if opt.verbose:
                log('clear: removed %s\n' % path)
def test_dumb_client_server():
    """Check the client index cache against a 'bup-dumb-server' repo.

    A fresh repository in 'buptest_tclient.tmp' gets an empty
    'bup-dumb-server' file and one local pack holding s1.  The cached
    index count is checked after opening a client pack writer, after
    writing s1 again, and after writing s2 and closing the writer.
    """
    os.environ['BUP_MAIN_EXE'] = '../../../bup'
    bupdir = 'buptest_tclient.tmp'
    os.environ['BUP_DIR'] = bupdir
    subprocess.call(['rm', '-rf', bupdir])
    git.init_repo(bupdir)

    # Create the (empty) marker file.
    marker = open(git.repo('bup-dumb-server'), 'w')
    marker.close()

    lw = git.PackWriter()
    lw.new_blob(s1)
    lw.close()

    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    cache_glob = c.cachedir+IDX_PAT
    WVPASSEQ(len(glob.glob(cache_glob)), 1)
    rw.new_blob(s1)
    WVPASSEQ(len(glob.glob(cache_glob)), 1)
    rw.new_blob(s2)
    rw.close()
    WVPASSEQ(len(glob.glob(cache_glob)), 2)
def test_dumb_client_server():
    """Check the client index cache against a 'bup-dumb-server' repo.

    A fresh repository in a temporary directory gets an empty
    'bup-dumb-server' file and one local pack holding s1.  The cached
    index count is checked after opening a client pack writer, after
    writing s1 again, and after writing s2 and closing the writer.
    """
    with no_lingering_errors():
        with test_tempdir('bup-tclient-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = '../../../bup'
            bupdir = tmpdir
            os.environ['BUP_DIR'] = bupdir
            git.init_repo(bupdir)

            # Create the (empty) marker file.
            marker = open(git.repo('bup-dumb-server'), 'w')
            marker.close()

            lw = git.PackWriter()
            lw.new_blob(s1)
            lw.close()

            c = client.Client(bupdir, create=True)
            rw = c.new_packwriter()
            cache_glob = c.cachedir+IDX_PAT
            WVPASSEQ(len(glob.glob(cache_glob)), 1)
            rw.new_blob(s1)
            WVPASSEQ(len(glob.glob(cache_glob)), 1)
            rw.new_blob(s2)
            rw.close()
            WVPASSEQ(len(glob.glob(cache_glob)), 2)
def sync_indexes_del(self):
    """Delete cached .idx files that the server no longer lists.

    Sends 'list-indexes' to the server and reads index names until an
    empty line, asserting that no name contains '/'.  After the server
    reply is confirmed with check_ok(), the cache directory is created
    if needed and every cached '.idx' file whose name was not listed is
    logged and unlinked.
    """
    self.check_busy()
    conn = self.conn
    conn.write('list-indexes\n')

    # Only membership matters, so a set replaces the old name -> 1 dict
    # (which also shadowed the builtin all()).  The former "needed"
    # bookkeeping and the unused pack directory lookup were dead code:
    # nothing read them, and "needed" cost one stat() per listed index.
    listed = set()
    for line in linereader(conn):
        if not line:
            break
        assert(line.find('/') < 0)
        listed.add(line)
    self.check_ok()

    mkdirp(self.cachedir)
    for f in os.listdir(self.cachedir):
        if f.endswith('.idx') and f not in listed:
            log('pruning old index: %r\n' % f)
            os.unlink(os.path.join(self.cachedir, f))
def test_dumb_client_server():
    """Check the client index cache against a 'bup-dumb-server' repo.

    A fresh repository in a temporary directory gets an empty
    'bup-dumb-server' file and one local pack holding s1.  The cached
    index count is checked after opening a client pack writer, after
    writing s1 again, and after writing s2 and closing the writer.
    The temporary directory is removed only when no new failures were
    recorded.
    """
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tclient-')
    os.environ['BUP_MAIN_EXE'] = '../../../bup'
    bupdir = tmpdir
    os.environ['BUP_DIR'] = bupdir
    git.init_repo(bupdir)

    # Create the (empty) marker file.
    marker = open(git.repo('bup-dumb-server'), 'w')
    marker.close()

    lw = git.PackWriter()
    lw.new_blob(s1)
    lw.close()

    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    cache_glob = c.cachedir+IDX_PAT
    WVPASSEQ(len(glob.glob(cache_glob)), 1)
    rw.new_blob(s1)
    WVPASSEQ(len(glob.glob(cache_glob)), 1)
    rw.new_blob(s2)
    rw.close()
    WVPASSEQ(len(glob.glob(cache_glob)), 2)

    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
def test_pack_name_lookup():
    """Check that PackIdxList.exists() reports the right source index.

    A fresh repository is created in 'pybuptest.tmp'.  Fourteen packs
    of two blobs each are written, then every blob hash is looked up
    with want_source=True and the result is compared with the name of
    the index its pack produced.
    """
    os.environ['BUP_MAIN_EXE'] = '../../../bup'
    bupdir = 'pybuptest.tmp'
    os.environ['BUP_DIR'] = bupdir
    subprocess.call(['rm','-rf', bupdir])
    git.init_repo(bupdir)
    git.verbose = 1
    packdir = git.repo('objects/pack')

    idxnames = []
    hashes = []
    for first in range(0, 28, 2):
        writer = git.PackWriter()
        for value in (first, first + 1):
            hashes.append(writer.new_blob(str(value)))
        log('\n')
        idxnames.append(os.path.basename(writer.close() + '.idx'))

    packs = git.PackIdxList(packdir)
    WVPASSEQ(len(packs.packs), 2)

    # hashes[2k] and hashes[2k+1] were both written to pack number k.
    for pos, sha in enumerate(hashes):
        WVPASSEQ(packs.exists(sha, want_source=True), idxnames[pos // 2])
def test_long_index():
    """Write a v2 pack index with large offsets and read them back.

    Three fake object ids are stored with offsets 0xfffffffff,
    0xffffffffff and 0xff; PackIdxV2.find_offset() must return the same
    values.  The index file is removed at the end.
    """
    w = git.PackWriter()

    def twenty_bytes(*words):
        # Five 32-bit words in network byte order.
        return struct.pack('!IIIII', *words)

    obj_bin = twenty_bytes(0x00112233, 0x44556677, 0x88990011,
                           0x22334455, 0x66778899)
    obj2_bin = twenty_bytes(0x11223344, 0x55667788, 0x99001122,
                            0x33445566, 0x77889900)
    obj3_bin = twenty_bytes(0x22334455, 0x66778899, 0x00112233,
                            0x44556677, 0x88990011)
    pack_bin = twenty_bytes(0x99887766, 0x55443322, 0x11009988,
                            0x77665544, 0x33221100)

    # (bucket, object id, second tuple field, offset)
    entries = [
        (0, obj_bin, 1, 0xfffffffff),
        (0x11, obj2_bin, 2, 0xffffffffff),
        (0x22, obj3_bin, 3, 0xff),
    ]
    idx = [[] for _ in xrange(256)]
    for bucket, oid, second, ofs in entries:
        idx[bucket].append((oid, second, ofs))

    (fd, name) = tempfile.mkstemp(suffix='.idx', dir=git.repo('objects'))
    os.close(fd)
    w.count = 3
    w._write_pack_idx_v2(name, idx, pack_bin)

    reader = git.PackIdxV2(name, open(name, 'rb'))
    for bucket, oid, second, ofs in entries:
        WVPASSEQ(reader.find_offset(oid), ofs)
    os.remove(name)
def test_pack_name_lookup():
    """Check that PackIdxList.exists() reports the right source index.

    Fourteen packs of two blobs each are written into a repository in a
    temporary directory, then every blob hash is looked up with
    want_source=True and the result is compared with the name of the
    index its pack produced.
    """
    with no_lingering_errors():
        with test_tempdir('bup-tgit-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bup_exe
            bupdir = tmpdir + "/bup"
            os.environ['BUP_DIR'] = bupdir
            git.init_repo(bupdir)
            git.verbose = 1
            packdir = git.repo('objects/pack')

            idxnames = []
            hashes = []
            for first in range(0, 28, 2):
                writer = git.PackWriter()
                for value in (first, first + 1):
                    hashes.append(writer.new_blob(str(value)))
                log('\n')
                idx_path = writer.close() + '.idx'
                idxnames.append(os.path.basename(idx_path))

            packs = git.PackIdxList(packdir)
            WVPASSEQ(len(packs.packs), 2)

            # hashes[2k] and hashes[2k+1] were both written to pack k.
            for pos, sha in enumerate(hashes):
                WVPASSEQ(packs.exists(sha, want_source=True),
                         idxnames[pos // 2])
def find_live_objects(existing_count, cat_pipe, verbosity=0):
    """Return a bloom filter holding the ids reachable from every ref.

    A temporary '.bloom' file is created in objects/pack, a bloom
    filter sized for existing_count is built on it, and the file is
    unlinked right away.  Each ref returned by git.list_refs() is
    walked with walk_object(); the oid of every item visited is added
    to the filter.  Trees that were already visited are used to stop
    the walk early.

    When verbosity is non-zero, each item is reported, an approximate
    count of distinct live objects is maintained, and the filter's
    false positive rate is logged at the end.
    """
    prune_visited_trees = True  # In case we want a command line option later
    pack_dir = git.repo('objects/pack')
    tmp_fd, bloom_filename = tempfile.mkstemp('.bloom', 'tmp-gc-', pack_dir)
    os.close(tmp_fd)
    # FIXME: allow selection of k?
    # FIXME: support ephemeral bloom filters (i.e. *never* written to disk)
    live_objs = bloom.create(bloom_filename, expected=existing_count, k=None)
    # live_objs will hold on to the fd until close or exit
    os.unlink(bloom_filename)

    stop_at = None
    trees_visited = None
    if prune_visited_trees:
        trees_visited = set()

        def stop_at(hex_id):
            # Looks trees_visited up at call time, like the closure it
            # replaces.
            return hex_id.decode('hex') in trees_visited

    approx_live_count = 0
    for ref_name, ref_id in git.list_refs():
        walker = walk_object(cat_pipe, ref_id.encode('hex'),
                             stop_at=stop_at,
                             include_data=None)
        for item in walker:
            # FIXME: batch ids
            if verbosity:
                report_live_item(approx_live_count, existing_count,
                                 ref_name, ref_id, item, verbosity)
            if trees_visited is not None and item.type == 'tree':
                trees_visited.add(item.oid)
            if not verbosity:
                live_objs.add(item.oid)
            elif not live_objs.exists(item.oid):
                # Only count ids the filter has not (apparently) seen yet.
                live_objs.add(item.oid)
                approx_live_count += 1

    trees_visited = None
    if verbosity:
        log('expecting to retain about %.2f%% unnecessary objects\n'
            % live_objs.pfalse_positive())
    return live_objs
def __init__(self, remote, create=False): self._busy = self.conn = None self.sock = self.p = self.pout = self.pin = None is_reverse = os.environ.get("BUP_SERVER_REVERSE") if is_reverse: assert not remote remote = "%s:" % is_reverse (self.protocol, self.host, self.port, self.dir) = parse_remote(remote) self.cachedir = git.repo("index-cache/%s" % re.sub(r"[^@\w]", "_", "%s:%s" % (self.host, self.dir))) if is_reverse: self.pout = os.fdopen(3, "rb") self.pin = os.fdopen(4, "wb") self.conn = Conn(self.pout, self.pin) else: if self.protocol in ("ssh", "file"): try: # FIXME: ssh and file shouldn't use the same module self.p = ssh.connect(self.host, self.port, "server") self.pout = self.p.stdout self.pin = self.p.stdin self.conn = Conn(self.pout, self.pin) except OSError as e: raise ClientError, "connect: %s" % e, sys.exc_info()[2] elif self.protocol == "bup": self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.sock.connect((self.host, atoi(self.port) or 1982)) self.sockw = self.sock.makefile("wb") self.conn = DemuxConn(self.sock.fileno(), self.sockw) if self.dir: self.dir = re.sub(r"[\r\n]", " ", self.dir) if create: self.conn.write("init-dir %s\n" % self.dir) else: self.conn.write("set-dir %s\n" % self.dir) self.check_ok() self.sync_indexes()
def test_pack_name_lookup():
    """Check that PackIdxList.exists() reports the right source index.

    Fourteen packs of two blobs each are written into a repository in a
    temporary directory, then every blob hash is looked up with
    want_source=True and the result is compared with the name of the
    index its pack produced.
    """
    with no_lingering_errors(), test_tempdir('bup-tgit-') as tmpdir:
        os.environ['BUP_MAIN_EXE'] = bup_exe
        bupdir = tmpdir + "/bup"
        os.environ['BUP_DIR'] = bupdir
        git.init_repo(bupdir)
        git.verbose = 1
        packdir = git.repo('objects/pack')

        idxnames = []
        hashes = []
        for first in range(0, 28, 2):
            writer = git.PackWriter()
            for value in (first, first + 1):
                hashes.append(writer.new_blob(str(value)))
            log('\n')
            idxnames.append(os.path.basename(writer.close() + '.idx'))

        packs = git.PackIdxList(packdir)
        WVPASSEQ(len(packs.packs), 2)

        # hashes[2k] and hashes[2k+1] were both written to pack number k.
        for pos, sha in enumerate(hashes):
            WVPASSEQ(packs.exists(sha, want_source=True),
                     idxnames[pos // 2])
-- n,number= number of objects per cycle [10000] c,cycles= number of cycles to run [100] ignore-midx ignore .midx files, use only .idx files existing test with existing objects instead of fake ones """ o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal('no arguments expected') git.ignore_midx = opt.ignore_midx git.check_repo_or_die() m = git.PackIdxList(git.repo('objects/pack')) report(-1) _helpers.random_sha() report(0) if opt.existing: def foreverit(mi): while 1: for e in mi: yield e objit = iter(foreverit(m)) for c in range(opt.cycles): for n in range(opt.number): if opt.existing:
def main(argv):
    """Save the indexed paths named on the command line into the repository.

    Parses argv with the module's optspec, validates the option
    combinations (calling o.fatal() on misuse), and opens either a
    remote pack writer through client.Client or a local git.PackWriter.
    The index entries selected by the given paths are then walked;
    entries that are not already saved are written as blobs/trees, and
    per-directory metadata is written as a '.bupm' entry in each tree.

    Depending on the options, the resulting tree id and/or commit id is
    written to stdout in hex, and the branch ref is updated.  The
    process exits with status 1 when the client connection fails, when
    the metadata store cannot be accessed, or when any errors were
    recorded in saved_errors while saving.
    """
    # Hack around lack of nonlocal vars in python 2
    _nonlocal = {}

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    # Convert the path-like option values with argv_bytes().
    if opt.indexfile:
        opt.indexfile = argv_bytes(opt.indexfile)
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.strip_path:
        opt.strip_path = argv_bytes(opt.strip_path)

    git.check_repo_or_die()
    if not (opt.tree or opt.commit or opt.name):
        o.fatal("use one or more of -t, -c, -n")
    if not extra:
        o.fatal("no filenames given")

    extra = [argv_bytes(x) for x in extra]

    opt.progress = (istty2 and not opt.quiet)
    opt.smaller = parse_num(opt.smaller or 0)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)

    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    if opt.strip and opt.strip_path:
        o.fatal("--strip is incompatible with --strip-path")

    # Collect (old_path, new_path) pairs from every --graft flag.
    graft_points = []
    if opt.graft:
        if opt.strip:
            o.fatal("--strip is incompatible with --graft")

        if opt.strip_path:
            o.fatal("--strip-path is incompatible with --graft")

        for (option, parameter) in flags:
            if option == "--graft":
                parameter = argv_bytes(parameter)
                splitted_parameter = parameter.split(b'=')
                if len(splitted_parameter) != 2:
                    o.fatal(
                        "a graft point must be of the form old_path=new_path")
                old_path, new_path = splitted_parameter
                if not (old_path and new_path):
                    o.fatal("a graft point cannot be empty")
                graft_points.append(
                    (resolve_parent(old_path), resolve_parent(new_path)))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")

    name = opt.name
    if name and not valid_save_name(name):
        o.fatal("'%s' is not a valid branch name" % path_msg(name))
    refname = name and b'refs/heads/%s' % name or None
    # Pick the pack writer: remote (client) or local.
    if opt.remote or is_reverse:
        try:
            cli = client.Client(opt.remote)
        except client.ClientError as e:
            log('error: %s' % e)
            sys.exit(1)
        oldref = refname and cli.read_ref(refname) or None
        w = cli.new_packwriter(compression_level=opt.compress)
    else:
        cli = None
        oldref = refname and git.read_ref(refname) or None
        w = git.PackWriter(compression_level=opt.compress)

    handle_ctrl_c()

    # Metadata is stored in a file named .bupm in each directory.  The
    # first metadata entry will be the metadata for the current directory.
    # The remaining entries will be for each of the other directory
    # elements, in the order they're listed in the index.
    #
    # Since the git tree elements are sorted according to
    # git.shalist_item_sort_key, the metalist items are accumulated as
    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
    # created.  The sort_key should have been computed using the element's
    # mangled name and git mode (after hashsplitting), but the code isn't
    # actually doing that but rather uses the element's real name and mode.
    # This makes things a bit more difficult when reading it back, see
    # vfs.ordered_tree_entries().

    # Maintain a stack of information representing the current location in
    # the archive being constructed.  The current path is recorded in
    # parts, which will be something like ['', 'home', 'someuser'], and
    # the accumulated content and metadata for of the dirs in parts is
    # stored in parallel stacks in shalists and metalists.

    parts = []  # Current archive position (stack of dir names).
    shalists = []  # Hashes for each dir in paths.
    metalists = []  # Metadata for each dir in paths.

    def _push(part, metadata):
        # Enter a new archive directory -- make it the current directory.
        parts.append(part)
        shalists.append([])
        metalists.append([(b'', metadata)])  # This dir's metadata (no name).

    def _pop(force_tree, dir_metadata=None):
        # Leave the current archive directory and add its tree to its parent.
        assert (len(parts) >= 1)
        part = parts.pop()
        shalist = shalists.pop()
        metalist = metalists.pop()
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if force_tree:
            tree = force_tree
        else:
            names_seen = set()
            clean_list = []
            metaidx = 1  # entry at 0 is for the dir
            for x in shalist:
                name = x[1]
                if name in names_seen:
                    parent_path = b'/'.join(parts) + b'/'
                    add_error('error: ignoring duplicate path %s in %s' %
                              (path_msg(name), path_msg(parent_path)))
                    if not stat.S_ISDIR(x[0]):
                        del metalist[metaidx]
                else:
                    names_seen.add(name)
                    clean_list.append(x)
                    if not stat.S_ISDIR(x[0]):
                        metaidx += 1

            if dir_metadata:
                # Override the original metadata pushed for this dir.
                metalist = [(b'', dir_metadata)] + metalist[1:]
            sorted_metalist = sorted(metalist, key=lambda x: x[0])
            metadata = b''.join([m[1].encode() for m in sorted_metalist])
            metadata_f = BytesIO(metadata)
            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob,
                                                       w.new_tree,
                                                       [metadata_f],
                                                       keep_boundaries=False)
            clean_list.append((mode, b'.bupm', id))

            tree = w.new_tree(clean_list)
        if shalists:
            shalists[-1].append((GIT_MODE_TREE,
                                 git.mangle_name(part, GIT_MODE_TREE,
                                                 GIT_MODE_TREE), tree))
        return tree

    _nonlocal['count'] = 0
    _nonlocal['subcount'] = 0
    _nonlocal['lastremain'] = None

    def progress_report(n):
        # Add n to the running byte count and print a progress line with
        # the percentage done, a rounded rate and a time estimate.
        _nonlocal['subcount'] += n
        cc = _nonlocal['count'] + _nonlocal['subcount']
        pct = total and (cc * 100.0 / total) or 0
        now = time.time()
        elapsed = now - tstart
        kps = elapsed and int(cc / 1024. / elapsed)
        kps_frac = 10**int(math.log(kps + 1, 10) - 1)
        kps = int(kps / kps_frac) * kps_frac
        if cc:
            remain = elapsed * 1.0 / cc * (total - cc)
        else:
            remain = 0.0
        # Keep the previous estimate when the new one is larger by less
        # than 5%.
        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain'])
                and ((remain - _nonlocal['lastremain']) /
                     _nonlocal['lastremain'] < 0.05)):
            remain = _nonlocal['lastremain']
        else:
            _nonlocal['lastremain'] = remain
        hours = int(remain / 60 / 60)
        mins = int(remain / 60 - hours * 60)
        secs = int(remain - hours * 60 * 60 - mins * 60)
        if elapsed < 30:
            remainstr = ''
            kpsstr = ''
        else:
            kpsstr = '%dk/s' % kps
            if hours:
                remainstr = '%dh%dm' % (hours, mins)
            elif mins:
                remainstr = '%dm%d' % (mins, secs)
            else:
                remainstr = '%ds' % secs
        qprogress(
            'Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r' %
            (pct, cc / 1024, total / 1024, fcount, ftotal, remainstr, kpsstr))

    indexfile = opt.indexfile or git.repo(b'bupindex')
    r = index.Reader(indexfile)
    try:
        msr = index.MetaStoreReader(indexfile + b'.meta')
    except IOError as ex:
        if ex.errno != EACCES:
            raise
        log('error: cannot access %r; have you run bup index?'
            % path_msg(indexfile))
        sys.exit(1)
    hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')

    def already_saved(ent):
        # Returns the entry's sha when it is valid and the writer already
        # has it; otherwise a false value.
        return ent.is_valid() and w.exists(ent.sha) and ent.sha

    def wantrecurse_pre(ent):
        return not already_saved(ent)

    def wantrecurse_during(ent):
        return not already_saved(ent) or ent.sha_missing()

    def find_hardlink_target(hlink_db, ent):
        # Returns the first known path for the entry's (dev, ino), or
        # None (implicitly) when there is none.
        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
            if link_paths:
                return link_paths[0]

    # First pass (only with progress enabled): count the files and bytes
    # that still need saving.
    total = ftotal = 0
    if opt.progress:
        for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
            if not (ftotal % 10024):
                qprogress('Reading index: %d\r' % ftotal)
            exists = ent.exists()
            hashvalid = already_saved(ent)
            ent.set_sha_missing(not hashvalid)
            if not opt.smaller or ent.size < opt.smaller:
                if exists and not hashvalid:
                    total += ent.size
            ftotal += 1
        progress('Reading index: %d, done.\n' % ftotal)
        hashsplit.progress_callback = progress_report

    # Root collisions occur when strip or graft options map more than one
    # path to the same directory (paths which originally had separate
    # parents).  When that situation is detected, use empty metadata for
    # the parent.  Otherwise, use the metadata for the common parent.
    # Collision example: "bup save ... --strip /foo /foo/bar /bar".

    # FIXME: Add collision tests, or handle collisions some other way.

    # FIXME: Detect/handle strip/graft name collisions (other than root),
    # i.e. if '/foo/bar' and '/bar' both map to '/'.

    first_root = None
    root_collision = None
    tstart = time.time()
    fcount = 0
    lastskip_name = None
    lastdir = b''
    for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_during):
        (dir, file) = os.path.split(ent.name)
        exists = (ent.flags & index.IX_EXISTS)
        hashvalid = already_saved(ent)
        wasmissing = ent.sha_missing()
        oldsize = ent.size
        if opt.verbose:
            # One-letter status: 'D' (no longer exists), 'A' (sha is
            # index.EMPTY_SHA), 'M' (other not-yet-saved entry), ' '
            # (already saved).
            if not exists:
                status = 'D'
            elif not hashvalid:
                if ent.sha == index.EMPTY_SHA:
                    status = 'A'
                else:
                    status = 'M'
            else:
                status = ' '
            if opt.verbose >= 2:
                log('%s %-70s\n' % (status, path_msg(ent.name)))
            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
                if not lastdir.startswith(dir):
                    log('%s %-70s\n' %
                        (status, path_msg(os.path.join(dir, b''))))
                lastdir = dir

        if opt.progress:
            progress_report(0)
        fcount += 1

        if not exists:
            continue
        if opt.smaller and ent.size >= opt.smaller:
            if exists and not hashvalid:
                if opt.verbose:
                    log('skipping large file "%s"\n' % path_msg(ent.name))
                lastskip_name = ent.name
            continue

        assert (dir.startswith(b'/'))
        if opt.strip:
            dirp = stripped_path_components(dir, extra)
        elif opt.strip_path:
            dirp = stripped_path_components(dir, [opt.strip_path])
        elif graft_points:
            dirp = grafted_path_components(graft_points, dir)
        else:
            dirp = path_components(dir)

        # At this point, dirp contains a representation of the archive
        # path that looks like [(archive_dir_name, real_fs_path), ...].
        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
        # might look like this at some point:
        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].

        # This dual representation supports stripping/grafting, where the
        # archive path may not have a direct correspondence with the
        # filesystem.  The root directory is represented by an initial
        # component named '', and any component that doesn't have a
        # corresponding filesystem directory (due to grafting, for
        # example) will have a real_fs_path of None, i.e. [('', None),
        # ...].

        if first_root == None:
            first_root = dirp[0]
        elif first_root != dirp[0]:
            root_collision = True

        # If switching to a new sub-tree, finish the current sub-tree.
        while parts > [x[0] for x in dirp]:
            _pop(force_tree=None)

        # If switching to a new sub-tree, start a new sub-tree.
        for path_component in dirp[len(parts):]:
            dir_name, fs_path = path_component
            # Not indexed, so just grab the FS metadata or use empty metadata.
            try:
                meta = metadata.from_path(fs_path, normalized=True) \
                    if fs_path else metadata.Metadata()
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = dir_name
                meta = metadata.Metadata()
            _push(dir_name, meta)

        if not file:
            if len(parts) == 1:
                continue  # We're at the top level -- keep the current root dir
            # Since there's no filename, this is a subdir -- finish it.
            oldtree = already_saved(ent)  # may be None
            newtree = _pop(force_tree=oldtree)
            if not oldtree:
                if lastskip_name and lastskip_name.startswith(ent.name):
                    ent.invalidate()
                else:
                    ent.validate(GIT_MODE_TREE, newtree)
                ent.repack()
            if exists and wasmissing:
                _nonlocal['count'] += oldsize
            continue

        # it's not a directory
        if hashvalid:
            id = ent.sha
            git_name = git.mangle_name(file, ent.mode, ent.gitmode)
            git_info = (ent.gitmode, git_name, id)
            shalists[-1].append(git_info)
            sort_key = git.shalist_item_sort_key((ent.mode, file, id))
            meta = msr.metadata_at(ent.meta_ofs)
            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
            # Restore the times that were cleared to 0 in the metastore.
            (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime,
                                                    ent.ctime)
            metalists[-1].append((sort_key, meta))
        else:
            id = None
            hlink = find_hardlink_target(hlink_db, ent)
            try:
                meta = metadata.from_path(
                    ent.name,
                    hardlink_target=hlink,
                    normalized=True,
                    after_stat=after_nondir_metadata_stat)
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = ent.name
                continue
            if stat.S_IFMT(ent.mode) != stat.S_IFMT(meta.mode):
                # The mode changed since we indexed the file, this is bad.
                # This can cause two issues:
                # 1) We e.g. think the file is a regular file, but now it's
                #    something else (a device, socket, FIFO or symlink, etc.)
                #    and _read_ from it when we shouldn't.
                # 2) We then record it as valid, but don't update the index
                #    metadata, and on a subsequent save it has 'hashvalid'
                #    but is recorded as the file type from the index, when
                #    the content is something else ...
                # Avoid all of these consistency issues by just skipping such
                # things - it really ought to not happen anyway.
                add_error("%s: mode changed since indexing, skipping." %
                          path_msg(ent.name))
                lastskip_name = ent.name
                continue
            if stat.S_ISREG(ent.mode):
                try:
                    # If the file changes while we're reading it, then our reading
                    # may stop at some point, but the stat() above may have gotten
                    # a different size already. Recalculate the meta size so that
                    # the repository records the accurate size in the metadata, even
                    # if the other stat() data might be slightly older than the file
                    # content (which we can't fix, this is inherently racy, but we
                    # can prevent the size mismatch.)
                    meta.size = 0

                    def new_blob(data):
                        meta.size += len(data)
                        return w.new_blob(data)

                    before_saving_regular_file(ent.name)
                    with hashsplit.open_noatime(ent.name) as f:
                        (mode, id) = hashsplit.split_to_blob_or_tree(
                            new_blob, w.new_tree, [f], keep_boundaries=False)
                except (IOError, OSError) as e:
                    add_error('%s: %s' % (ent.name, e))
                    lastskip_name = ent.name
            elif stat.S_ISDIR(ent.mode):
                assert (0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                mode, id = (GIT_MODE_SYMLINK, w.new_blob(meta.symlink_target))
            else:
                # Everything else should be fully described by its
                # metadata, so just record an empty blob, so the paths
                # in the tree and .bupm will match up.
                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))

            # id is still None when reading the regular file failed above.
            if id:
                ent.validate(mode, id)
                ent.repack()
                git_name = git.mangle_name(file, ent.mode, ent.gitmode)
                git_info = (mode, git_name, id)
                shalists[-1].append(git_info)
                sort_key = git.shalist_item_sort_key((ent.mode, file, id))
                metalists[-1].append((sort_key, meta))

        if exists and wasmissing:
            _nonlocal['count'] += oldsize
            _nonlocal['subcount'] = 0

    if opt.progress:
        pct = total and _nonlocal['count'] * 100.0 / total or 100
        progress(
            'Saving: %.2f%% (%d/%dk, %d/%d files), done. \n' %
            (pct, _nonlocal['count'] / 1024, total / 1024, fcount, ftotal))

    while len(parts) > 1:  # _pop() all the parts above the root
        _pop(force_tree=None)
    assert (len(shalists) == 1)
    assert (len(metalists) == 1)

    # Finish the root directory.
    tree = _pop(
        force_tree=None,
        # When there's a collision, use empty metadata for the root.
        dir_metadata=metadata.Metadata() if root_collision else None)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.tree:
        out.write(hexlify(tree))
        out.write(b'\n')
    if opt.commit or name:
        if compat.py_maj > 2:
            # Strip b prefix from python 3 bytes reprs to preserve previous format
            msgcmd = b'[%s]' % b', '.join(
                [repr(argv_bytes(x))[1:].encode('ascii') for x in argv])
        else:
            msgcmd = repr(argv)
        msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
        userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
        commit = w.new_commit(tree, oldref, userline, date, None, userline,
                              date, None, msg)
        if opt.commit:
            out.write(hexlify(commit))
            out.write(b'\n')

    msr.close()
    w.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
def main():
    """Entry point for the get command.

    Parses the command line, opens the source and destination
    repositories, runs the handler matching each resolved target's
    method, and finally updates the destination refs.

    Ref updates are deferred until the pack writer has been closed so
    that a failure while transferring leaves the old refs untouched.
    A failure to update an individual ref is recorded via add_error()
    rather than raised.
    """
    handle_ctrl_c()
    is_reverse = os.environ.get('BUP_SERVER_REVERSE')
    opt = parse_args(sys.argv)
    git.check_repo_or_die()
    src_dir = opt.source or git.repo()
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if is_reverse and opt.remote:
        misuse("don't use -r in reverse mode; it's automatic")
    if opt.remote or is_reverse:
        dest_repo = RemoteRepo(opt.remote)
    else:
        dest_repo = LocalRepo()

    with dest_repo as dest_repo:
        # The source repo is opened exactly once, by this "with", so
        # that it is reliably closed on the way out.  (Re-creating it
        # inside the block would leave the second instance unclosed.)
        with LocalRepo(repo_dir=src_dir) as src_repo:
            with dest_repo.new_packwriter(
                    compression_level=opt.compress) as writer:
                # Resolve and validate all sources and destinations,
                # implicit or explicit, and do it up-front, so we can
                # fail before we start writing (for any obviously
                # broken cases).
                target_items = resolve_targets(opt.target_specs,
                                               src_repo, dest_repo)

                updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))
                no_ref_info = (None, None)

                handlers = {'ff': handle_ff,
                            'append': handle_append,
                            'force-pick': handle_pick,
                            'pick': handle_pick,
                            'new-tag': handle_new_tag,
                            'replace': handle_replace,
                            'unnamed': handle_unnamed}

                for item in target_items:
                    debug1('get-spec: %s\n' % str(item.spec))
                    debug1('get-src: %s\n' % loc_desc(item.src))
                    debug1('get-dest: %s\n' % loc_desc(item.dest))
                    dest_path = item.dest and item.dest.path
                    if dest_path:
                        if dest_path.startswith('/.tag/'):
                            dest_ref = 'refs/tags/%s' % dest_path[6:]
                        else:
                            dest_ref = 'refs/heads/%s' % dest_path[1:]
                    else:
                        dest_ref = None

                    # If an earlier item already moved this ref, build on
                    # that result; otherwise start from the destination's
                    # current hash.
                    dest_hash = item.dest and item.dest.hash
                    orig_ref, cur_ref = updated_refs.get(dest_ref, no_ref_info)
                    orig_ref = orig_ref or dest_hash
                    cur_ref = cur_ref or dest_hash

                    handler = handlers[item.spec.method]
                    item_result = handler(item, src_repo, writer, opt)
                    if len(item_result) > 1:
                        new_id, tree = item_result
                    else:
                        new_id = item_result[0]
                        # Never report a tree left over from a previous
                        # item (or hit an unbound name on the first one).
                        tree = None

                    if not dest_ref:
                        log_item(item.spec.src, item.src.type, opt)
                    else:
                        updated_refs[dest_ref] = (orig_ref, new_id)
                        if dest_ref.startswith('refs/tags/'):
                            log_item(item.spec.src, item.src.type, opt,
                                     tag=new_id)
                        else:
                            log_item(item.spec.src, item.src.type, opt,
                                     tree=tree, commit=new_id)

        # Only update the refs at the very end, once the writer is
        # closed, so that if something goes wrong above, the old refs
        # will be undisturbed.
        for ref_name, info in updated_refs.iteritems():
            orig_ref, new_ref = info
            try:
                dest_repo.update_ref(ref_name, new_ref, orig_ref)
                if opt.verbose:
                    new_hex = new_ref.encode('hex')
                    if orig_ref:
                        orig_hex = orig_ref.encode('hex')
                        log('updated %r (%s -> %s)\n'
                            % (ref_name, orig_hex, new_hex))
                    else:
                        log('updated %r (%s)\n' % (ref_name, new_hex))
            except (git.GitError, client.ClientError) as ex:
                add_error('unable to update ref %r: %s' % (ref_name, ex))
-- n,number= number of objects per cycle [10000] c,cycles= number of cycles to run [100] ignore-midx ignore .midx files, use only .idx files existing test with existing objects instead of fake ones """ o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) if extra: o.fatal('no arguments expected') git.ignore_midx = opt.ignore_midx git.check_repo_or_die() m = git.PackIdxList(git.repo('objects/pack')) report(-1) _helpers.random_sha() report(0) if opt.existing: def foreverit(mi): while 1: for e in mi: yield e objit = iter(foreverit(m)) for c in xrange(opt.cycles):
if extra: o.fatal('no positional parameters expected') if opt.threshold: try: opt.threshold = int(opt.threshold) except ValueError: o.fatal('threshold must be an integer percentage value') if opt.threshold < 0 or opt.threshold > 100: o.fatal('threshold must be an integer percentage value') git.check_repo_or_die() cat_pipe = vfs.cp() existing_count = count_objects(git.repo('objects/pack')) if opt.verbose: log('found %d objects\n' % existing_count) if not existing_count: if opt.verbose: log('nothing to collect\n') else: live_objects = find_live_objects(existing_count, cat_pipe, opt) try: # FIXME: just rename midxes and bloom, and restore them at the end if # we didn't change any packs? if opt.verbose: log('clearing midx files\n') midx.clear_midxes() if opt.verbose: log('clearing bloom filter\n') bloom.clear_bloom(git.repo('objects/pack')) if opt.verbose: log('clearing reflog\n')
def update_index(top, excluded_paths, exclude_rxs):
    """Walk the tree under `top` and reconcile the index entries with it.

    For every path yielded by drecurse.recursive_dirlist():
      - existing index entries that sort after the path are marked
        deleted (and dropped from the hardlink db when appropriate);
      - an entry with the same name is refreshed from the new stat
        data and its metadata is stored again;
      - otherwise the path is added to the new-entries writer.

    Reads the module-level `opt` and `indexfile`.  Metadata read
    failures are recorded with add_error() and the path is skipped.
    As written, this function ends once the walk is finished.
    """
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    old_index = index.Reader(indexfile)
    meta_writer = index.MetaStoreWriter(indexfile + '.meta')
    new_entries = index.Writer(indexfile, meta_writer, tmax)
    cursor = IterHelper(old_index.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    if opt.fake_valid:
        def hashgen(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)
    else:
        hashgen = None

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()

    def report_rate():
        # Reads the enclosing function's current `total`.
        elapsed = time.time() - index_start
        paths_per_sec = total / elapsed if elapsed else 0
        qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))

    walker = drecurse.recursive_dirlist([top],
                                        xdev=opt.xdev,
                                        bup_dir=bup_dir,
                                        excluded_paths=excluded_paths,
                                        exclude_rxs=exclude_rxs)
    for path, pst in walker:
        echo_path = (opt.verbose >= 2
                     or (opt.verbose == 1 and stat.S_ISDIR(pst.st_mode)))
        if echo_path:
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
        if echo_path or not (total % 128):
            report_rate()
        total += 1

        # Index entries sorting after this path no longer exist on disk.
        while cursor.cur and cursor.cur.name > path:
            if cursor.cur.exists():
                cursor.cur.set_deleted()
                cursor.cur.repack()
                if cursor.cur.nlink > 1 and not stat.S_ISDIR(cursor.cur.mode):
                    hlinks.del_path(cursor.cur.name)
            cursor.next()

        if not (cursor.cur and cursor.cur.name == path):
            # A path the index has not seen before.
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # Timestamps are zeroed for the same reason as for
            # already-known paths, below.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = meta_writer.store(meta)
            new_entries.add(path, pst, meta_ofs, hashgen=hashgen)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
            continue

        # A path that was already in the index.
        try:
            meta = metadata.from_path(path, statinfo=pst)
        except (OSError, IOError) as e:
            add_error(e)
            cursor.next()
            continue
        if not stat.S_ISDIR(cursor.cur.mode) and cursor.cur.nlink > 1:
            hlinks.del_path(cursor.cur.name)
        if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
            hlinks.add_path(path, pst.st_dev, pst.st_ino)
        # Zero the timestamps so they don't bloat the store -- they
        # are already kept in the index (they vary a lot and are
        # fixed length).  This might look unsafe given "tmax", since
        # that code may adjust (mangle) the index mtime and ctime,
        # producing fake values which must not end up in a .bupm.
        # It looks like that can't happen, though: (1) when "save"
        # validates the index entry, it always reads the metadata
        # from the filesystem; (2) metadata is only read/used from
        # the index if hashvalid is true; (3) index always
        # invalidates "faked" entries, because "old != new" in
        # from_stat().
        meta.ctime = meta.mtime = meta.atime = 0
        meta_ofs = meta_writer.store(meta)
        cursor.cur.from_stat(pst, meta_ofs, tstart,
                             check_device=opt.check_device)
        if hashgen and not (cursor.cur.flags & index.IX_HASHVALID):
            cursor.cur.gitmode, cursor.cur.sha = hashgen(path)
            cursor.cur.flags |= index.IX_HASHVALID
        if opt.fake_invalid:
            cursor.cur.invalidate()
        cursor.cur.repack()
        cursor.next()
(opt, flags, extra) = o.parse(sys.argv[1:]) par2_setup() if opt.par2_ok: if par2_ok: sys.exit(0) # 'true' in sh else: sys.exit(1) if opt.disable_par2: par2_ok = 0 git.check_repo_or_die() if not extra: debug('fsck: No filenames given: checking all packs.\n') extra = glob.glob(git.repo('objects/pack/*.pack')) code = 0 count = 0 outstanding = {} for name in extra: if name.endswith('.pack'): base = name[:-5] elif name.endswith('.idx'): base = name[:-4] elif name.endswith('.par2'): base = name[:-5] elif os.path.exists(name + '.pack'): base = name else: raise Exception('%s is not a pack file!' % name)
opt.format, { "status": "saving", "percentage": pct, "bytes_done": cc, "bytes_total": total, "files_done": fcount, "files_total": ftotal, "time_remaining": remain, "time_remaining_str": remainstr, "speed": kps, }, ) indexfile = opt.indexfile or git.repo("bupindex") r = index.Reader(indexfile) try: msr = index.MetaStoreReader(indexfile + ".meta") except IOError, ex: if ex.errno != EACCES: raise log("error: cannot access %r; have you run bup index?" % indexfile) sys.exit(1) hlink_db = hlinkdb.HLinkDB(indexfile + ".hlink") def already_saved(ent): return ent.is_valid() and w.exists(ent.sha) and ent.sha
def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
    """Bring the index up to date with the filesystem tree under `top`.

    Walks the tree with recursive_dirlist(), marking vanished entries
    as deleted, refreshing stale entries, and collecting new paths in
    a separate writer.  Afterwards the hardlink db is prepared for
    saving and, when an index already exists, the new entries are
    merged with it; otherwise the new writer is simply closed.

    Reads the module-level `opt` and `indexfile`.  Metadata read
    failures are recorded with add_error() and the path is skipped.
    """
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    old_index = index.Reader(indexfile)
    meta_writer = index.MetaStoreWriter(indexfile + '.meta')
    new_entries = index.Writer(indexfile, meta_writer, tmax)
    cursor = IterHelper(old_index.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    if opt.fake_valid:
        def fake_hash(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)
    else:
        fake_hash = None

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()

    def current_rate():
        # Paths per second so far; reads the enclosing `total`.
        elapsed = time.time() - index_start
        return total / elapsed if elapsed else 0

    walker = recursive_dirlist([top],
                               xdev=opt.xdev,
                               bup_dir=bup_dir,
                               excluded_paths=excluded_paths,
                               exclude_rxs=exclude_rxs,
                               xdev_exceptions=xdev_exceptions)
    for path, pst in walker:
        echo_path = (opt.verbose >= 2
                     or (opt.verbose == 1 and stat.S_ISDIR(pst.st_mode)))
        if echo_path:
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
        if echo_path or not (total % 128):
            qprogress('Indexing: %d (%d paths/s)\r'
                      % (total, current_rate()))
        total += 1

        # Index entries sorting after this path no longer exist on disk.
        while cursor.cur and cursor.cur.name > path:
            if cursor.cur.exists():
                cursor.cur.set_deleted()
                cursor.cur.repack()
                if cursor.cur.nlink > 1 and not stat.S_ISDIR(cursor.cur.mode):
                    hlinks.del_path(cursor.cur.name)
            cursor.next()

        if not (cursor.cur and cursor.cur.name == path):
            # A path the index has not seen before.
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # Timestamps are zeroed for the same reason as for
            # already-known paths, below.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = meta_writer.store(meta)
            new_entries.add(path, pst, meta_ofs, hashgen=fake_hash)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
            continue

        # A path that was already in the index.
        need_repack = False
        if cursor.cur.stale(pst, tstart, check_device=opt.check_device):
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                cursor.next()
                continue
            if not stat.S_ISDIR(cursor.cur.mode) and cursor.cur.nlink > 1:
                hlinks.del_path(cursor.cur.name)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
            # Zero the timestamps so they don't bloat the store --
            # they are already kept in the index (they vary a lot and
            # are fixed length).  This might look unsafe given "tmax",
            # since that code may adjust (mangle) the index mtime and
            # ctime, producing fake values which must not end up in a
            # .bupm.  It looks like that can't happen, though: (1)
            # when "save" validates the index entry, it always reads
            # the metadata from the filesystem; (2) metadata is only
            # read/used from the index if hashvalid is true; (3)
            # "faked" entries will be stale(), and so we invalidate
            # them right here.
            meta.ctime = meta.mtime = meta.atime = 0
            meta_ofs = meta_writer.store(meta)
            cursor.cur.update_from_stat(pst, meta_ofs)
            cursor.cur.invalidate()
            need_repack = True
        if fake_hash and not (cursor.cur.flags & index.IX_HASHVALID):
            cursor.cur.gitmode, cursor.cur.sha = fake_hash(path)
            cursor.cur.flags |= index.IX_HASHVALID
            need_repack = True
        if opt.fake_invalid:
            cursor.cur.invalidate()
            need_repack = True
        if need_repack:
            cursor.cur.repack()
        cursor.next()

    progress('Indexing: %d, done (%d paths/s).\n' % (total, current_rate()))

    hlinks.prepare_save()

    if not old_index.exists():
        new_entries.close()
    else:
        old_index.save()
        new_entries.flush()
        if new_entries.count:
            fresh = new_entries.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(old_index)
                log('check: before merging: newfile\n')
                check_index(fresh)
            merged = index.Writer(indexfile, meta_writer, tmax)

            for entry in index.merge(old_index, fresh):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                merged.add_ixentry(entry)

            old_index.close()
            merged.close()
            fresh.close()
        new_entries.abort()

    meta_writer.close()
    hlinks.commit_save()
def update_index(top, excluded_paths, exclude_rxs):
    """Walk the tree under `top` and reconcile the index entries with it.

    For every path yielded by drecurse.recursive_dirlist():
      - existing index entries that sort after the path are marked
        deleted (and dropped from the hardlink db when appropriate);
      - an entry with the same name is refreshed from the new stat
        data and its metadata is stored again;
      - otherwise the path is added to the new-entries writer.

    Reads the module-level `opt` and `indexfile`.  Metadata read
    failures are recorded with add_error() and the path is skipped.
    As written, this function ends once the walk is finished.
    """
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    old_index = index.Reader(indexfile)
    meta_writer = index.MetaStoreWriter(indexfile + '.meta')
    new_entries = index.Writer(indexfile, meta_writer, tmax)
    cursor = IterHelper(old_index.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    hashgen = None
    if opt.fake_valid:
        hashgen = lambda name: (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    walker = drecurse.recursive_dirlist([top],
                                        xdev=opt.xdev,
                                        bup_dir=bup_dir,
                                        excluded_paths=excluded_paths,
                                        exclude_rxs=exclude_rxs)
    for path, pst in walker:
        echo_path = (opt.verbose >= 2
                     or (opt.verbose == 1 and stat.S_ISDIR(pst.st_mode)))
        if echo_path:
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
        if echo_path or not (total % 128):
            qprogress('Indexing: %d\r' % total)
        total += 1

        # Index entries sorting after this path no longer exist on disk.
        while cursor.cur and cursor.cur.name > path:
            if cursor.cur.exists():
                cursor.cur.set_deleted()
                cursor.cur.repack()
                if cursor.cur.nlink > 1 and not stat.S_ISDIR(cursor.cur.mode):
                    hlinks.del_path(cursor.cur.name)
            cursor.next()

        known = cursor.cur and cursor.cur.name == path

        # Both kinds of path need fresh metadata; only the error
        # handling differs (a known entry must still be stepped over).
        try:
            meta = metadata.from_path(path, statinfo=pst)
        except (OSError, IOError) as e:
            add_error(e)
            if known:
                cursor.next()
            continue

        if known:
            if not stat.S_ISDIR(cursor.cur.mode) and cursor.cur.nlink > 1:
                hlinks.del_path(cursor.cur.name)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
            # Zero the timestamps so they don't bloat the store --
            # they are already kept in the index (they vary a lot and
            # are fixed length).  This might look unsafe given "tmax",
            # since that code may adjust (mangle) the index mtime and
            # ctime, producing fake values which must not end up in a
            # .bupm.  It looks like that can't happen, though: (1)
            # when "save" validates the index entry, it always reads
            # the metadata from the filesystem; (2) metadata is only
            # read/used from the index if hashvalid is true; (3) index
            # always invalidates "faked" entries, because "old != new"
            # in from_stat().
            meta.ctime = meta.mtime = meta.atime = 0
            meta_ofs = meta_writer.store(meta)
            cursor.cur.from_stat(pst, meta_ofs, tstart,
                                 check_device=opt.check_device)
            if hashgen and not (cursor.cur.flags & index.IX_HASHVALID):
                cursor.cur.gitmode, cursor.cur.sha = hashgen(path)
                cursor.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                cursor.cur.invalidate()
            cursor.cur.repack()
            cursor.next()
        else:
            # Timestamps are zeroed for the same reason as for
            # already-known paths, above.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = meta_writer.store(meta)
            new_entries.add(path, pst, meta_ofs, hashgen=hashgen)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
if (opt.fake_valid or opt.fake_invalid) and not opt.update: o.fatal('--fake-{in,}valid are meaningless without -u') if opt.fake_valid and opt.fake_invalid: o.fatal('--fake-valid is incompatible with --fake-invalid') if opt.clear and opt.indexfile: o.fatal('cannot clear an external index (via -f)') # FIXME: remove this once we account for timestamp races, i.e. index; # touch new-file; index. It's possible for this to happen quickly # enough that new-file ends up with the same timestamp as the first # index, and then bup will ignore it. tick_start = time.time() time.sleep(1 - (tick_start - int(tick_start))) git.check_repo_or_die() indexfile = opt.indexfile or git.repo('bupindex') handle_ctrl_c() if opt.check: log('check: starting initial check.\n') check_index(index.Reader(indexfile)) if opt.clear: log('clear: clearing index.\n') clear_index(indexfile) excluded_paths = parse_excludes(flags, o.fatal) exclude_rxs = parse_rx_excludes(flags, o.fatal) paths = index.reduce_paths(extra)
def sweep(live_objects, existing_count, cat_pipe, opt):
    """Rewrite or delete packs so that only (probably) live data remains.

    Every *.idx in the repository's pack directory is examined:
      - packs with no live objects are queued for deletion;
      - packs whose live fraction exceeds (100 - opt.threshold)% are
        kept untouched;
      - all other packs have their live objects copied into a new
        pack and are then queued for deletion.

    Queued files are only unlinked once a new pack has been finished
    (via the writer's on_pack_finish callback) or at the very end.

    live_objects: object answering exists(sha) for binary object ids.
    existing_count: total object count, used for progress reporting;
        it is used as a divisor and so must be non-zero.
    cat_pipe: object whose get(hex_id) yields the type, then the data.
    opt: options; verbose, threshold and compress are read.
    """
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if opt.verbose and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                # The pack that was just written can end up with the
                # same name as one queued for removal; unlinking it
                # would throw away the data we just preserved.
                continue
            if opt.verbose:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=opt.compress,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')):
        if opt.verbose:
            qprogress('preserving live data (%d%% complete)\r'
                      % ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        # First pass: count how many of this pack's objects are live.
        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            if opt.verbose:
                log('deleting %s\n'
                    % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - opt.threshold) / 100.0):
            if opt.verbose:
                log('keeping %s (%d%% live)\n'
                    % (git.repo_rel(basename(idx_name)), live_frac * 100))
            continue

        if opt.verbose:
            log('rewriting %s (%.2f%% live)\n'
                % (basename(idx_name), live_frac * 100))
        # Second pass: copy the live objects into the new pack.
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                obj_type = item_it.next()
                writer.write(sha, obj_type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if opt.verbose:
        progress('preserving live data (%d%% complete)\n'
                 % ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert(not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert(not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if opt.verbose:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir))
               / float(existing_count) * 100))
def main(argv):
    """Entry point for the fsck command.

    Checks each named pack (or every pack in the repository when no
    names are given) with do_pack(), either serially or, when
    opt.jobs is set, in forked child processes with at most opt.jobs
    running at once.  Exits with the first non-zero result seen, or 0.
    """
    global opt, par2_ok

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    opt.verbose = opt.verbose or 0

    par2_setup()
    if opt.par2_ok:
        # Exit status 0 is 'true' in sh.
        sys.exit(0 if par2_ok else 1)
    if opt.disable_par2:
        par2_ok = 0

    git.check_repo_or_die()

    if extra:
        extra = [argv_bytes(x) for x in extra]
    else:
        debug('fsck: No filenames given: checking all packs.\n')
        extra = glob.glob(git.repo(b'objects/pack/*.pack'))

    sys.stdout.flush()
    out = byte_stream(sys.stdout)
    code = 0
    count = 0
    outstanding = {}  # pids of children that are still running
    for name in extra:
        # Reduce whatever was named to the pack's base path.
        for suffix in (b'.pack', b'.idx', b'.par2'):
            if name.endswith(suffix):
                base = name[:-len(suffix)]
                break
        else:
            if os.path.exists(name + b'.pack'):
                base = name
            else:
                raise Exception('%r is not a pack file!' % name)
        last = os.path.split(base)[1]
        par2_path = base + b'.par2'
        par2_exists = os.path.exists(par2_path)
        if par2_exists and os.stat(par2_path).st_size == 0:
            par2_exists = 0
        sys.stdout.flush()  # Not sure we still need this, but it'll flush out too
        checker = 'par2' if (par2_ok and par2_exists) else 'git'
        debug('fsck: checking %r (%s)\n' % (last, checker))
        if not opt.verbose:
            progress('fsck (%d/%d)\r' % (count, len(extra)))

        if not opt.jobs:
            nc = do_pack(base, last, par2_exists, out)
            code = code or nc
            count += 1
            continue

        # Parallel mode: wait for a free slot, then fork a checker.
        while len(outstanding) >= opt.jobs:
            pid, status = os.wait()
            nc = status >> 8
            if pid in outstanding:
                del outstanding[pid]
                code = code or nc
                count += 1
        pid = os.fork()
        if pid:  # parent
            outstanding[pid] = 1
        else:  # child
            try:
                sys.exit(do_pack(base, last, par2_exists, out))
            except Exception as e:
                log('exception: %r\n' % e)
                sys.exit(99)

    # Collect whatever children are still running.
    while len(outstanding):
        pid, status = os.wait()
        nc = status >> 8
        if pid in outstanding:
            del outstanding[pid]
            code = code or nc
            count += 1
        if not opt.verbose:
            progress('fsck (%d/%d)\r' % (count, len(extra)))

    if istty2:
        debug('fsck done. \n')
    sys.exit(code)
def main(argv):
    """Entry point for the index command.

    Depending on the options this checks, clears, updates and/or
    lists the index.  When none of the mode options is given, update
    mode is assumed.  Exits with status 1 if any errors were saved
    along the way.
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    # These three all mean "list entries from the index".
    listing = opt['print'] or opt.status or opt.modified

    if not (listing or opt.update or opt.check or opt.clear):
        opt.update = 1
    if (opt.fake_valid or opt.fake_invalid) and not opt.update:
        o.fatal('--fake-{in,}valid are meaningless without -u')
    if opt.fake_valid and opt.fake_invalid:
        o.fatal('--fake-valid is incompatible with --fake-invalid')
    if opt.clear and opt.indexfile:
        o.fatal('cannot clear an external index (via -f)')

    # FIXME: remove this once we account for timestamp races, i.e. index;
    # touch new-file; index.  It's possible for this to happen quickly
    # enough that new-file ends up with the same timestamp as the first
    # index, and then bup will ignore it.
    tick_start = time.time()
    time.sleep(1 - (tick_start - int(tick_start)))

    git.check_repo_or_die()

    handle_ctrl_c()

    if opt.verbose is None:
        opt.verbose = 0

    indexfile = (argv_bytes(opt.indexfile) if opt.indexfile
                 else git.repo(b'bupindex'))

    if opt.check:
        log('check: starting initial check.\n')
        check_index(index.Reader(indexfile), opt.verbose)

    if opt.clear:
        log('clear: clearing index.\n')
        clear_index(indexfile, opt.verbose)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.update:
        if not extra:
            o.fatal('update mode (-u) requested but no paths given')
        extra = [argv_bytes(x) for x in extra]
        excluded_paths = parse_excludes(flags, o.fatal)
        exclude_rxs = parse_rx_excludes(flags, o.fatal)
        xexcept = index.unique_resolved_paths(extra)
        for rp, path in index.reduce_paths(extra):
            update_index(rp, excluded_paths, exclude_rxs, indexfile,
                         check=opt.check,
                         check_device=opt.check_device,
                         xdev=opt.xdev,
                         xdev_exceptions=xexcept,
                         fake_valid=opt.fake_valid,
                         fake_invalid=opt.fake_invalid,
                         out=out,
                         verbose=opt.verbose)

    if listing:
        extra = [argv_bytes(x) for x in extra]
        for name, ent in index.Reader(indexfile).filter(extra or [b'']):
            # With --modified, only entries that need saving are shown.
            if opt.modified and (ent.is_valid()
                                 or ent.is_deleted()
                                 or not ent.mode):
                continue
            fields = []
            if opt.status:
                if ent.is_deleted():
                    fields.append(b'D ')
                elif ent.is_valid():
                    fields.append(b' ')
                elif ent.sha == index.EMPTY_SHA:
                    fields.append(b'A ')
                else:
                    fields.append(b'M ')
            if opt.hash:
                fields.append(hexlify(ent.sha) + b' ')
            if opt.long:
                fields.append(b'%7s %7s ' % (oct(ent.mode).encode('ascii'),
                                             oct(ent.gitmode).encode('ascii')))
            fields.append(name or b'./')
            fields.append(b'\n')
            out.write(b''.join(fields))

    if opt.check and (listing or opt.update):
        log('check: starting final check.\n')
        check_index(index.Reader(indexfile), opt.verbose)

    if saved_errors:
        log('WARNING: %d errors encountered.\n' % len(saved_errors))
        sys.exit(1)
log("bup: error: tag '%s' already exists\n" % path_msg(tag_name)) sys.exit(1) if tag_name.startswith(b'.'): o.fatal("'%s' is not a valid tag name." % path_msg(tag_name)) try: hash = git.rev_parse(commit) except git.GitError as e: log("bup: error: %s" % e) sys.exit(2) if not hash: log("bup: error: commit %s not found.\n" % commit.decode('ascii')) sys.exit(2) pL = git.PackIdxList(git.repo(b'objects/pack')) if not pL.exists(hash): log("bup: error: commit %s not found.\n" % commit.decode('ascii')) sys.exit(2) tag_file = git.repo(b'refs/tags/' + tag_name) try: tag = open(tag_file, 'wb') except OSError as e: log("bup: error: could not create tag '%s': %s" % (path_msg(tag_name), e)) sys.exit(3) with tag as tag: tag.write(hexlify(hash)) tag.write(b'\n')
#!/usr/bin/env python
# Print the longest bit-prefix match found between consecutive,
# distinct object ids in the repository's pack indexes.
import sys
from bup import options, git, _hashsplit
from bup.helpers import *

optspec = """
bup margin
"""
o = options.Options("bup margin", optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal("no arguments expected")

git.check_repo_or_die()
# git.ignore_midx = 1

mi = git.PackIdxList(git.repo("objects/pack"))
last = "\0" * 20
longmatch = 0
for sha in mi:
    if sha != last:
        # assert(str(sha) >= last)
        longmatch = max(longmatch, _hashsplit.bitmatch(last, sha))
        last = sha
# A single parenthesized value prints the same under the Python 2
# print statement and the print function.
print(longmatch)
def list_indexes(self):
    """Yield each directory entry name found in this repo's objects/pack.

    The names come straight from os.listdir(); no filtering is done.
    """
    pack_dir = git.repo(b'objects/pack', repo_dir=self.repo_dir)
    for entry in os.listdir(pack_dir):
        yield entry
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    """Rewrite or delete packs so that only (probably) live data remains.

    Every *.idx in the repository's pack directory is examined:
      - packs with no live objects are queued for deletion;
      - packs whose live fraction exceeds (100 - threshold)% are kept
        untouched;
      - all other packs have their live objects copied into a new
        pack and are then queued for deletion.

    Queued files are only unlinked once a new pack has been finished
    (via the writer's on_pack_finish callback) or at the very end.
    If anything goes wrong while traversing, the writer is aborted
    and the exception propagates.

    live_objects: object answering exists(sha) for binary object ids.
    existing_count: total object count, used for progress reporting;
        it is used as a divisor and so must be non-zero.
    cat_pipe: object providing get(hex_id) and restart().
    threshold: percentage; a pack is only rewritten when its live
        fraction does not exceed (100 - threshold)%.
    compression: compression level handed to the pack writer.
    verbosity: when true, progress and decisions are logged.
    """
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if verbosity and new_pack_prefix:
            log('created ' + path_msg(basename(new_pack_prefix)) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + path_msg(basename(p)) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)
    try:
        # FIXME: sanity check .idx names vs .pack names?
        collect_count = 0
        for idx_name in glob.glob(os.path.join(git.repo(b'objects/pack'),
                                               b'*.idx')):
            if verbosity:
                qprogress('preserving live data (%d%% complete)\r'
                          % ((float(collect_count) / existing_count) * 100))
            with git.open_idx(idx_name) as idx:
                # First pass: count how many objects are live.
                idx_live_count = 0
                for sha in idx:
                    if live_objects.exists(sha):
                        idx_live_count += 1

                collect_count += idx_live_count
                if idx_live_count == 0:
                    if verbosity:
                        log('deleting %s\n'
                            % path_msg(git.repo_rel(basename(idx_name))))
                    ns.stale_files.append(idx_name)
                    ns.stale_files.append(idx_name[:-3] + b'pack')
                    continue

                live_frac = idx_live_count / float(len(idx))
                if live_frac > ((100 - threshold) / 100.0):
                    if verbosity:
                        # Paths are bytes here; path_msg() renders them
                        # the same way as the other messages do.
                        log('keeping %s (%d%% live)\n'
                            % (path_msg(git.repo_rel(basename(idx_name))),
                               live_frac * 100))
                    continue

                if verbosity:
                    log('rewriting %s (%.2f%% live)\n'
                        % (path_msg(basename(idx_name)), live_frac * 100))
                # Second pass: copy the live objects into the new pack.
                for sha in idx:
                    if live_objects.exists(sha):
                        item_it = cat_pipe.get(hexlify(sha))
                        _, typ, _ = next(item_it)
                        writer.just_write(sha, typ, b''.join(item_it))

                ns.stale_files.append(idx_name)
                ns.stale_files.append(idx_name[:-3] + b'pack')

        if verbosity:
            progress('preserving live data (%d%% complete)\n'
                     % ((float(collect_count) / existing_count) * 100))

        # Nothing should have recreated midx/bloom yet.
        pack_dir = git.repo(b'objects/pack')
        assert(not os.path.exists(os.path.join(pack_dir, b'bup.bloom')))
        assert(not glob.glob(os.path.join(pack_dir, b'*.midx')))

    except BaseException as ex:
        with pending_raise(ex):
            writer.abort()

    # This will finally run midx.
    # Can only change refs (if needed) after this.
    writer.close()

    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir, verbosity))
               / float(existing_count) * 100))
def send_index(self, name, conn, send_size):
    """Send the contents of the pack index `name` over `conn`.

    name: bytes file name of the index inside this repo's objects/pack.
    conn: object with a write() method that receives the index data.
    send_size: callable invoked with the data length before the data
        itself is written.

    The index is opened as a context manager so that it is closed
    again once the data has been written, even if writing fails.
    """
    idx_path = git.repo(b'objects/pack/%s' % name, repo_dir=self.repo_dir)
    with git.open_idx(idx_path) as idx:
        data = idx.map
        send_size(len(data))
        conn.write(data)
# enough that new-file ends up with the same timestamp as the first # index, and then bup will ignore it. tick_start = time.time() time.sleep(1 - (tick_start - int(tick_start))) git.check_repo_or_die() handle_ctrl_c() if opt.verbose is None: opt.verbose = 0 if opt.indexfile: indexfile = argv_bytes(opt.indexfile) else: indexfile = git.repo(b'bupindex') if opt.check: log('check: starting initial check.\n') check_index(index.Reader(indexfile)) if opt.clear: log('clear: clearing index.\n') clear_index(indexfile) sys.stdout.flush() out = byte_stream(sys.stdout) if opt.update: if not extra: o.fatal('update mode (-u) requested but no paths given')
def dumb_server_mode(self):
    """Return whether a 'bup-dumb-server' file exists in this repository.

    The filesystem is consulted only while the cached value is still
    None; after that the stored answer is returned.
    """
    cached = self._dumb_server_mode
    if cached is None:
        marker = git.repo(b'bup-dumb-server', repo_dir=self.repo_dir)
        cached = os.path.exists(marker)
        self._dumb_server_mode = cached
    return cached
def __init__(self, repo_dir=None):
    """Set up access to the repository at `repo_dir`.

    self.repo_dir falls back to git.repo() when `repo_dir` is not
    given; the cat pipe and the rev_list partial receive `repo_dir`
    exactly as it was passed in.
    """
    effective_dir = repo_dir
    if not effective_dir:
        effective_dir = git.repo()
    self.repo_dir = effective_dir
    self._cp = git.cp(repo_dir)
    self.rev_list = partial(git.rev_list, repo_dir=repo_dir)
def update_index(top, excluded_paths):
    """Bring the index up to date with the filesystem tree under `top`.

    Walks the tree with drecurse.recursive_dirlist(), marking vanished
    entries as deleted, refreshing entries that are still present and
    collecting new paths in a separate writer.  Afterwards, when an
    index already exists, the new entries are merged with it;
    otherwise the new writer is simply closed.

    Reads the module-level `opt` and `indexfile`.
    """
    tmax = time.time() - 1
    old_index = index.Reader(indexfile)
    new_entries = index.Writer(indexfile, tmax)
    cursor = IterHelper(old_index.iter(name=top))
    tstart = int(time.time())

    if opt.fake_valid:
        def hashgen(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)
    else:
        hashgen = None

    total = 0
    bup_dir = os.path.abspath(git.repo())
    walker = drecurse.recursive_dirlist([top],
                                        xdev=opt.xdev,
                                        bup_dir=bup_dir,
                                        excluded_paths=excluded_paths)
    for path, pst in walker:
        echo_path = (opt.verbose >= 2
                     or (opt.verbose == 1 and stat.S_ISDIR(pst.st_mode)))
        if echo_path:
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
        if echo_path or not (total % 128):
            qprogress('Indexing: %d\r' % total)
        total += 1

        # Index entries sorting after this path no longer exist on disk.
        while cursor.cur and cursor.cur.name > path:
            if cursor.cur.exists():
                cursor.cur.set_deleted()
                cursor.cur.repack()
            cursor.next()

        if not (cursor.cur and cursor.cur.name == path):
            # A path the index has not seen before.
            new_entries.add(path, pst, hashgen=hashgen)
            continue

        # A path that was already in the index.
        if pst:
            cursor.cur.from_stat(pst, tstart)
        if hashgen and not (cursor.cur.flags & index.IX_HASHVALID):
            cursor.cur.gitmode, cursor.cur.sha = hashgen(path)
            cursor.cur.flags |= index.IX_HASHVALID
        if opt.fake_invalid:
            cursor.cur.invalidate()
        cursor.cur.repack()
        cursor.next()

    progress('Indexing: %d, done.\n' % total)

    if not old_index.exists():
        new_entries.close()
        return

    old_index.save()
    new_entries.flush()
    if new_entries.count:
        fresh = new_entries.new_reader()
        if opt.check:
            log('check: before merging: oldfile\n')
            check_index(old_index)
            log('check: before merging: newfile\n')
            check_index(fresh)
        merged = index.Writer(indexfile, tmax)

        for entry in index.merge(old_index, fresh):
            # FIXME: shouldn't we remove deleted entries eventually?  When?
            merged.add_ixentry(entry)

        old_index.close()
        merged.close()
        fresh.close()
    new_entries.abort()
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    """Rewrite or delete packs so that only (probably) live data remains.

    Every '*.idx' in the repository's objects/pack directory is examined:
      - a pack with no live objects is queued for deletion;
      - a pack whose live fraction exceeds (100 - threshold) percent is
        kept untouched;
      - any other pack has its live objects copied into a new pack and is
        then queued for deletion.

    live_objects -- object whose exists(sha) reports whether a binary sha
        is (probably) live.
    existing_count -- object count before collection; must be non-zero
        when verbosity is set, since it is used as a divisor.
    cat_pipe -- used to fetch object contents via get(); restarted after
        stale files are removed.
    threshold -- percentage, see above.
    compression -- compression level handed to git.PackWriter.
    verbosity -- any true value enables progress and log output.

    If anything fails while packs are being traversed, the partially
    written pack is aborted before the exception propagates.
    """
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        # Registered as the writer's on_pack_finish callback and also
        # called once directly with None after the writer is closed.
        if verbosity and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    def live_shas(idx):
        # Yield each 20-byte sha from idx that live_objects reports as live.
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                yield sha

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)
    try:
        # FIXME: sanity check .idx names vs .pack names?
        collect_count = 0
        for idx_name in glob.glob(os.path.join(git.repo('objects/pack'),
                                               '*.idx')):
            if verbosity:
                qprogress('preserving live data (%d%% complete)\r'
                          % ((float(collect_count) / existing_count) * 100))
            idx = git.open_idx(idx_name)

            idx_live_count = sum(1 for _ in live_shas(idx))
            collect_count += idx_live_count

            if idx_live_count == 0:
                if verbosity:
                    log('deleting %s\n'
                        % git.repo_rel(basename(idx_name)))
                ns.stale_files.append(idx_name)
                ns.stale_files.append(idx_name[:-3] + 'pack')
                continue

            live_frac = idx_live_count / float(len(idx))
            if live_frac > ((100 - threshold) / 100.0):
                if verbosity:
                    log('keeping %s (%d%% live)\n'
                        % (git.repo_rel(basename(idx_name)),
                           live_frac * 100))
                continue

            if verbosity:
                log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
                                                      live_frac * 100))
            for sha in live_shas(idx):
                item_it = cat_pipe.get(sha.encode('hex'))
                obj_type = item_it.next()
                writer.just_write(sha, obj_type, ''.join(item_it))

            # Only queue the old pack once all of its live objects have
            # been handed to the writer.
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')

        if verbosity:
            progress('preserving live data (%d%% complete)\n'
                     % ((float(collect_count) / existing_count) * 100))

        # Nothing should have recreated midx/bloom yet.
        pack_dir = git.repo('objects/pack')
        assert(not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
        assert(not glob.glob(os.path.join(pack_dir, '*.midx')))
    except BaseException:
        # Don't leave a half-written pack behind.  The packs it was being
        # built from are still on disk, because stale files are only
        # unlinked from remove_stale_files().
        writer.abort()
        raise

    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir, verbosity))
               / float(existing_count) * 100))
remainstr = '' kpsstr = '' else: kpsstr = '%dk/s' % kps if hours: remainstr = '%dh%dm' % (hours, mins) elif mins: remainstr = '%dm%d' % (mins, secs) else: remainstr = '%ds' % secs qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r' % (pct, cc/1024, total/1024, fcount, ftotal, remainstr, kpsstr)) indexfile = opt.indexfile or git.repo(b'bupindex') r = index.Reader(indexfile) try: msr = index.MetaStoreReader(indexfile + b'.meta') except IOError as ex: if ex.errno != EACCES: raise log('error: cannot access %r; have you run bup index?' % path_msg(indexfile)) sys.exit(1) hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink') def already_saved(ent): return ent.is_valid() and w.exists(ent.sha) and ent.sha def wantrecurse_pre(ent):
# Sanity-check the option value before doing any work.
assert(opt.max_files >= 5)

if opt.check:
    # check existing midx files
    if not extra:
        # No explicit files given: collect every *.midx from the pack dirs.
        midxes = []
        paths = [opt.dir] if opt.dir else git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            midxes += glob.glob(os.path.join(path, '*.midx'))
    else:
        midxes = extra
    for name in midxes:
        check_midx(name)
    if not saved_errors:
        log('All tests passed.\n')
elif extra:
    # Explicit input files: build one midx from them.
    do_midx(git.repo('objects/pack'), opt.output, extra, '')
elif opt.auto or opt.force:
    paths = [opt.dir] if opt.dir else git.all_packdirs()
    for path in paths:
        debug1('midx: scanning %s\n' % path)
        do_midx_dir(path)
else:
    o.fatal("you must use -f or -a or provide input filenames")

# Any error recorded along the way turns into a non-zero exit status.
if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)
# Sanity-check the option value before doing any work.
assert (opt.max_files >= 5)

if opt.check:
    # check existing midx files
    if not extra:
        # No explicit files given: collect every *.midx from the pack dirs.
        midxes = []
        paths = [opt.dir] if opt.dir else git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            midxes += glob.glob(os.path.join(path, '*.midx'))
    else:
        midxes = extra
    for name in midxes:
        check_midx(name)
    if not saved_errors:
        log('All tests passed.\n')
elif extra:
    # Explicit input files: build one midx from them.
    do_midx(git.repo('objects/pack'), opt.output, extra, '')
elif opt.auto or opt.force:
    paths = [opt.dir] if opt.dir else git.all_packdirs()
    for path in paths:
        debug1('midx: scanning %s\n' % path)
        do_midx_dir(path)
else:
    o.fatal("you must use -f or -a or provide input filenames")

# Any error recorded along the way turns into a non-zero exit status.
if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)
# NOTE(review): the spec below is laid out one entry per line on the
# assumption that options.Options parses it line by line -- confirm.
optspec = """
bup margin
--
predict Guess object offsets and report the maximum deviation
ignore-midx Don't use midx files; use only plain pack idx files.
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal("no arguments expected")

git.check_repo_or_die()

mi = git.PackIdxList(git.repo('objects/pack'), ignore_midx=opt.ignore_midx)


def do_predict(ix):
    """Print how far object positions deviate from a uniform guess.

    For each id in ix, the first 8 bytes are read as a big-endian
    unsigned integer and scaled by len(ix) / 2**64 to get an expected
    position.  The largest absolute difference between that and the
    actual position is printed, both as a count and as a percentage of
    len(ix).

    An empty ix reports a deviation of zero instead of failing with a
    division by zero.
    """
    total = len(ix)
    maxdiff = 0
    seen = 0
    for seen, i in enumerate(ix, 1):
        count = seen - 1
        prefix = struct.unpack('!Q', i[:8])[0]
        expected = prefix * total / (1 << 64)
        diff = count - expected
        maxdiff = max(maxdiff, abs(diff))
    # Guard the percentage against an empty index.
    pct = maxdiff * 100.0 / total if total else 0.0
    print('%d of %d (%.3f%%) ' % (maxdiff, total, pct))
    sys.stdout.flush()
    # Iteration must have produced exactly len(ix) entries.
    assert (seen == total)
def main(argv):
    """Entry point for saving the indexed files into a repository.

    Parses argv, opens either the local repository or a remote client,
    writes the tree built from the index through a pack writer, optionally
    prints the tree/commit ids to stdout, and updates the named branch
    once the pack writer has been closed.

    Exits with status 1 when the remote connection fails, when the index
    metadata file does not exist, or when any errors were recorded in
    saved_errors.
    """
    handle_ctrl_c()
    opt = opts_from_cmdline(argv)
    client.bwlimit = opt.bwlimit
    git.check_repo_or_die()

    remote_dest = opt.remote or opt.is_reverse
    if not remote_dest:
        repo = git
        cli = nullcontext()
    else:
        try:
            cli = repo = client.Client(opt.remote)
        except client.ClientError as e:
            # Terminate the message with a newline like every other log()
            # call in this function.
            log('error: %s\n' % e)
            sys.exit(1)

    # cli creation must be last nontrivial command in each if clause above
    with cli:
        if not remote_dest:
            w = git.PackWriter(compression_level=opt.compress)
        else:
            w = cli.new_packwriter(compression_level=opt.compress)

        with w:
            sys.stdout.flush()
            out = byte_stream(sys.stdout)

            if opt.name:
                refname = b'refs/heads/%s' % opt.name
                parent = repo.read_ref(refname)
            else:
                refname = parent = None

            indexfile = opt.indexfile or git.repo(b'bupindex')
            try:
                msr = index.MetaStoreReader(indexfile + b'.meta')
            except IOError as ex:
                # Only a missing metadata file gets the friendly hint;
                # anything else propagates.
                if ex.errno != ENOENT:
                    raise
                log('error: cannot access %r; have you run bup index?\n'
                    % path_msg(indexfile))
                sys.exit(1)
            with msr, \
                 hlinkdb.HLinkDB(indexfile + b'.hlink') as hlink_db, \
                 index.Reader(indexfile) as reader:
                tree = save_tree(opt, reader, hlink_db, msr, w)
            if opt.tree:
                out.write(hexlify(tree))
                out.write(b'\n')
            if opt.commit or opt.name:
                commit = commit_tree(tree, parent, opt.date, argv, w)
                if opt.commit:
                    out.write(hexlify(commit))
                    out.write(b'\n')

        # packwriter must be closed before we can update the ref
        if opt.name:
            repo.update_ref(refname, commit, parent)

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n'
            % len(saved_errors))
        sys.exit(1)
def _set_mode():
    """Set the global dumb_server_mode from the 'bup-dumb-server' marker.

    The mode is "dumb" when the marker path exists inside the repository
    and "smart" otherwise; the outcome is reported through debug1().
    """
    global dumb_server_mode
    marker_path = git.repo('bup-dumb-server')
    dumb_server_mode = os.path.exists(marker_path)
    mode_name = 'dumb' if dumb_server_mode else 'smart'
    debug1('bup server: serving in %s mode\n' % mode_name)
def __init__(self, repo_dir=None):
    """Bind this object to repo_dir, or to git.repo() when none is given.

    The directory is passed through realpath() once, and that resolved
    path is what every helper created here receives.
    """
    resolved_dir = realpath(repo_dir or git.repo())
    self.repo_dir = resolved_dir
    self._cp = git.cp(resolved_dir)
    self.update_ref = partial(git.update_ref, repo_dir=resolved_dir)
    self.rev_list = partial(git.rev_list, repo_dir=resolved_dir)
    self._id = _repo_id(resolved_dir)
log("bup: error: tag '%s' already exists\n" % tag_name) sys.exit(1) if tag_name.startswith('.'): o.fatal("'%s' is not a valid tag name." % tag_name) try: hash = git.rev_parse(commit) except git.GitError as e: log("bup: error: %s" % e) sys.exit(2) if not hash: log("bup: error: commit %s not found.\n" % commit) sys.exit(2) pL = git.PackIdxList(git.repo('objects/pack')) if not pL.exists(hash): log("bup: error: commit %s not found.\n" % commit) sys.exit(2) tag_file = git.repo('refs/tags/%s' % tag_name) try: tag = file(tag_file, 'w') except OSError as e: log("bup: error: could not create tag '%s': %s" % (tag_name, e)) sys.exit(3) tag.write(hash.encode('hex')) tag.close()
# check existing midx files if extra: midxes = extra else: midxes = [] paths = opt.dir and [opt.dir] or git.all_packdirs() for path in paths: debug1('midx: scanning %s\n' % path) midxes += glob.glob(os.path.join(path, b'*.midx')) for name in midxes: check_midx(name) if not saved_errors: log('All tests passed.\n') else: if extra: sys.stdout.flush() do_midx(git.repo(b'objects/pack'), opt.output, extra, b'', byte_stream(sys.stdout)) elif opt.auto or opt.force: sys.stdout.flush() paths = opt.dir and [opt.dir] or git.all_packdirs() for path in paths: debug1('midx: scanning %s\n' % path_msg(path)) do_midx_dir(path, opt.output, byte_stream(sys.stdout)) else: o.fatal("you must use -f or -a or provide input filenames") if saved_errors: log('WARNING: %d errors encountered.\n' % len(saved_errors)) sys.exit(1)
def main(argv):
    """Entry point for listing, creating and deleting tags.

    With opt.delete set, the named tag ref is deleted (exit status 0).
    With no positional arguments, all known tags are written to stdout,
    one per line (exit status 0).  With two positional arguments, a tag
    file named after the first is written, containing the hex id that the
    second resolves to.

    Exit statuses: 1 when a tag unexpectedly exists or is missing, 2 when
    the commit cannot be resolved or is not present in the packs, 3 when
    the tag file cannot be created.
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    git.check_repo_or_die()

    tags = [t for sublist in git.tags().values() for t in sublist]

    if opt.delete:
        # git.delete_ref() doesn't complain if a ref doesn't exist.  We
        # could implement this verification but we'd need to read in the
        # contents of the tag file and pass the hash, and we already know
        # about the tag's existence via "tags".
        tag_name = argv_bytes(opt.delete)
        if not opt.force and tag_name not in tags:
            log("error: tag '%s' doesn't exist\n" % path_msg(tag_name))
            sys.exit(1)
        tag_file = b'refs/tags/%s' % tag_name
        git.delete_ref(tag_file)
        sys.exit(0)

    if not extra:
        # Listing mode: set up the byte stream once, not once per tag.
        sys.stdout.flush()
        out = byte_stream(sys.stdout)
        for t in tags:
            out.write(t)
            out.write(b'\n')
        sys.exit(0)
    elif len(extra) != 2:
        o.fatal('expected commit ref and hash')

    tag_name, commit = map(argv_bytes, extra[:2])
    if not tag_name:
        o.fatal("tag name must not be empty.")
    debug1("args: tag name = %s; commit = %s\n"
           % (path_msg(tag_name), commit.decode('ascii')))

    if tag_name in tags and not opt.force:
        log("bup: error: tag '%s' already exists\n" % path_msg(tag_name))
        sys.exit(1)

    if tag_name.startswith(b'.'):
        o.fatal("'%s' is not a valid tag name." % path_msg(tag_name))

    try:
        commit_id = git.rev_parse(commit)
    except git.GitError as e:
        # Newline added so the message is terminated like the others.
        log("bup: error: %s\n" % e)
        sys.exit(2)

    if not commit_id:
        log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
        sys.exit(2)

    # The id must also be present in the pack indexes.
    pL = git.PackIdxList(git.repo(b'objects/pack'))
    if not pL.exists(commit_id):
        log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
        sys.exit(2)

    tag_file = git.repo(b'refs/tags/' + tag_name)
    try:
        tag = open(tag_file, 'wb')
    except OSError as e:
        log("bup: error: could not create tag '%s': %s\n"
            % (path_msg(tag_name), e))
        sys.exit(3)
    with tag:
        tag.write(hexlify(commit_id))
        tag.write(b'\n')
def main():
    """Entry point for transferring items from a source repository.

    Parses sys.argv, opens the destination repository (repo.from_opts)
    and the source repository (LocalRepo on opt.source or git.repo()),
    resolves all target specs up front, runs the handler matching each
    spec's method, and only afterwards updates the affected refs in the
    destination.

    Ref-update failures are recorded with add_error(); the process exits
    with status 1 if saved_errors is non-empty at the end.
    """
    handle_ctrl_c()
    opt = parse_args(sys.argv)
    if opt.source:
        opt.source = argv_bytes(opt.source)
    src_dir = opt.source or git.repo()
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    dest_repo = repo.from_opts(opt)

    with dest_repo as dest_repo:
        with LocalRepo(repo_dir=src_dir) as src_repo:
            # Resolve and validate all sources and destinations,
            # implicit or explicit, and do it up-front, so we can
            # fail before we start writing (for any obviously
            # broken cases).
            target_items = resolve_targets(opt.target_specs,
                                           src_repo, dest_repo)

            updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))
            no_ref_info = (None, None)

            # Dispatch table: spec method name -> handler function.
            handlers = {'ff': handle_ff,
                        'append': handle_append,
                        'force-pick': handle_pick,
                        'pick': handle_pick,
                        'new-tag': handle_new_tag,
                        'replace': handle_replace,
                        'unnamed': handle_unnamed}

            for item in target_items:
                debug1('get-spec: %r\n' % (item.spec, ))
                debug1('get-src: %s\n' % loc_desc(item.src))
                debug1('get-dest: %s\n' % loc_desc(item.dest))
                # Map the destination path to a ref name: paths below
                # '/.tag/' become tag refs, anything else a branch ref.
                dest_path = item.dest and item.dest.path
                if dest_path:
                    if dest_path.startswith(b'/.tag/'):
                        dest_ref = b'refs/tags/%s' % dest_path[6:]
                    else:
                        dest_ref = b'refs/heads/%s' % dest_path[1:]
                else:
                    dest_ref = None

                # If an earlier item already touched this ref, keep the
                # originally recorded value as orig_ref.
                dest_hash = item.dest and item.dest.hash
                orig_ref, cur_ref = updated_refs.get(dest_ref, no_ref_info)
                orig_ref = orig_ref or dest_hash
                # NOTE(review): cur_ref is not read again in this function.
                cur_ref = cur_ref or dest_hash

                handler = handlers[item.spec.method]
                item_result = handler(item, src_repo, dest_repo, opt)
                # Handlers return either (new_id, tree) or just (new_id,).
                if len(item_result) > 1:
                    new_id, tree = item_result
                else:
                    new_id = item_result[0]

                if not dest_ref:
                    log_item(item.spec.src, item.src.type, opt)
                else:
                    updated_refs[dest_ref] = (orig_ref, new_id)
                    if dest_ref.startswith(b'refs/tags/'):
                        log_item(item.spec.src, item.src.type, opt,
                                 tag=new_id)
                    else:
                        log_item(item.spec.src, item.src.type, opt,
                                 tree=tree, commit=new_id)

        # Only update the refs at the very end, once the destination repo
        # finished writing, so that if something goes wrong above, the old
        # refs will be undisturbed.
        for ref_name, info in items(updated_refs):
            orig_ref, new_ref = info
            try:
                dest_repo.update_ref(ref_name, new_ref, orig_ref)
                if opt.verbose:
                    new_hex = hexlify(new_ref)
                    if orig_ref:
                        orig_hex = hexlify(orig_ref)
                        log('updated %r (%s -> %s)\n'
                            % (ref_name, orig_hex, new_hex))
                    else:
                        log('updated %r (%s)\n' % (ref_name, new_hex))
            except (git.GitError, client.ClientError) as ex:
                # A failed ref update is recorded, not fatal, so the
                # remaining refs are still attempted.
                add_error('unable to update ref %r: %s' % (ref_name, ex))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n'
            % len(saved_errors))
        sys.exit(1)