def handle_sigterm(signum, frame):
    global io_loop
    debug1('\nbup-web: signal %d received\n' % signum)
    log('Shutdown requested\n')
    if not io_loop:
        sys.exit(0)
    io_loop.stop()
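# Hedged usage sketch (not part of bup): a handler like handle_sigterm above
# is registered with the standard signal module before the event loop starts;
# the surrounding setup here is an assumption for illustration only.
import signal
signal.signal(signal.SIGTERM, handle_sigterm)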
def _suggest_packs(self):
    ob = self._busy
    if ob:
        assert(ob == 'receive-objects-v2')
        self.conn.write('\xff\xff\xff\xff')  # suspend receive-objects-v2
    suggested = []
    for line in linereader(self.conn):
        if not line:
            break
        debug2('%s\n' % line)
        if line.startswith('index '):
            idx = line[6:]
            debug1('client: received index suggestion: %s\n'
                   % git.shorten_hash(idx))
            suggested.append(idx)
        else:
            assert(line.endswith('.idx'))
            debug1('client: completed writing pack, idx: %s\n'
                   % git.shorten_hash(line))
            suggested.append(line)
    self.check_ok()
    if ob:
        self._busy = None
    idx = None
    for idx in suggested:
        self.sync_index(idx)
    git.auto_midx(self.cachedir)
    if ob:
        self._busy = ob
        self.conn.write('%s\n' % ob)
    return idx
def resolve_branch_dest(spec, src, dest_repo, fatal):
    # Resulting dest must be treeish, or not exist.
    if not spec.dest:
        # Pick a default dest.
        if src.type == 'branch':
            spec = spec._replace(dest=spec.src)
        elif src.type == 'save':
            spec = spec._replace(dest=get_save_branch(spec.src))
        elif src.path.startswith('/.tag/'):  # Dest defaults to the same.
            spec = spec._replace(dest=spec.src)
    spec_args = '%s %s' % (spec.argopt, spec.argval)
    if not spec.dest:
        fatal('no destination (implicit or explicit) for %r' % spec_args)
    dest = find_vfs_item(spec.dest, dest_repo)
    if dest:
        if dest.type == 'commit':
            fatal('destination for %r is a tagged commit, not a branch'
                  % spec_args)
        if dest.type != 'branch':
            fatal('destination for %r is a %s, not a branch'
                  % (spec_args, dest.type))
    else:
        dest = default_loc._replace(path=cleanup_vfs_path(spec.dest))

    if dest.path.startswith('/.'):
        fatal('destination for %r must be a valid branch name' % spec_args)

    debug1('dest: %s\n' % loc_desc(dest))
    return spec, dest
def _init_session(reinit_with_new_repopath=None):
    if reinit_with_new_repopath is None and git.repodir:
        return
    git.check_repo_or_die(reinit_with_new_repopath)
    # OK. we now know the path is a proper repository. Record this path in the
    # environment so that subprocesses inherit it and know where to operate.
    os.environ["BUP_DIR"] = git.repodir
    debug1("bup server: bupdir is %r\n" % git.repodir)
    _set_mode()
def __init__(self, filename, f=None, readwrite=False, expected=-1):
    self.name = filename
    self.rwfile = None
    self.map = None
    assert(filename.endswith('.bloom'))
    if readwrite:
        assert(expected > 0)
        self.rwfile = f = f or open(filename, 'r+b')
        f.seek(0)

        # Decide if we want to mmap() the pages as writable ('immediate'
        # write) or else map them privately for later writing back to
        # the file ('delayed' write).  A bloom table's write access
        # pattern is such that we dirty almost all the pages after adding
        # very few entries.  But the table is so big that dirtying
        # *all* the pages often exceeds Linux's default
        # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
        # thus causing it to start flushing the table before we're
        # finished... even though there's more than enough space to
        # store the bloom table in RAM.
        #
        # To work around that behaviour, if we calculate that we'll
        # probably end up touching the whole table anyway (at least
        # one bit flipped per memory page), let's use a "private" mmap,
        # which defeats Linux's ability to flush it to disk.  Then we'll
        # flush it as one big lump during close().
        pages = os.fstat(f.fileno()).st_size / 4096 * 5  # assume k=5
        self.delaywrite = expected > pages
        debug1('bloom: delaywrite=%r\n' % self.delaywrite)
        if self.delaywrite:
            self.map = mmap_readwrite_private(self.rwfile, close=False)
        else:
            self.map = mmap_readwrite(self.rwfile, close=False)
    else:
        self.rwfile = None
        f = f or open(filename, 'rb')
        self.map = mmap_read(f)
    got = str(self.map[0:4])
    if got != 'BLOM':
        log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
        return self._init_failed()
    ver = struct.unpack('!I', self.map[4:8])[0]
    if ver < BLOOM_VERSION:
        log('Warning: ignoring old-style (v%d) bloom %r\n' % (ver, filename))
        return self._init_failed()
    if ver > BLOOM_VERSION:
        log('Warning: ignoring too-new (v%d) bloom %r\n' % (ver, filename))
        return self._init_failed()
    self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
    idxnamestr = str(self.map[16 + 2**self.bits:])
    if idxnamestr:
        self.idxnames = idxnamestr.split('\0')
    else:
        self.idxnames = []
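# Illustrative sketch of the "delayed write" idea described in the comment
# above: map the file privately, dirty the pages freely in RAM, then write
# everything back in one lump. A simplification; bup's actual
# mmap_readwrite_private() helper may differ in detail. POSIX-only.
import mmap, os

def delayed_write_demo(path):
    with open(path, 'r+b') as f:
        size = os.fstat(f.fileno()).st_size  # assume a non-empty file
        m = mmap.mmap(f.fileno(), size, mmap.MAP_PRIVATE)
        m[0:4] = b'BLOM'  # dirtied pages stay private to this process
        f.seek(0)
        f.write(m[:])     # flush the whole table back at once
        m.close()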
def resolve_src(spec, src_repo):
    src = find_vfs_item(spec.src, src_repo)
    spec_args = spec_msg(spec)
    if not src:
        misuse('cannot find source for %s' % spec_args)
    if src.type == 'root':
        misuse('cannot fetch entire repository for %s' % spec_args)
    if src.type == 'tags':
        misuse('cannot fetch entire /.tag directory for %s' % spec_args)
    debug1('src: %s\n' % loc_desc(src))
    return src
def resolve_src(spec, src_repo, fatal):
    src = find_vfs_item(spec.src, src_repo)
    spec_args = '%s %s' % (spec.argopt, spec.argval)
    if not src:
        fatal('cannot find source for %r' % spec_args)
    if src.hash == vfs.EMPTY_SHA.encode('hex'):
        fatal('cannot find source for %r (no hash)' % spec_args)
    if src.type == 'root':
        fatal('cannot fetch entire repository for %r' % spec_args)
    debug1('src: %s\n' % loc_desc(src))
    return src
def init_session(self, repo_dir=None):
    if self.repo and repo_dir:
        self.repo.close()
        self.repo = None
        self.suspended = False
    if not self.repo:
        self.repo = self._backend(repo_dir)
        self.dumb_server_mode = self.repo.config(b'bup.dumb-server',
                                                 opttype='bool')
        debug1('bup server: bupdir is %r\n' % self.repo.repo_dir)
        debug1('bup server: serving in %s mode\n'
               % (self.dumb_server_mode and 'dumb' or 'smart'))
def handler(signum, frame):
    debug1('\nbup: signal %d received\n' % signum)
    if not p or not forward_signals:
        return
    if signum != signal.SIGTSTP:
        os.kill(p.pid, signum)
    else:  # SIGTSTP: stop the child, then ourselves.
        os.kill(p.pid, signal.SIGSTOP)
        signal.signal(signal.SIGTSTP, signal.SIG_DFL)
        os.kill(os.getpid(), signal.SIGTSTP)
        # Back from suspend -- reestablish the handler.
        signal.signal(signal.SIGTSTP, handler)
    ret = 94
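# Hedged sketch (not bup's actual wiring): how a forwarding handler like the
# one above is typically installed around a child process; 'p' and
# 'forward_signals' are the module-level state the handler consults.
import signal, subprocess

forward_signals = True
p = subprocess.Popen(['sleep', '60'])
for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGTSTP):
    signal.signal(sig, handler)
p.wait()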
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    opt.output = argv_bytes(opt.output) if opt.output else None

    if extra and (opt.auto or opt.force):
        o.fatal("you can't use -f/-a and also provide filenames")
    if opt.check and (not extra and not opt.auto):
        o.fatal("if using --check, you must provide filenames or -a")

    git.check_repo_or_die()

    if opt.max_files < 0:
        opt.max_files = max_files()
    assert(opt.max_files >= 5)

    path = opt.dir and argv_bytes(opt.dir) or git.repo(b'objects/pack')
    extra = [argv_bytes(x) for x in extra]

    if opt.check:
        # check existing midx files
        if extra:
            midxes = extra
        else:
            debug1('midx: scanning %s\n' % path)
            midxes = glob.glob(os.path.join(path, b'*.midx'))
        for name in midxes:
            check_midx(name)
        if not saved_errors:
            log('All tests passed.\n')
    else:
        if extra:
            sys.stdout.flush()
            do_midx(path, opt.output, extra, b'', byte_stream(sys.stdout),
                    auto=opt.auto, force=opt.force, print_names=opt.print)
        elif opt.auto or opt.force:
            sys.stdout.flush()
            debug1('midx: scanning %s\n' % path_msg(path))
            do_midx_dir(path, opt.output, byte_stream(sys.stdout),
                        auto=opt.auto, force=opt.force,
                        max_files=opt.max_files)
        else:
            o.fatal("you must use -f or -a or provide input filenames")

    if saved_errors:
        log('WARNING: %d errors encountered.\n' % len(saved_errors))
        sys.exit(1)
def _init_session(reinit_with_new_repopath=None):
    global repo
    if reinit_with_new_repopath is None and git.repodir:
        if not repo:
            repo = LocalRepo()
        return
    git.check_repo_or_die(reinit_with_new_repopath)
    if repo:
        repo.close()
    repo = LocalRepo()
    # OK. we now know the path is a proper repository. Record this path in the
    # environment so that subprocesses inherit it and know where to operate.
    os.environ['BUP_DIR'] = git.repodir
    debug1('bup server: bupdir is %r\n' % git.repodir)
    _set_mode()
def _init_session(reinit_with_new_repopath=None):
    global repo
    if reinit_with_new_repopath is None and git.repodir:
        if not repo:
            repo = LocalRepo()
        return
    git.check_repo_or_die(reinit_with_new_repopath)
    if repo:
        repo.close()
    repo = LocalRepo()
    # OK. we now know the path is a proper repository. Record this path in the
    # environment so that subprocesses inherit it and know where to operate.
    environ[b'BUP_DIR'] = git.repodir
    debug1('bup server: bupdir is %s\n' % path_msg(git.repodir))
    _set_mode()
def do_midx_dir(path, outfilename):
    already = {}
    sizes = {}
    if opt.force and not opt.auto:
        midxs = []   # don't use existing midx files
    else:
        midxs = glob.glob('%s/*.midx' % path)
        contents = {}
        for mname in midxs:
            m = git.open_idx(mname)
            contents[mname] = [('%s/%s' % (path, i)) for i in m.idxnames]
            sizes[mname] = len(m)

        # sort the biggest+newest midxes first, so that we can eliminate
        # smaller (or older) redundant ones that come later in the list
        midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))

        for mname in midxs:
            any = 0
            for iname in contents[mname]:
                if not already.get(iname):
                    already[iname] = 1
                    any = 1
            if not any:
                debug1('%r is redundant\n' % mname)
                unlink(mname)
                already[mname] = 1

    midxs = [k for k in midxs if not already.get(k)]
    idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]

    for iname in idxs:
        i = git.open_idx(iname)
        sizes[iname] = len(i)

    all = [(sizes[n], n) for n in (midxs + idxs)]

    # FIXME: what are the optimal values?  Does this make sense?
    DESIRED_HWM = opt.force and 1 or 5
    DESIRED_LWM = opt.force and 1 or 2
    existed = dict((name, 1) for sz, name in all)
    debug1('midx: %d indexes; want no more than %d.\n'
           % (len(all), DESIRED_HWM))
    if len(all) <= DESIRED_HWM:
        debug1('midx: nothing to do.\n')
    while len(all) > DESIRED_HWM:
        all.sort()
        part1 = [name for sz, name in all[:len(all) - DESIRED_LWM + 1]]
        part2 = all[len(all) - DESIRED_LWM + 1:]
        all = list(do_midx_group(path, outfilename, part1)) + part2
        if len(all) > DESIRED_HWM:
            debug1('\nStill too many indexes (%d > %d).  Merging again.\n'
                   % (len(all), DESIRED_HWM))

    if opt['print']:
        for sz, name in all:
            if not existed.get(name):
                print name
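# Toy illustration (not bup code) of the HWM/LWM loop above: each pass merges
# all but the largest DESIRED_LWM-1 indexes into one, until no more than
# DESIRED_HWM remain. The sizes below are made-up object counts.
def simulate_merge(sizes, hwm=5, lwm=2):
    while len(sizes) > hwm:
        sizes.sort()
        cut = len(sizes) - lwm + 1
        sizes = [sum(sizes[:cut])] + sizes[cut:]
    return sizes

print(simulate_merge([1, 2, 3, 4, 5, 6, 7, 8]))  # -> [28, 8]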
def main(argv):
    # Give the subcommand exclusive access to stdin.
    orig_stdin = os.dup(0)
    devnull = os.open(os.devnull, os.O_RDONLY)
    os.dup2(devnull, 0)
    os.close(devnull)

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    if len(extra) < 1:
        o.fatal('command is required')

    subcmd = extra

    debug2('bup mux: starting %r\n' % (extra,))

    outr, outw = os.pipe()
    errr, errw = os.pipe()
    def close_fds():
        os.close(outr)
        os.close(errr)

    p = subprocess.Popen(subcmd, stdin=orig_stdin, stdout=outw, stderr=errw,
                         close_fds=False, preexec_fn=close_fds)
    os.close(outw)
    os.close(errw)
    sys.stdout.flush()
    out = byte_stream(sys.stdout)
    out.write(b'BUPMUX')
    out.flush()
    mux(p, out.fileno(), outr, errr)
    os.close(outr)
    os.close(errr)
    prv = p.wait()

    if prv:
        debug1('%s exited with code %d\n' % (extra[0], prv))
    debug1('bup mux: done\n')

    sys.exit(prv)
def create(name, expected, delaywrite=None, f=None, k=None):
    """Create and return a bloom filter for `expected` entries."""
    bits = int(math.floor(math.log(expected * MAX_BITS_EACH / 8, 2)))
    k = k or ((bits <= MAX_BLOOM_BITS[5]) and 5 or 4)
    if bits > MAX_BLOOM_BITS[k]:
        log('bloom: warning, max bits exceeded, non-optimal\n')
        bits = MAX_BLOOM_BITS[k]
    debug1('bloom: using 2^%d bytes and %d hash functions\n' % (bits, k))
    f = f or open(name, 'w+b')
    f.write('BLOM')
    f.write(struct.pack('!IHHI', BLOOM_VERSION, bits, k, 0))
    assert(f.tell() == 16)
    # NOTE: On some systems this will not extend+zerofill, but it does on
    # darwin, linux, bsd and solaris.
    f.truncate(16 + 2**bits)
    f.seek(0)
    if delaywrite != None and not delaywrite:
        # tell it to expect very few objects, forcing a direct mmap
        expected = 1
    return ShaBloom(name, f=f, readwrite=True, expected=expected)
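# Back-of-envelope check of the parameters create() picks, using the standard
# bloom estimate p = (1 - e**(-k*n/m))**k. The table is 2**bits bytes, so
# m = 8 * 2**bits bits; the demo numbers below are assumptions.
import math

def pfalse(entries, bits, k):
    m = 8 * 2**bits
    return (1 - math.exp(-k * float(entries) / m)) ** k

print('%.4f%%' % (100 * pfalse(entries=10**6, bits=21, k=5)))  # ~0.11%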
def sync_indexes(self):
    conn = self.conn
    mkdirp(self.cachedir)
    # All cached idxs are extra until proven otherwise
    extra = set()
    for f in os.listdir(self.cachedir):
        debug1(path_msg(f) + '\n')
        if f.endswith(b'.idx'):
            extra.add(f)
    needed = set()
    for idx, load in self._list_indexes():
        if load:
            # If the server requests that we load an idx and we don't
            # already have a copy of it, it is needed
            needed.add(idx)
        # Any idx that the server has heard of is proven not extra
        extra.discard(idx)
    debug1('client: removing extra indexes: %s\n' % extra)
    for idx in extra:
        os.unlink(os.path.join(self.cachedir, idx))
    debug1('client: server requested load of: %s\n' % needed)
    for idx in needed:
        self.sync_index(idx)
    git.auto_midx(self.cachedir)
def resolve_targets(specs, src_repo, src_vfs, src_dir, src_cp, dest_repo,
                    fatal):
    resolved_items = []
    common_args = (src_repo, src_vfs, src_dir, src_cp, dest_repo, fatal)
    for spec in specs:
        debug1('initial-spec: %s\n' % str(spec))
        if spec.method == 'ff':
            resolved_items.append(resolve_ff(spec, *common_args))
        elif spec.method == 'append':
            resolved_items.append(resolve_append(spec, *common_args))
        elif spec.method in ('pick', 'force-pick'):
            resolved_items.append(resolve_pick(spec, *common_args))
        elif spec.method == 'new-tag':
            resolved_items.append(resolve_new_tag(spec, *common_args))
        elif spec.method == 'overwrite':
            resolved_items.append(resolve_overwrite(spec, *common_args))
        elif spec.method == 'unnamed':
            resolved_items.append(resolve_unnamed(spec, *common_args))
        else:  # Should be impossible -- prevented by the option parser.
            assert(False)
    # FIXME: check for prefix overlap?  i.e.:
    #   bup get --ff foo --ff baz:foo/bar
    #   bup get --new-tag .tag/foo --new-tag bar:.tag/foo/bar
    # Now that we have all the items, check for duplicate tags.
    tags_targeted = set()
    for item in resolved_items:
        dest_path = item.dest and item.dest.path
        if dest_path:
            assert(dest_path.startswith('/'))
            if dest_path.startswith('/.tag/'):
                if dest_path in tags_targeted:
                    if item.spec.method not in ('overwrite', 'force-pick'):
                        spec_args = '%s %s' % (item.spec.argopt,
                                               item.spec.argval)
                        fatal('cannot overwrite tag %r via %r'
                              % (dest_path, spec_args))
                else:
                    tags_targeted.add(dest_path)
    return resolved_items
def sync_indexes(self):
    self._require_command(b'list-indexes')
    self.check_busy()
    conn = self.conn
    mkdirp(self.cachedir)
    # All cached idxs are extra until proven otherwise
    extra = set()
    for f in os.listdir(self.cachedir):
        debug1(path_msg(f) + '\n')
        if f.endswith(b'.idx'):
            extra.add(f)
    needed = set()
    conn.write(b'list-indexes\n')
    for line in linereader(conn):
        if not line:
            break
        assert(line.find(b'/') < 0)
        parts = line.split(b' ')
        idx = parts[0]
        if len(parts) == 2 and parts[1] == b'load' and idx not in extra:
            # If the server requests that we load an idx and we don't
            # already have a copy of it, it is needed
            needed.add(idx)
        # Any idx that the server has heard of is proven not extra
        extra.discard(idx)
    self.check_ok()
    debug1('client: removing extra indexes: %s\n' % extra)
    for idx in extra:
        os.unlink(os.path.join(self.cachedir, idx))
    debug1('client: server requested load of: %s\n' % needed)
    for idx in needed:
        self.sync_index(idx)
    git.auto_midx(self.cachedir)
def resolve_targets(specs, src_repo, dest_repo):
    resolved_items = []
    common_args = src_repo, dest_repo
    for spec in specs:
        debug1('initial-spec: %s\n' % str(spec))
        if spec.method == 'ff':
            resolved_items.append(resolve_ff(spec, *common_args))
        elif spec.method == 'append':
            resolved_items.append(resolve_append(spec, *common_args))
        elif spec.method in ('pick', 'force-pick'):
            resolved_items.append(resolve_pick(spec, *common_args))
        elif spec.method == 'new-tag':
            resolved_items.append(resolve_new_tag(spec, *common_args))
        elif spec.method == 'replace':
            resolved_items.append(resolve_replace(spec, *common_args))
        elif spec.method == 'unnamed':
            resolved_items.append(resolve_unnamed(spec, *common_args))
        else:  # Should be impossible -- prevented by the option parser.
            assert(False)
    # FIXME: check for prefix overlap?  i.e.:
    #   bup get --ff foo --ff: baz foo/bar
    #   bup get --new-tag .tag/foo --new-tag: bar .tag/foo/bar
    # Now that we have all the items, check for duplicate tags.
    tags_targeted = set()
    for item in resolved_items:
        dest_path = item.dest and item.dest.path
        if dest_path:
            assert(dest_path.startswith('/'))
            if dest_path.startswith('/.tag/'):
                if dest_path in tags_targeted:
                    if item.spec.method not in ('replace', 'force-pick'):
                        spec_args = '%s %s' % (item.spec.argopt,
                                               item.spec.argval)
                        misuse('cannot overwrite tag %r via %r'
                               % (dest_path, spec_args))
                else:
                    tags_targeted.add(dest_path)
    return resolved_items
def sync_indexes(self):
    self.check_busy()
    conn = self.conn
    mkdirp(self.cachedir)
    # All cached idxs are extra until proven otherwise
    extra = set()
    for f in os.listdir(self.cachedir):
        debug1('%s\n' % f)
        if f.endswith('.idx'):
            extra.add(f)
    needed = set()
    conn.write('list-indexes\n')
    for line in linereader(conn):
        if not line:
            break
        assert(line.find('/') < 0)
        parts = line.split(' ')
        idx = parts[0]
        if len(parts) == 2 and parts[1] == 'load' and idx not in extra:
            # If the server requests that we load an idx and we don't
            # already have a copy of it, it is needed
            needed.add(idx)
        # Any idx that the server has heard of is proven not extra
        extra.discard(idx)
    self.check_ok()
    debug1('client: removing extra indexes: %s\n' % extra)
    for idx in extra:
        os.unlink(os.path.join(self.cachedir, idx))
    debug1('client: server requested load of: %s\n' % needed)
    for idx in needed:
        self.sync_index(idx)
    git.auto_midx(self.cachedir)
def _recursive_dirlist(prepend, xdev, bup_dir=None,
                       excluded_paths=None,
                       exclude_rxs=None):
    for (name, pst) in _dirlist():
        path = prepend + name
        if excluded_paths:
            if os.path.normpath(path) in excluded_paths:
                debug1('Skipping %r: excluded.\n' % path)
                continue
        if exclude_rxs and should_rx_exclude_path(path, exclude_rxs):
            continue
        if name.endswith('/'):
            if bup_dir != None:
                if os.path.normpath(path) == bup_dir:
                    debug1('Skipping BUP_DIR.\n')
                    continue
            if xdev != None and pst.st_dev != xdev:
                debug1('Skipping contents of %r: different filesystem.\n'
                       % path)
            else:
                try:
                    OsFile(name).fchdir()
                except OSError as e:
                    add_error('%s: %s' % (prepend, e))
                else:
                    for i in _recursive_dirlist(prepend=prepend + name,
                                                xdev=xdev,
                                                bup_dir=bup_dir,
                                                excluded_paths=excluded_paths,
                                                exclude_rxs=exclude_rxs):
                        yield i
                    os.chdir('..')
        yield (path, pst)
def receive_objects_v2(conn, junk):
    global suspended_w
    _init_session()
    suggested = set()
    if suspended_w:
        w = suspended_w
        suspended_w = None
    else:
        if dumb_server_mode:
            w = git.PackWriter(objcache_maker=None)
        else:
            w = git.PackWriter()
    while 1:
        ns = conn.read(4)
        if not ns:
            w.abort()
            raise Exception('object read: expected length header, got EOF\n')
        n = struct.unpack('!I', ns)[0]
        #debug2('expecting %d bytes\n' % n)
        if not n:
            debug1('bup server: received %d object%s.\n'
                   % (w.count, w.count != 1 and "s" or ''))
            fullpath = w.close(run_midx=not dumb_server_mode)
            if fullpath:
                (dir, name) = os.path.split(fullpath)
                conn.write('%s.idx\n' % name)
            conn.ok()
            return
        elif n == 0xffffffff:
            debug2('bup server: receive-objects suspended.\n')
            suspended_w = w
            conn.ok()
            return

        shar = conn.read(20)
        crcr = struct.unpack('!I', conn.read(4))[0]
        n -= 20 + 4
        buf = conn.read(n)  # object sizes in bup are reasonably small
        #debug2('read %d bytes\n' % n)
        _check(w, n, len(buf), 'object read: expected %d bytes, got %d\n')
        if not dumb_server_mode:
            oldpack = w.exists(shar, want_source=True)
            if oldpack:
                assert(not oldpack == True)
                assert(oldpack.endswith('.idx'))
                (dir, name) = os.path.split(oldpack)
                if not (name in suggested):
                    debug1("bup server: suggesting index %s\n"
                           % git.shorten_hash(name))
                    debug1("bup server: because of object %s\n"
                           % shar.encode('hex'))
                    conn.write('index %s\n' % name)
                    suggested.add(name)
                continue
        nw, crc = w._raw_write((buf,), sha=shar)
        _check(w, crcr, crc, 'object read: expected crc %d, got %d\n')
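# Hedged sketch of the frame the receive-objects-v2 loop above parses: a !I
# length covering a 20-byte sha, a !I crc, and the object data; a zero length
# ends the stream and 0xffffffff suspends it. The encoder below is inferred
# from the snippet for illustration, not bup's actual client code.
import struct

def encode_object_frame(sha, crc, data):
    assert len(sha) == 20
    header = struct.pack('!I', 20 + 4 + len(data))
    return header + sha + struct.pack('!I', crc) + data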
def _recursive_dirlist(prepend, xdev, bup_dir=None,
                       excluded_paths=None,
                       exclude_rxs=None,
                       xdev_exceptions=frozenset()):
    for (name, pst) in _dirlist():
        path = prepend + name
        if excluded_paths:
            if os.path.normpath(path) in excluded_paths:
                debug1('Skipping %r: excluded.\n' % path_msg(path))
                continue
        if exclude_rxs and should_rx_exclude_path(path, exclude_rxs):
            continue
        if name.endswith(b'/'):
            if bup_dir != None:
                if os.path.normpath(path) == bup_dir:
                    debug1('Skipping BUP_DIR.\n')
                    continue
            if xdev != None and pst.st_dev != xdev \
               and path not in xdev_exceptions:
                debug1('Skipping contents of %r: different filesystem.\n'
                       % path_msg(path))
            else:
                try:
                    with finalized_fd(name) as fd:
                        os.fchdir(fd)
                except OSError as e:
                    add_error('%s: %s' % (prepend, e))
                else:
                    for i in _recursive_dirlist(prepend=prepend + name,
                                                xdev=xdev,
                                                bup_dir=bup_dir,
                                                excluded_paths=excluded_paths,
                                                exclude_rxs=exclude_rxs,
                                                xdev_exceptions=xdev_exceptions):
                        yield i
                    os.chdir(b'..')
        yield (path, pst)
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    global _first
    if not outfilename:
        assert(outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)

    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(lambda x, y: cmp(str(y[0][y[2]:y[2]+20]),
                                  str(x[0][x[2]:x[2]+20])))

        if not _first:
            _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write('MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)
            count = merge_into(fmap, bits, total, inp)
            del fmap  # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write('\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None

    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        for pe, e in zip(p, git.idxmerge(inp, final_progress=False)):
            assert(pe == e)
            assert(p.exists(pe))

    return total, outfilename
git.check_repo_or_die()

if opt.max_files < 0:
    opt.max_files = max_files()
assert(opt.max_files >= 5)

if opt.check:
    # check existing midx files
    if extra:
        midxes = extra
    else:
        midxes = []
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            midxes += glob.glob(os.path.join(path, '*.midx'))
    for name in midxes:
        check_midx(name)
    if not saved_errors:
        log('All tests passed.\n')
else:
    if extra:
        do_midx(git.repo('objects/pack'), opt.output, extra, '')
    elif opt.auto or opt.force:
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            do_midx_dir(path, opt.output)
    else:
        o.fatal("you must use -f or -a or provide input filenames")
                              dest_repo, o.fatal)

updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))
no_ref_info = (None, None)

handlers = {'ff': handle_ff,
            'append': handle_append,
            'force-pick': handle_pick,
            'pick': handle_pick,
            'new-tag': handle_new_tag,
            'overwrite': handle_overwrite,
            'unnamed': handle_unnamed}

for item in target_items:
    debug1('get-spec: %s\n' % str(item.spec))
    debug1('get-src: %s\n' % loc_desc(item.src))
    debug1('get-dest: %s\n' % loc_desc(item.dest))
    dest_path = item.dest and item.dest.path
    if dest_path:
        if dest_path.startswith('/.tag/'):
            dest_ref = 'refs/tags/%s' % dest_path[6:]
        else:
            dest_ref = 'refs/heads/%s' % dest_path[1:]
    else:
        dest_ref = None

    dest_hash = item.dest and item.dest.hash
    orig_ref, cur_ref = updated_refs.get(dest_ref, no_ref_info)
    orig_ref = orig_ref or dest_hash
        sys.exit(1)
    tag_file = 'refs/tags/%s' % opt.delete
    git.delete_ref(tag_file)
    sys.exit(0)

if not extra:
    for t in tags:
        print t
    sys.exit(0)
elif len(extra) < 2:
    o.fatal('no commit ref or hash given.')

(tag_name, commit) = extra[:2]
if not tag_name:
    o.fatal("tag name must not be empty.")
debug1("args: tag name = %s; commit = %s\n" % (tag_name, commit))

if tag_name in tags and not opt.force:
    log("bup: error: tag '%s' already exists\n" % tag_name)
    sys.exit(1)

if tag_name.startswith('.'):
    o.fatal("'%s' is not a valid tag name." % tag_name)

try:
    hash = git.rev_parse(commit)
except git.GitError as e:
    log("bup: error: %s" % e)
    sys.exit(2)

if not hash:
def receive_objects_v2(self, junk):
    self.init_session()
    suggested = set()
    if self.suspended:
        self.suspended = False
    else:
        if self.dumb_server_mode:
            objcache_maker = lambda: None
        else:
            objcache_maker = None
        # FIXME: this goes together with the direct accesses below
        self.repo._ensure_packwriter()
    while 1:
        ns = self.conn.read(4)
        if not ns:
            self.repo.abort_writing()
            raise Exception('object read: expected length header, got EOF\n')
        n = struct.unpack('!I', ns)[0]
        #debug2('expecting %d bytes\n' % n)
        if not n:
            # FIXME: don't be lazy and count ourselves, or something, at least
            # don't access self.repo internals
            debug1('bup server: received %d object%s.\n'
                   % (self.repo._packwriter.count,
                      self.repo._packwriter.count != 1 and "s" or ''))
            fullpath = self.repo.finish_writing(
                run_midx=not self.dumb_server_mode)
            if fullpath:
                (dir, name) = os.path.split(fullpath)
                self.conn.write(b'%s.idx\n' % name)
            self.conn.ok()
            return
        elif n == 0xffffffff:
            debug2('bup server: receive-objects suspended.\n')
            self.suspended = True
            self.conn.ok()
            return

        shar = self.conn.read(20)
        crcr = struct.unpack('!I', self.conn.read(4))[0]
        n -= 20 + 4
        buf = self.conn.read(n)  # object sizes in bup are reasonably small
        #debug2('read %d bytes\n' % n)
        self._check(n, len(buf), 'object read: expected %d bytes, got %d\n')
        if not self.dumb_server_mode:
            result = self.repo.exists(shar, want_source=True)
            if result:
                oldpack = result.pack
                assert(oldpack.endswith(b'.idx'))
                (dir, name) = os.path.split(oldpack)
                if not (name in suggested):
                    debug1("bup server: suggesting index %s\n"
                           % git.shorten_hash(name).decode('ascii'))
                    debug1("bup server: because of object %s\n"
                           % hexstr(shar))
                    self.conn.write(b'index %s\n' % name)
                    suggested.add(name)
                continue
        # FIXME: figure out the right abstraction for this; or better yet,
        # make the protocol aware of the object type
        nw, crc = self.repo._packwriter._raw_write((buf,), sha=shar)
        self._check(crcr, crc, 'object read: expected crc %d, got %d\n')
def _set_mode():
    global dumb_server_mode
    dumb_server_mode = os.path.exists(git.repo('bup-dumb-server'))
    debug1('bup server: serving in %s mode\n'
           % (dumb_server_mode and 'dumb' or 'smart'))
    sys.exit(0)

if not extra:
    for t in tags:
        sys.stdout.flush()
        out = byte_stream(sys.stdout)
        out.write(t)
        out.write(b'\n')
    sys.exit(0)
elif len(extra) != 2:
    o.fatal('expected commit ref and hash')

tag_name, commit = map(argv_bytes, extra[:2])
if not tag_name:
    o.fatal("tag name must not be empty.")
debug1("args: tag name = %s; commit = %s\n"
       % (path_msg(tag_name), commit.decode('ascii')))

if tag_name in tags and not opt.force:
    log("bup: error: tag '%s' already exists\n" % path_msg(tag_name))
    sys.exit(1)

if tag_name.startswith(b'.'):
    o.fatal("'%s' is not a valid tag name." % path_msg(tag_name))

try:
    hash = git.rev_parse(commit)
except git.GitError as e:
    log("bup: error: %s" % e)
    sys.exit(2)

if not hash:
"list-indexes": list_indexes, "send-index": send_index, "receive-objects-v2": receive_objects_v2, "read-ref": read_ref, "update-ref": update_ref, "cat": cat, } # FIXME: this protocol is totally lame and not at all future-proof. # (Especially since we abort completely as soon as *anything* bad happens) conn = Conn(sys.stdin, sys.stdout) lr = linereader(conn) for _line in lr: line = _line.strip() if not line: continue debug1("bup server: command: %r\n" % line) words = line.split(" ", 1) cmd = words[0] rest = len(words) > 1 and words[1] or "" if cmd == "quit": break else: cmd = commands.get(cmd) if cmd: cmd(conn, rest) else: raise Exception("unknown server command: %r\n" % line) debug1("bup server: done\n")
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal('no positional parameters expected')

if not opt.check and opt.k and opt.k not in (4, 5):
    o.fatal('only k values of 4 and 5 are supported')

if opt.check:
    opt.check = argv_bytes(opt.check)

git.check_repo_or_die()

output = argv_bytes(opt.output) if opt.output else None
paths = opt.dir and [argv_bytes(opt.dir)] or git.all_packdirs()
for path in paths:
    debug1('bloom: scanning %s\n' % path_msg(path))
    outfilename = output or os.path.join(path, b'bup.bloom')
    if opt.check:
        check_bloom(path, outfilename, opt.check)
    elif opt.ruin:
        ruin_bloom(outfilename)
    else:
        do_bloom(path, outfilename, opt.k)

if saved_errors:
    log('WARNING: %d errors encountered during bloom.\n' % len(saved_errors))
    sys.exit(1)
elif opt.check:
    log('All tests passed.\n')
def do_bloom(path, outfilename, k, force):
    global _first
    assert k in (None, 4, 5)
    b = None
    if os.path.exists(outfilename) and not force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i, name in enumerate(glob.glob(b'%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n"
                   % (len(b), rest_count))
            b = None
        elif k is not None and k != b.k:
            debug1("bloom: new k %d != existing k %d, regenerating\n"
                   % (k, b.k))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS[b.k] and
              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n"
                   % (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True,
                               expected=add_count)
    if not b:  # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first:
        _first = path
    dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b''
    progress('bloom: %s%s %d file%s (%d object%s).\r'
             % (path_msg(dirprefix), msg,
                len(add), len(add) != 1 and 's' or '',
                add_count, add_count != 1 and 's' or ''))

    tfname = None
    if b is None:
        tfname = os.path.join(path, b'bup.tmp.bloom')
        b = bloom.create(tfname, expected=add_count, k=k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r'
                  % (icount * 100.0 / add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    # Currently, there's an open file object for tfname inside b.
    # Make sure it's closed before rename.
    b.close()

    if tfname:
        os.rename(tfname, outfilename)
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    git.check_repo_or_die()

    tags = [t for sublist in git.tags().values() for t in sublist]

    if opt.delete:
        # git.delete_ref() doesn't complain if a ref doesn't exist.  We
        # could implement this verification but we'd need to read in the
        # contents of the tag file and pass the hash, and we already know
        # about the tag's existence via "tags".
        tag_name = argv_bytes(opt.delete)
        if not opt.force and tag_name not in tags:
            log("error: tag '%s' doesn't exist\n" % path_msg(tag_name))
            sys.exit(1)
        tag_file = b'refs/tags/%s' % tag_name
        git.delete_ref(tag_file)
        sys.exit(0)

    if not extra:
        for t in tags:
            sys.stdout.flush()
            out = byte_stream(sys.stdout)
            out.write(t)
            out.write(b'\n')
        sys.exit(0)
    elif len(extra) != 2:
        o.fatal('expected commit ref and hash')

    tag_name, commit = map(argv_bytes, extra[:2])
    if not tag_name:
        o.fatal("tag name must not be empty.")
    debug1("args: tag name = %s; commit = %s\n"
           % (path_msg(tag_name), commit.decode('ascii')))

    if tag_name in tags and not opt.force:
        log("bup: error: tag '%s' already exists\n" % path_msg(tag_name))
        sys.exit(1)

    if tag_name.startswith(b'.'):
        o.fatal("'%s' is not a valid tag name." % path_msg(tag_name))

    try:
        hash = git.rev_parse(commit)
    except git.GitError as e:
        log("bup: error: %s" % e)
        sys.exit(2)

    if not hash:
        log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
        sys.exit(2)

    pL = git.PackIdxList(git.repo(b'objects/pack'))
    if not pL.exists(hash):
        log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
        sys.exit(2)

    tag_file = git.repo(b'refs/tags/' + tag_name)
    try:
        tag = open(tag_file, 'wb')
    except OSError as e:
        log("bup: error: could not create tag '%s': %s"
            % (path_msg(tag_name), e))
        sys.exit(3)
    with tag as tag:
        tag.write(hexlify(hash))
        tag.write(b'\n')
handle_ctrl_c()

o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal("no positional parameters expected")

git.check_repo_or_die()

if not opt.check and opt.k and opt.k not in (4, 5):
    o.fatal("only k values of 4 and 5 are supported")

paths = opt.dir and [opt.dir] or git.all_packdirs()
for path in paths:
    debug1("bloom: scanning %s\n" % path)
    outfilename = opt.output or os.path.join(path, "bup.bloom")
    if opt.check:
        check_bloom(path, outfilename, opt.check)
    elif opt.ruin:
        ruin_bloom(outfilename)
    else:
        do_bloom(path, outfilename)

if saved_errors:
    log("WARNING: %d errors encountered during bloom.\n" % len(saved_errors))
    sys.exit(1)
elif opt.check:
    log("All tests passed.\n")
if len(extra) < 1:
    o.fatal('command is required')

subcmd = extra

debug2('bup mux: starting %r\n' % (extra,))

outr, outw = os.pipe()
errr, errw = os.pipe()
def close_fds():
    os.close(outr)
    os.close(errr)

p = subprocess.Popen(subcmd, stdin=orig_stdin, stdout=outw, stderr=errw,
                     preexec_fn=close_fds)
os.close(outw)
os.close(errw)
sys.stdout.write('BUPMUX')
sys.stdout.flush()
mux(p, sys.stdout.fileno(), outr, errr)
os.close(outr)
os.close(errr)
prv = p.wait()

if prv:
    debug1('%s exited with code %d\n' % (extra[0], prv))
debug1('bup mux: done\n')

sys.exit(prv)
def _do_midx(outdir, outfilename, infilenames, prefixstr,
             auto=False, force=False):
    global _first
    if not outfilename:
        assert(outdir)
        sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
        outfilename = b'%s/midx-%s.midx' % (outdir, sum)

    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(reverse=True, key=lambda x: x[0][x[2]:x[2]+20])

        if not _first:
            _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (auto and (total < 1024 and len(infilenames) < 3)) \
           or ((auto or force) and len(infilenames) < 2) \
           or (force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write(b'MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)
            count = merge_into(fmap, bits, total, inp)
            del fmap  # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write(b'\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None

    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        log(repr(p.idxnames) + '\n')
        assert(len(p) == total)
        for pe, e in zip(p, git.idxmerge(inp, final_progress=False)):
            assert(pe == e)
            assert(p.exists(pe))

    return total, outfilename
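# Worked example of the fanout-table sizing above: one entry per
# SHA_PER_PAGE hashes, rounded up to a power of two. SHA_PER_PAGE is bup's
# constant; the 4096/20. value and the object count are assumptions.
import math

SHA_PER_PAGE = 4096 / 20.
total = 500000                             # pretend object count
pages = int(total / SHA_PER_PAGE) or 1     # 2441
bits = int(math.ceil(math.log(pages, 2)))  # 12
entries = 2 ** bits                        # 4096
print('table size: %d bytes (%d bits)' % (entries * 4, bits))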
def main():
    handle_ctrl_c()
    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    opt = parse_args(compat.argv)
    git.check_repo_or_die()
    if opt.source:
        opt.source = argv_bytes(opt.source)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if is_reverse and opt.remote:
        misuse("don't use -r in reverse mode; it's automatic")
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.remote or is_reverse:
        dest_repo = RemoteRepo(opt.remote)
    else:
        dest_repo = LocalRepo()

    with dest_repo as dest_repo:
        with LocalRepo(repo_dir=opt.source) as src_repo:
            with dest_repo.new_packwriter(compression_level=opt.compress) \
                 as writer:
                # Resolve and validate all sources and destinations,
                # implicit or explicit, and do it up-front, so we can
                # fail before we start writing (for any obviously
                # broken cases).
                target_items = resolve_targets(opt.target_specs,
                                               src_repo, dest_repo)

                updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))
                no_ref_info = (None, None)

                handlers = {'ff': handle_ff,
                            'append': handle_append,
                            'force-pick': handle_pick,
                            'pick': handle_pick,
                            'new-tag': handle_new_tag,
                            'replace': handle_replace,
                            'unnamed': handle_unnamed}

                for item in target_items:
                    debug1('get-spec: %r\n' % (item.spec,))
                    debug1('get-src: %s\n' % loc_desc(item.src))
                    debug1('get-dest: %s\n' % loc_desc(item.dest))
                    dest_path = item.dest and item.dest.path
                    if dest_path:
                        if dest_path.startswith(b'/.tag/'):
                            dest_ref = b'refs/tags/%s' % dest_path[6:]
                        else:
                            dest_ref = b'refs/heads/%s' % dest_path[1:]
                    else:
                        dest_ref = None

                    dest_hash = item.dest and item.dest.hash
                    orig_ref, cur_ref = updated_refs.get(dest_ref,
                                                         no_ref_info)
                    orig_ref = orig_ref or dest_hash
                    cur_ref = cur_ref or dest_hash

                    handler = handlers[item.spec.method]
                    item_result = handler(item, src_repo, writer, opt)
                    if len(item_result) > 1:
                        new_id, tree = item_result
                    else:
                        new_id = item_result[0]

                    if not dest_ref:
                        log_item(item.spec.src, item.src.type, opt)
                    else:
                        updated_refs[dest_ref] = (orig_ref, new_id)
                        if dest_ref.startswith(b'refs/tags/'):
                            log_item(item.spec.src, item.src.type, opt,
                                     tag=new_id)
                        else:
                            log_item(item.spec.src, item.src.type, opt,
                                     tree=tree, commit=new_id)

            # Only update the refs at the very end, once the writer is
            # closed, so that if something goes wrong above, the old refs
            # will be undisturbed.
            for ref_name, info in items(updated_refs):
                orig_ref, new_ref = info
                try:
                    dest_repo.update_ref(ref_name, new_ref, orig_ref)
                    if opt.verbose:
                        new_hex = hexlify(new_ref)
                        if orig_ref:
                            orig_hex = hexlify(orig_ref)
                            log('updated %r (%s -> %s)\n'
                                % (ref_name, orig_hex, new_hex))
                        else:
                            log('updated %r (%s)\n' % (ref_name, new_hex))
                except (git.GitError, client.ClientError) as ex:
                    add_error('unable to update ref %r: %s' % (ref_name, ex))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n'
            % len(saved_errors))
        sys.exit(1)
def init_dir(conn, arg):
    git.init_repo(arg)
    debug1('bup server: bupdir initialized: %r\n' % git.repodir)
    _init_session(arg)
    conn.ok()
    'update-ref': update_ref,
    'join': join,
    'cat': join,  # apocryphal alias
    'cat-batch': cat_batch,
    'refs': refs,
    'rev-list': rev_list
}

# FIXME: this protocol is totally lame and not at all future-proof.
# (Especially since we abort completely as soon as *anything* bad happens)
conn = Conn(sys.stdin, sys.stdout)
lr = linereader(conn)
for _line in lr:
    line = _line.strip()
    if not line:
        continue
    debug1('bup server: command: %r\n' % line)
    words = line.split(' ', 1)
    cmd = words[0]
    rest = len(words) > 1 and words[1] or ''
    if cmd == 'quit':
        break
    else:
        cmd = commands.get(cmd)
        if cmd:
            cmd(conn, rest)
        else:
            raise Exception('unknown server command: %r\n' % line)

debug1('bup server: done\n')
def refresh(self, skip_midx = False):
    """Refresh the index list.
    This method verifies if .midx files were superseded (e.g. all of
    its contents are in another, bigger .midx file) and removes the
    superseded files.

    If skip_midx is True, all work on .midx files will be skipped and
    .midx files will be removed from the list.

    The module-global variable 'ignore_midx' can force this function
    to always act as if skip_midx was True.
    """
    self.bloom = None  # Always reopen the bloom as it may have been replaced
    self.do_bloom = False
    skip_midx = skip_midx or ignore_midx
    d = dict((p.name, p) for p in self.packs
             if not skip_midx or not isinstance(p, midx.PackMidx))
    if os.path.exists(self.dir):
        if not skip_midx:
            midxl = []
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir, '*.midx')):
                if not d.get(full):
                    mx = midx.PackMidx(full)
                    (mxd, mxf) = os.path.split(mx.name)
                    broken = False
                    for n in mx.idxnames:
                        if not os.path.exists(os.path.join(mxd, n)):
                            log(('warning: index %s missing\n'
                                 '  used by %s\n') % (n, mxf))
                            broken = True
                    if broken:
                        mx.close()
                        del mx
                        unlink(full)
                    else:
                        midxl.append(mx)
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for ix in midxl:
                any_needed = False
                for sub in ix.idxnames:
                    found = d.get(os.path.join(self.dir, sub))
                    if not found or isinstance(found, PackIdx):
                        # doesn't exist, or exists but not in a midx
                        any_needed = True
                        break
                if any_needed:
                    d[ix.name] = ix
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % os.path.basename(ix.name))
                    ix.close()
                    unlink(ix.name)
        for full in glob.glob(os.path.join(self.dir, '*.idx')):
            if not d.get(full):
                try:
                    ix = open_idx(full)
                except GitError as e:
                    add_error(e)
                    continue
                d[full] = ix
        bfull = os.path.join(self.dir, 'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        self.packs.sort(lambda x, y: -cmp(len(x), len(y)))
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
            self.do_bloom = True
        else:
            self.bloom = None
    debug1('PackIdxList: using %d index%s.\n'
           % (len(self.packs), len(self.packs) != 1 and 'es' or ''))
def do_bloom(path, outfilename):
    global _first
    b = None
    if os.path.exists(outfilename) and not opt.force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i, name in enumerate(glob.glob('%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)
    total = add_count + rest_count

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n"
                   % (len(b), rest_count))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS and
              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n"
                   % (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True,
                               expected=add_count)
    if not b:  # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first:
        _first = path
    dirprefix = (_first != path) and git.repo_rel(path) + ': ' or ''
    progress('bloom: %s%s %d file%s (%d object%s).\n'
             % (dirprefix, msg, len(add), len(add) != 1 and 's' or '',
                add_count, add_count != 1 and 's' or ''))

    tfname = None
    if b is None:
        tfname = os.path.join(path, 'bup.tmp.bloom')
        b = bloom.create(tfname, expected=add_count, k=opt.k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r'
                  % (icount * 100.0 / add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    # Currently, there's an open file object for tfname inside b.
    # Make sure it's closed before rename.
    b.close()

    if tfname:
        os.rename(tfname, outfilename)
def refresh(self, skip_midx=False):
    """Refresh the index list.
    This method verifies if .midx files were superseded (e.g. all of
    its contents are in another, bigger .midx file) and removes the
    superseded files.

    If skip_midx is True, all work on .midx files will be skipped
    and .midx files will be removed from the list.

    The instance variable 'ignore_midx' can force this function to
    always act as if skip_midx was True.
    """
    if self.bloom is not None:
        self.bloom.close()
    self.bloom = None  # Always reopen the bloom as it may have been replaced
    self.do_bloom = False
    skip_midx = skip_midx or self.ignore_midx
    d = dict((p.name, p) for p in self.packs
             if not skip_midx or not isinstance(p, midx.PackMidx))
    if os.path.exists(self.dir):
        if not skip_midx:
            midxl = []
            midxes = set(glob.glob(os.path.join(self.dir, b'*.midx')))
            # remove any *.midx files from our list that no longer exist
            for ix in list(d.values()):
                if not isinstance(ix, midx.PackMidx):
                    continue
                if ix.name in midxes:
                    continue
                # remove the midx
                del d[ix.name]
                ix.close()
                self.packs.remove(ix)
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in midxes:
                if not d.get(full):
                    mx = midx.PackMidx(full)
                    (mxd, mxf) = os.path.split(mx.name)
                    broken = False
                    for n in mx.idxnames:
                        if not os.path.exists(os.path.join(mxd, n)):
                            log(('warning: index %s missing\n'
                                 '  used by %s\n')
                                % (path_msg(n), path_msg(mxf)))
                            broken = True
                    if broken:
                        mx.close()
                        del mx
                        unlink(full)
                    else:
                        midxl.append(mx)
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for ix in midxl:
                any_needed = False
                for sub in ix.idxnames:
                    found = d.get(os.path.join(self.dir, sub))
                    if not found or isinstance(found, PackIdx):
                        # doesn't exist, or exists but not in a midx
                        any_needed = True
                        break
                if any_needed:
                    d[ix.name] = ix
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % path_msg(os.path.basename(ix.name)))
                    ix.close()
                    unlink(ix.name)
        for full in glob.glob(os.path.join(self.dir, b'*.idx')):
            if not d.get(full):
                try:
                    ix = open_idx(full)
                except GitError as e:
                    add_error(e)
                    continue
                d[full] = ix
        bfull = os.path.join(self.dir, b'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        self.packs.sort(reverse=True, key=lambda x: len(x))
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
            self.do_bloom = True
        else:
            self.bloom = None
    debug1('PackIdxList: using %d index%s.\n'
           % (len(self.packs), len(self.packs) != 1 and 'es' or ''))
if opt.max_files < 0:
    opt.max_files = max_files()
assert(opt.max_files >= 5)

extra = [argv_bytes(x) for x in extra]

if opt.check:
    # check existing midx files
    if extra:
        midxes = extra
    else:
        midxes = []
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            midxes += glob.glob(os.path.join(path, b'*.midx'))
    for name in midxes:
        check_midx(name)
    if not saved_errors:
        log('All tests passed.\n')
else:
    if extra:
        sys.stdout.flush()
        do_midx(git.repo(b'objects/pack'), opt.output, extra, b'',
                byte_stream(sys.stdout))
    elif opt.auto or opt.force:
        sys.stdout.flush()
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path_msg(path))