def packed(self):
    try:
        ctime = xstat.nsecs_to_timespec(self.ctime)
        mtime = xstat.nsecs_to_timespec(self.mtime)
        atime = xstat.nsecs_to_timespec(self.atime)
        return struct.pack(INDEX_SIG,
                           self.dev, self.ino, self.nlink,
                           ctime[0], ctime[1],
                           mtime[0], mtime[1],
                           atime[0], atime[1],
                           self.size, self.mode,
                           self.gitmode, self.sha, self.flags,
                           self.children_ofs, self.children_n,
                           self.meta_ofs)
    except (DeprecationWarning, struct.error) as e:
        log("pack error: %s (%r)\n" % (e, self))
        raise
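# A minimal sketch (not from bup) of the inverse operation, assuming the
# INDEX_SIG field order implied by packed() above; unpacked_fields() is a
# hypothetical helper for illustration only.
def unpacked_fields(data):
    (dev, ino, nlink,
     ctime_s, ctime_ns, mtime_s, mtime_ns, atime_s, atime_ns,
     size, mode, gitmode, sha, flags,
     children_ofs, children_n, meta_ofs) = struct.unpack(INDEX_SIG, data)
    return dict(dev=dev, ino=ino, nlink=nlink,
                ctime=(ctime_s, ctime_ns),
                mtime=(mtime_s, mtime_ns),
                atime=(atime_s, atime_ns),
                size=size, mode=mode, gitmode=gitmode, sha=sha,
                flags=flags, children_ofs=children_ofs,
                children_n=children_n, meta_ofs=meta_ofs)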
def getattr(self, path):
    log('--getattr(%r)\n' % path)
    try:
        node = cache_get(self.top, path)
        st = Stat()
        st.st_nlink = node.nlinks()
        st.st_size = node.size()  # Until/unless we store the size in m.
        if self.meta:
            m = node.metadata()
            if m:
                st.st_mode = m.mode
                st.st_uid = m.uid
                st.st_gid = m.gid
                st.st_atime = max(0, xstat.fstime_floor_secs(m.atime))
                st.st_mtime = max(0, xstat.fstime_floor_secs(m.mtime))
                st.st_ctime = max(0, xstat.fstime_floor_secs(m.ctime))
                st.st_rdev = m.rdev
        else:
            if self.mode:
                mask = stat.S_ISDIR(node.mode) and 0111
                setmode = stat.S_ISLNK(node.mode) and 0777
                st.st_mode = (self.mode & (0666 | mask)) | \
                             (node.mode & ~0777) | \
                             setmode
            else:
                st.st_mode = node.mode
            st.st_atime = node.atime
            st.st_mtime = node.mtime
            st.st_ctime = node.ctime
            st.st_uid = self.uid or 0
            st.st_gid = self.gid or 0
        return st
    except vfs.NoSuchFile:
        return -errno.ENOENT
def check_index(reader):
    try:
        log('check: checking forward iteration...\n')
        e = None
        d = {}
        for e in reader.forward_iter():
            if e.children_n:
                if opt.verbose:
                    log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
                                            e.name))
                assert(e.children_ofs)
                assert(e.name.endswith('/'))
                assert(not d.get(e.children_ofs))
                d[e.children_ofs] = 1
            if e.flags & index.IX_HASHVALID:
                assert(e.sha != index.EMPTY_SHA)
                assert(e.gitmode)
        assert(not e or e.name == '/')  # last entry is *always* /
        log('check: checking normal iteration...\n')
        last = None
        for e in reader:
            if last:
                assert(last > e.name)
            last = e.name
    except:
        log('index error! at %r\n' % e)
        raise
    log('check: passed.\n')
def test_pack_name_lookup():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    git.verbose = 1
    packdir = git.repo('objects/pack')

    idxnames = []
    hashes = []

    for start in range(0, 28, 2):
        w = git.PackWriter()
        for i in range(start, start + 2):
            hashes.append(w.new_blob(str(i)))
        log('\n')
        idxnames.append(os.path.basename(w.close() + '.idx'))

    r = git.PackIdxList(packdir)
    WVPASSEQ(len(r.packs), 2)
    for e, idxname in enumerate(idxnames):
        for i in range(e * 2, (e + 1) * 2):
            WVPASSEQ(r.exists(hashes[i], want_source=True), idxname)
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
def open(self, path, flags):
    log('--open(%r)\n' % path)
    node = cache_get(self.top, path)
    accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
    if (flags & accmode) != os.O_RDONLY:
        return -errno.EACCES
    node.open()
def readdir(self, path, offset):
    log('--readdir(%r)\n' % path)
    node = cache_get(self.top, path)
    yield fuse.Direntry('.')
    yield fuse.Direntry('..')
    for sub in node.subs():
        yield fuse.Direntry(sub.name)
def find_live_objects(existing_count, cat_pipe, opt):
    prune_visited_trees = True  # In case we want a command line option later
    pack_dir = git.repo('objects/pack')
    ffd, bloom_filename = tempfile.mkstemp('.bloom', 'tmp-gc-', pack_dir)
    os.close(ffd)
    # FIXME: allow selection of k?
    # FIXME: support ephemeral bloom filters (i.e. *never* written to disk)
    live_objs = bloom.create(bloom_filename, expected=existing_count, k=None)
    stop_at, trees_visited = None, None
    if prune_visited_trees:
        trees_visited = set()
        stop_at = lambda (x): x.decode('hex') in trees_visited
    approx_live_count = 0
    for ref_name, ref_id in git.list_refs():
        for item in walk_object(cat_pipe, ref_id.encode('hex'),
                                stop_at=stop_at,
                                include_data=None):
            # FIXME: batch ids
            if opt.verbose:
                report_live_item(approx_live_count, existing_count,
                                 ref_name, ref_id, item)
            bin_id = item.id.decode('hex')
            if trees_visited is not None and item.type == 'tree':
                trees_visited.add(bin_id)
            if opt.verbose:
                if not live_objs.exists(bin_id):
                    live_objs.add(bin_id)
                    approx_live_count += 1
            else:
                live_objs.add(bin_id)
    trees_visited = None
    if opt.verbose:
        log('expecting to retain about %.2f%% unnecessary objects\n'
            % live_objs.pfalse_positive())
    return live_objs
def save_tree(output_file, paths,
              recurse=False,
              write_paths=True,
              save_symlinks=True,
              xdev=False):
    # Issue top-level rewrite warnings.
    for path in paths:
        safe_path = _clean_up_path_for_archive(path)
        if safe_path != path:
            log('archiving "%s" as "%s"\n' % (path, safe_path))

    if not recurse:
        for p in paths:
            safe_path = _clean_up_path_for_archive(p)
            st = xstat.lstat(p)
            if stat.S_ISDIR(st.st_mode):
                safe_path += "/"
            m = from_path(p, statinfo=st, archive_path=safe_path,
                          save_symlinks=save_symlinks)
            if verbose:
                print >> sys.stderr, m.path
            m.write(output_file, include_path=write_paths)
    else:
        start_dir = os.getcwd()
        try:
            for (p, st) in recursive_dirlist(paths, xdev=xdev):
                dirlist_dir = os.getcwd()
                os.chdir(start_dir)
                safe_path = _clean_up_path_for_archive(p)
                m = from_path(p, statinfo=st, archive_path=safe_path,
                              save_symlinks=save_symlinks)
                if verbose:
                    print >> sys.stderr, m.path
                m.write(output_file, include_path=write_paths)
                os.chdir(dirlist_dir)
        finally:
            os.chdir(start_dir)
def read(self, path, size, offset):
    if self.verbose > 0:
        log('--read(%r)\n' % path)
    n = cache_get(self.top, path)
    o = n.open()
    o.seek(offset)
    return o.read(size)
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (name, e))
        return
    for count, subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount, e in enumerate(sub):
            if not (ecount % 1234):
                qprogress('  %d/%d: %s %d/%d\r'
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname), ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
    prev = None
    for ecount, e in enumerate(ix):
        if not (ecount % 1234):
            qprogress('  Ordering: %d/%d\r' % (ecount, len(ix)))
        if not e >= prev:
            add_error('%s: ordering error: %s < %s'
                      % (nicename, str(e).encode('hex'),
                         str(prev).encode('hex')))
        prev = e
def handle_sigterm(signum, frame):
    global io_loop
    debug1('\nbup-web: signal %d received\n' % signum)
    log('Shutdown requested\n')
    if not io_loop:
        sys.exit(0)
    io_loop.stop()
def __init__(self, filename):
    self.filename = filename
    self.m = ''
    self.writable = False
    self.count = 0
    f = None
    try:
        f = open(filename, 'r+')
    except IOError as e:
        if e.errno == errno.ENOENT:
            pass
        else:
            raise
    if f:
        b = f.read(len(INDEX_HDR))
        if b != INDEX_HDR:
            log('warning: %s: header: expected %r, got %r\n'
                % (filename, INDEX_HDR, b))
        else:
            st = os.fstat(f.fileno())
            if st.st_size:
                self.m = mmap_readwrite(f)
                self.writable = True
                self.count = struct.unpack(
                    FOOTER_SIG,
                    str(buffer(self.m, st.st_size - FOOTLEN, FOOTLEN)))[0]
def __init__(self, filename):
    # Map metadata hashes to bupindex.meta offsets.
    self._offsets = {}
    self._filename = filename
    self._file = None
    # FIXME: see how slow this is; does it matter?
    m_file = open(filename, 'ab+')
    try:
        m_file.seek(0)
        try:
            m_off = m_file.tell()
            m = metadata.Metadata.read(m_file)
            while m:
                m_encoded = m.encode()
                self._offsets[m_encoded] = m_off
                m_off = m_file.tell()
                m = metadata.Metadata.read(m_file)
        except EOFError:
            pass
        except:
            log('index metadata in %r appears to be corrupt\n' % filename)
            raise
    finally:
        m_file.close()
    self._file = open(filename, 'ab')
def do_ls(repo, args):
    try:
        opt = ls.opts_from_cmdline(args, onabort=OptionError)
    except OptionError as e:
        log('error: %s\n' % e)
        return
    return ls.within_repo(repo, opt)
def check_linux_file_attr_api():
    global get_linux_file_attr, set_linux_file_attr
    if not (get_linux_file_attr or set_linux_file_attr):
        return
    if _suppress_linux_file_attr:
        log('Warning: Linux attr support disabled (see "bup help index").\n')
        get_linux_file_attr = set_linux_file_attr = None
def readlink(self, path):
    if self.verbose > 0:
        log('--readlink(%r)\n' % path)
    res = vfs.resolve(self.repo, path, follow=False)
    name, item = res[-1]
    if not item:
        return -errno.ENOENT
    return vfs.readlink(self.repo, item)
def __init__(self, repo_dir = None):
    global _ver_warned
    self.repo_dir = repo_dir
    wanted = ('1','5','6')
    if ver() < wanted:
        log('error: git version must be at least 1.5.6\n')
        sys.exit(1)
    self.p = self.inprogress = None
def remove_stale_files(new_pack_prefix):
    if opt.verbose and new_pack_prefix:
        log('created ' + basename(new_pack_prefix) + '\n')
    for p in ns.stale_files:
        if opt.verbose:
            log('removing ' + basename(p) + '\n')
        os.unlink(p)
    ns.stale_files = []
def ruin_bloom(bloomfilename):
    rbloomfilename = git.repo_rel(bloomfilename)
    if not os.path.exists(bloomfilename):
        log("%s\n" % bloomfilename)
        add_error("bloom: %s not found to ruin\n" % rbloomfilename)
        return
    b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1)
    b.map[16:16 + 2**b.bits] = '\0' * 2**b.bits
def read(self, path, size, offset):
    if self.verbose > 0:
        log('--read(%r)\n' % path)
    res = vfs.resolve(self.repo, path, follow=False)
    name, item = res[-1]
    if not item:
        return -errno.ENOENT
    with vfs.fopen(self.repo, item) as f:
        f.seek(offset)
        return f.read(size)
def open(self, path, flags):
    if self.verbose > 0:
        log('--open(%r)\n' % path)
    res = vfs.resolve(self.repo, path, follow=False)
    name, item = res[-1]
    if not item:
        return -errno.ENOENT
    accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
    if (flags & accmode) != os.O_RDONLY:
        return -errno.EACCES
def remove_stale_files(new_pack_prefix):
    if verbosity and new_pack_prefix:
        log('created ' + basename(new_pack_prefix) + '\n')
    for p in ns.stale_files:
        if verbosity:
            log('removing ' + basename(p) + '\n')
        os.unlink(p)
    if ns.stale_files:  # So git cat-pipe will close them
        cat_pipe.restart()
    ns.stale_files = []
def git_verify(base):
    if opt.quick:
        try:
            quick_verify(base)
        except Exception as e:
            log('error: %s\n' % e)
            return 1
        return 0
    else:
        return run(['git', 'verify-pack', '--', base])
def clear_index(indexfile):
    indexfiles = [indexfile, indexfile + '.meta', indexfile + '.hlink']
    for indexfile in indexfiles:
        path = git.repo(indexfile)
        try:
            os.remove(path)
            if opt.verbose:
                log('clear: removed %s\n' % path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
def join(self, id):
    """Generate a list of the content of all blobs that can be reached
    from an object.  The hash given in 'id' must point to a blob, a tree
    or a commit.  The content of all blobs that can be seen from trees
    or commits will be added to the list.
    """
    try:
        for d in self._join(self.get(id)):
            yield d
    except StopIteration:
        log('booger!\n')
def _write(self, sha, type, content):
    if verbose:
        log('>')
    if not sha:
        sha = calc_hash(type, content)
    size, crc = self._raw_write(_encode_packobj(type, content,
                                                self.compression_level),
                                sha=sha)
    if self.outbytes >= max_pack_size or self.count >= max_pack_objects:
        self.breakpoint()
    return sha
def par2_setup():
    global par2_ok
    rv = 1
    try:
        p = subprocess.Popen(['par2', '--help'],
                             stdout=nullf, stderr=nullf, stdin=nullf)
        rv = p.wait()
    except OSError:
        log('fsck: warning: par2 not found; disabling recovery features.\n')
    else:
        par2_ok = 1
def bup_rm(paths, compression=6, verbosity=None):
    root = vfs.RefList(None)
    dead_branches, dead_saves = dead_items(root, paths)
    die_if_errors('not proceeding with any removals\n')

    updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))

    for branch, node in dead_branches.iteritems():
        ref = 'refs/heads/' + branch
        assert(not ref in updated_refs)
        updated_refs[ref] = (node.hash, None)

    if dead_saves:
        writer = git.PackWriter(compression_level=compression)
        try:
            for branch, saves in dead_saves.iteritems():
                assert(saves)
                updated_refs['refs/heads/' + branch] = rm_saves(saves, writer)
        except:
            if writer:
                writer.abort()
            raise
        else:
            if writer:
                # Must close before we can update the ref(s) below.
                writer.close()

    # Only update the refs here, at the very end, so that if something
    # goes wrong above, the old refs will be undisturbed.  Make an attempt
    # to update each ref.
    for ref_name, info in updated_refs.iteritems():
        orig_ref, new_ref = info
        try:
            if not new_ref:
                git.delete_ref(ref_name, orig_ref.encode('hex'))
            else:
                git.update_ref(ref_name, new_ref, orig_ref)
                if verbosity:
                    new_hex = new_ref.encode('hex')
                    if orig_ref:
                        orig_hex = orig_ref.encode('hex')
                        log('updated %r (%s -> %s)\n'
                            % (ref_name, orig_hex, new_hex))
                    else:
                        log('updated %r (%s)\n' % (ref_name, new_hex))
        except (git.GitError, ClientError) as ex:
            if new_ref:
                add_error('while trying to update %r (%s -> %s): %s'
                          % (ref_name, orig_ref, new_ref, ex))
            else:
                add_error('while trying to delete %r (%s): %s'
                          % (ref_name, orig_ref, ex))
def count_objects(dir, verbosity):
    # For now we'll just use open_idx(), but we could probably be much
    # more efficient since all we need is a single integer (the last
    # fanout entry) from each index.
    object_count = 0
    indexes = glob.glob(os.path.join(dir, '*.idx'))
    for i, idx_name in enumerate(indexes):
        if verbosity:
            log('found %d objects (%d/%d %s)\r'
                % (object_count, i + 1, len(indexes), basename(idx_name)))
        idx = git.open_idx(idx_name)
        object_count += len(idx)
    return object_count
def _fast_get(self, id):
    if not self.p or self.p.poll() != None:
        self._restart()
    assert(self.p)
    poll_result = self.p.poll()
    assert(poll_result == None)
    if self.inprogress:
        log('_fast_get: opening %r while %r is open\n'
            % (id, self.inprogress))
    assert(not self.inprogress)
    assert(id.find('\n') < 0)
    assert(id.find('\r') < 0)
    assert(not id.startswith('-'))
    self.inprogress = id
    self.p.stdin.write('%s\n' % id)
    self.p.stdin.flush()
    hdr = self.p.stdout.readline()
    if hdr.endswith(' missing\n'):
        self.inprogress = None
        raise KeyError('blob %r is missing' % id)
    spl = hdr.split(' ')
    if len(spl) != 3 or len(spl[0]) != 40:
        raise GitError('expected blob, got %r' % spl)
    (hex, type, size) = spl

    it = _AbortableIter(chunkyreader(self.p.stdout, int(spl[2])),
                        onabort=self._abort)
    try:
        yield type
        for blob in it:
            yield blob
        readline_result = self.p.stdout.readline()
        assert(readline_result == '\n')
        self.inprogress = None
    except Exception as e:
        it.abort()
        raise
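# For reference, the `git cat-file --batch` exchange parsed above looks like
# this (documented in git's cat-file man page):
#
#   request:  "<id>\n"
#   reply:    "<40-hex sha> <type> <size>\n", then <size> bytes of content,
#             then a terminating "\n"
#   missing:  "<id> missing\n"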
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % path_msg(nicename))
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (path_msg(name), e))
        return
    with ix:
        for count, subname in enumerate(ix.idxnames):
            with git.open_idx(os.path.join(os.path.dirname(name), subname)) \
                 as sub:
                for ecount, e in enumerate(sub):
                    if not (ecount % 1234):
                        qprogress('  %d/%d: %s %d/%d\r'
                                  % (count, len(ix.idxnames),
                                     git.shorten_hash(subname).decode('ascii'),
                                     ecount, len(sub)))
                    if not sub.exists(e):
                        add_error("%s: %s: %s missing from idx"
                                  % (path_msg(nicename),
                                     git.shorten_hash(subname).decode('ascii'),
                                     hexstr(e)))
                    if not ix.exists(e):
                        add_error("%s: %s: %s missing from midx"
                                  % (path_msg(nicename),
                                     git.shorten_hash(subname).decode('ascii'),
                                     hexstr(e)))
        prev = None
        for ecount, e in enumerate(ix):
            if not (ecount % 1234):
                qprogress('  Ordering: %d/%d\r' % (ecount, len(ix)))
            if e and prev and not e >= prev:
                add_error('%s: ordering error: %s < %s'
                          % (nicename, hexstr(e), hexstr(prev)))
            prev = e
def __init__(self, filename, f=None, readwrite=False, expected=-1):
    self.name = filename
    self.rwfile = None
    self.map = None
    assert(filename.endswith('.bloom'))
    if readwrite:
        assert(expected > 0)
        self.rwfile = f = f or open(filename, 'r+b')
        f.seek(0)

        # Decide if we want to mmap() the pages as writable ('immediate'
        # write) or else map them privately for later writing back to
        # the file ('delayed' write).  A bloom table's write access
        # pattern is such that we dirty almost all the pages after adding
        # very few entries.  But the table is so big that dirtying
        # *all* the pages often exceeds Linux's default
        # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
        # thus causing it to start flushing the table before we're
        # finished... even though there's more than enough space to
        # store the bloom table in RAM.
        #
        # To work around that behaviour, if we calculate that we'll
        # probably end up touching the whole table anyway (at least
        # one bit flipped per memory page), let's use a "private" mmap,
        # which defeats Linux's ability to flush it to disk.  Then we'll
        # flush it as one big lump during close().
        pages = os.fstat(f.fileno()).st_size / 4096 * 5  # assume k=5
        self.delaywrite = expected > pages
        debug1('bloom: delaywrite=%r\n' % self.delaywrite)
        if self.delaywrite:
            self.map = mmap_readwrite_private(self.rwfile, close=False)
        else:
            self.map = mmap_readwrite(self.rwfile, close=False)
    else:
        self.rwfile = None
        f = f or open(filename, 'rb')
        self.map = mmap_read(f)
    got = str(self.map[0:4])
    if got != 'BLOM':
        log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
        return self._init_failed()
    ver = struct.unpack('!I', self.map[4:8])[0]
    if ver < BLOOM_VERSION:
        log('Warning: ignoring old-style (v%d) bloom %r\n' % (ver, filename))
        return self._init_failed()
    if ver > BLOOM_VERSION:
        log('Warning: ignoring too-new (v%d) bloom %r\n' % (ver, filename))
        return self._init_failed()
    self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
    idxnamestr = str(self.map[16 + 2**self.bits:])
    if idxnamestr:
        self.idxnames = idxnamestr.split('\0')
    else:
        self.idxnames = []
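# A minimal sketch (hypothetical helper, not part of bup) of the
# delayed-write decision above: with k=5 probes per added entry scattering
# bits across the table, almost every 4096-byte page gets dirtied quickly,
# so once the expected entry count exceeds the page-based threshold, a
# private mapping flushed once at close() beats piecemeal kernel writeback.
def would_delay_write(table_size_bytes, expected_entries, k=5,
                      page_size=4096):
    pages = table_size_bytes // page_size * k
    return expected_entries > pages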
def _add_linux_attr(self, path, st):
    check_linux_file_attr_api()
    if not get_linux_file_attr:
        return
    if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode):
        try:
            attr = get_linux_file_attr(path)
            if attr != 0:
                self.linux_attr = attr
        except OSError as e:
            if e.errno == errno.EACCES:
                add_error('read Linux attr: %s' % e)
            elif e.errno in (ENOTTY, ENOSYS, EOPNOTSUPP):
                # Assume filesystem doesn't support attrs.
                return
            elif e.errno == EINVAL:
                global _warned_about_attr_einval
                if not _warned_about_attr_einval:
                    log("Ignoring attr EINVAL;"
                        + " if you're not using ntfs-3g, please report: "
                        + path_msg(path) + '\n')
                    _warned_about_attr_einval = True
                return
            else:
                raise
def test_pack_name_lookup():
    with no_lingering_errors():
        with test_tempdir('bup-tgit-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bup_exe
            os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
            git.init_repo(bupdir)
            git.verbose = 1
            packdir = git.repo('objects/pack')

            idxnames = []
            hashes = []

            for start in range(0, 28, 2):
                w = git.PackWriter()
                for i in range(start, start + 2):
                    hashes.append(w.new_blob(str(i)))
                log('\n')
                idxnames.append(os.path.basename(w.close() + '.idx'))

            r = git.PackIdxList(packdir)
            WVPASSEQ(len(r.packs), 2)
            for e, idxname in enumerate(idxnames):
                for i in range(e * 2, (e + 1) * 2):
                    WVPASSEQ(r.exists(hashes[i], want_source=True), idxname)
def getattr(self, path):
    if self.verbose > 0:
        log('--getattr(%r)\n' % path)
    res = vfs.lresolve(self.repo, path, want_meta=(not self.fake_metadata))
    name, item = res[-1]
    if not item:
        return -errno.ENOENT
    if self.fake_metadata:
        item = vfs.augment_item_meta(self.repo, item, include_size=True)
    else:
        item = vfs.ensure_item_has_metadata(self.repo, item,
                                            include_size=True)
    meta = item.meta
    # FIXME: do we want/need to do anything more with nlink?
    st = fuse.Stat(st_mode=meta.mode, st_nlink=1, st_size=meta.size)
    st.st_mode = meta.mode
    st.st_uid = meta.uid
    st.st_gid = meta.gid
    st.st_atime = max(0, xstat.fstime_floor_secs(meta.atime))
    st.st_mtime = max(0, xstat.fstime_floor_secs(meta.mtime))
    st.st_ctime = max(0, xstat.fstime_floor_secs(meta.ctime))
    return st
def check_bloom(path, bloomfilename, idx):
    rbloomfilename = git.repo_rel(bloomfilename)
    ridx = git.repo_rel(idx)
    if not os.path.exists(bloomfilename):
        log('bloom: %s: does not exist.\n' % path_msg(rbloomfilename))
        return
    b = bloom.ShaBloom(bloomfilename)
    if not b.valid():
        add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename))
        return
    base = os.path.basename(idx)
    if base not in b.idxnames:
        log('bloom: %s does not contain the idx.\n'
            % path_msg(rbloomfilename))
        return
    if base == idx:
        idx = os.path.join(path, idx)
    log('bloom: bloom file: %s\n' % path_msg(rbloomfilename))
    log('bloom: checking %s\n' % path_msg(ridx))
    for objsha in git.open_idx(idx):
        if not b.exists(objsha):
            add_error('bloom: ERROR: object %s missing' % hexstr(objsha))
def is_par2_parallel():
    # A true result means it definitely allows -t1; a false result is
    # technically inconclusive, but likely means no.
    tmpdir = mkdtemp(prefix=b'bup-fsck')
    try:
        canary = tmpdir + b'/canary'
        with open(canary, 'wb') as f:
            f.write(b'canary\n')
        p = subprocess.Popen((b'par2', b'create', b'-qq', b'-t1', canary),
                             stderr=PIPE, stdin=nullf)
        _, err = p.communicate()
        parallel = p.returncode == 0
        if opt.verbose:
            if err != b'Invalid option specified: -t1\n':
                log('Unexpected par2 error output\n')
                log(repr(err))
            if parallel:
                log('Assuming par2 supports parallel processing\n')
            else:
                log('Assuming par2 does not support parallel processing\n')
        return parallel
    finally:
        rmtree(tmpdir)
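# Hypothetical usage sketch (names invented for illustration): probe once
# and cache the answer, so each later par2 invocation can decide whether
# it is safe to pass -t1.
#
#   _par2_parallel = None
#   def par2_parallel():
#       global _par2_parallel
#       if _par2_parallel is None:
#           _par2_parallel = is_par2_parallel()
#       return _par2_parallel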
def __init__(self, filename):
    self.closed = False
    self.name = filename
    self.force_keep = False
    self.map = None
    assert(filename.endswith(b'.midx'))
    self.map = mmap_read(open(filename))
    if self.map[0:4] != b'MIDX':
        log('Warning: skipping: invalid MIDX header in %r\n'
            % path_msg(filename))
        self.force_keep = True
        self._init_failed()
        return
    ver = struct.unpack('!I', self.map[4:8])[0]
    if ver < MIDX_VERSION:
        log('Warning: ignoring old-style (v%d) midx %r\n'
            % (ver, path_msg(filename)))
        self.force_keep = False  # old stuff is boring
        self._init_failed()
        return
    if ver > MIDX_VERSION:
        log('Warning: ignoring too-new (v%d) midx %r\n'
            % (ver, path_msg(filename)))
        self.force_keep = True  # new stuff is exciting
        self._init_failed()
        return
    self.bits = _helpers.firstword(self.map[8:12])
    self.entries = 2**self.bits
    self.fanout_ofs = 12
    # fanout len is self.entries * 4
    self.sha_ofs = self.fanout_ofs + self.entries * 4
    self.nsha = self._fanget(self.entries - 1)
    # sha table len is self.nsha * 20
    self.which_ofs = self.sha_ofs + 20 * self.nsha
    # which len is self.nsha * 4
    self.idxnames = self.map[self.which_ofs + 4 * self.nsha:].split(b'\0')
def completer(text, iteration):
    global repo
    global _last_line
    global _last_res
    global rl_completion_suppress_append
    if rl_completion_suppress_append is not None:
        rl_completion_suppress_append.value = 1
    try:
        line = readline.get_line_buffer()[:readline.get_endidx()]
        if _last_line != line:
            _last_res = _completer_get_subs(repo, line)
            _last_line = line
        (dir, name, qtype, lastword, subs) = _last_res
        if iteration < len(subs):
            path = subs[iteration]
            leaf_name, leaf_item = path[-1]
            res = vfs.try_resolve(repo, leaf_name, parent=path[:-1])
            leaf_name, leaf_item = res[-1]
            fullname = os.path.join(*(name for name, item in res))
            if stat.S_ISDIR(vfs.item_mode(leaf_item)):
                ret = shquote.what_to_add(qtype, lastword, fullname + '/',
                                          terminate=False)
            else:
                ret = shquote.what_to_add(qtype, lastword, fullname,
                                          terminate=True) + ' '
            return text + ret
    except Exception as e:
        log('\n')
        try:
            import traceback
            traceback.print_tb(sys.exc_traceback)
        except Exception as e2:
            log('Error printing traceback: %s\n' % e2)
        log('\nError in completion: %s\n' % e)
    graft_points.append((resolve_parent(old_path),
                         resolve_parent(new_path)))

is_reverse = environ.get(b'BUP_SERVER_REVERSE')
if is_reverse and opt.remote:
    o.fatal("don't use -r in reverse mode; it's automatic")

name = opt.name
if name and not valid_save_name(name):
    o.fatal("'%s' is not a valid branch name" % path_msg(name))
refname = name and b'refs/heads/%s' % name or None

if opt.remote or is_reverse:
    try:
        cli = client.Client(opt.remote)
    except client.ClientError as e:
        log('error: %s\n' % e)
        sys.exit(1)
    oldref = refname and cli.read_ref(refname) or None
    w = cli.new_packwriter(compression_level=opt.compress)
else:
    cli = None
    oldref = refname and git.read_ref(refname) or None
    w = git.PackWriter(compression_level=opt.compress)

handle_ctrl_c()

# Metadata is stored in a file named .bupm in each directory.  The
# first metadata entry will be the metadata for the current directory.
# The remaining entries will be for each of the other directory
# elements, in the order they're listed in the index.
    if opt.ignore_midx:
        for pack in mi.packs:
            do_predict(pack)
    else:
        do_predict(mi)
else:
    # default mode: find longest matching prefix
    last = '\0' * 20
    longmatch = 0
    for i in mi:
        if i == last:
            continue
        #assert(str(i) >= last)
        pm = _helpers.bitmatch(last, i)
        longmatch = max(longmatch, pm)
        last = i
    print(longmatch)
    log('%d matching prefix bits\n' % longmatch)
    doublings = math.log(len(mi), 2)
    bpd = longmatch / doublings
    log('%.2f bits per doubling\n' % bpd)
    remain = 160 - longmatch
    rdoublings = remain / bpd
    log('%d bits (%.2f doublings) remaining\n' % (remain, rdoublings))
    larger = 2**rdoublings
    log('%g times larger is possible\n' % larger)
    perperson = larger / POPULATION_OF_EARTH
    log('\nEveryone on earth could have %d data sets like yours, all in one\n'
        'repository, and we would expect 1 object collision.\n'
        % int(perperson))
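# Worked example of the extrapolation above, with hypothetical numbers: a
# repository of 2**20 objects is ~20 doublings (math.log(len(mi), 2)); if
# the longest shared prefix is 40 bits, bpd = 40 / 20 = 2.0 bits per
# doubling.  The remaining 160 - 40 = 120 bits then correspond to
# 120 / 2.0 = 60 more doublings, i.e. the repository could grow ~2**60
# times larger before we'd expect a single SHA-1 collision.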
def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    fake_hash = None
    if opt.fake_valid:
        def fake_hash(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    for path, pst in recursive_dirlist([top], xdev=opt.xdev,
                                       bup_dir=bup_dir,
                                       excluded_paths=excluded_paths,
                                       exclude_rxs=exclude_rxs,
                                       xdev_exceptions=xdev_exceptions):
        if opt.verbose >= 2 or (opt.verbose == 1
                                and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        elif not (total % 128):
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        total += 1

        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()

        if rig.cur and rig.cur.name == path:  # paths that already existed
            need_repack = False
            if rig.cur.stale(pst, tstart, check_device=opt.check_device):
                try:
                    meta = metadata.from_path(path, statinfo=pst)
                except (OSError, IOError) as e:
                    add_error(e)
                    rig.next()
                    continue
                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                    hlinks.del_path(rig.cur.name)
                if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                    hlinks.add_path(path, pst.st_dev, pst.st_ino)
                # Clear these so they don't bloat the store -- they're
                # already in the index (since they vary a lot and they're
                # fixed length).  If you've noticed "tmax", you might
                # wonder why it's OK to do this, since that code may
                # adjust (mangle) the index mtime and ctime -- producing
                # fake values which must not end up in a .bupm.  However,
                # it looks like that shouldn't be possible: (1) When
                # "save" validates the index entry, it always reads the
                # metadata from the filesystem.  (2) Metadata is only
                # read/used from the index if hashvalid is true.  (3)
                # "faked" entries will be stale(), and so we'll invalidate
                # them below.
                meta.ctime = meta.mtime = meta.atime = 0
                meta_ofs = msw.store(meta)
                rig.cur.update_from_stat(pst, meta_ofs)
                rig.cur.invalidate()
                need_repack = True
            if not (rig.cur.flags & index.IX_HASHVALID):
                if fake_hash:
                    rig.cur.gitmode, rig.cur.sha = fake_hash(path)
                    rig.cur.flags |= index.IX_HASHVALID
                    need_repack = True
            if opt.fake_invalid:
                rig.cur.invalidate()
                need_repack = True
            if need_repack:
                rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)

    elapsed = time.time() - index_start
    paths_per_sec = total / elapsed if elapsed else 0
    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))

    hlinks.prepare_save()

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile, msw, tmax)
            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?
                # When?
                mi.add_ixentry(e)
            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()

    msw.close()
    hlinks.commit_save()
    o.fatal('cannot clear an external index (via -f)')

# FIXME: remove this once we account for timestamp races, i.e. index;
# touch new-file; index.  It's possible for this to happen quickly
# enough that new-file ends up with the same timestamp as the first
# index, and then bup will ignore it.
tick_start = time.time()
time.sleep(1 - (tick_start - int(tick_start)))

git.check_repo_or_die()
indexfile = opt.indexfile or git.repo('bupindex')

handle_ctrl_c()

if opt.check:
    log('check: starting initial check.\n')
    check_index(index.Reader(indexfile))

if opt.clear:
    log('clear: clearing index.\n')
    clear_index(indexfile)

if opt.update:
    if not extra:
        o.fatal('update mode (-u) requested but no paths given')
    excluded_paths = parse_excludes(flags, o.fatal)
    exclude_rxs = parse_rx_excludes(flags, o.fatal)
    xexcept = index.unique_resolved_paths(extra)
    for rp, path in index.reduce_paths(extra):
        update_index(rp, excluded_paths, exclude_rxs,
                     xdev_exceptions=xexcept)
def logcmd(cmd):
    log(shstr(cmd).decode(errors='backslashreplace') + '\n')
""" o = options.Options(optspec) (opt, flags, extra) = o.parse(sys.argv[1:]) git.check_repo_or_die() tags = [t for sublist in git.tags().values() for t in sublist] if opt.delete: # git.delete_ref() doesn't complain if a ref doesn't exist. We # could implement this verification but we'd need to read in the # contents of the tag file and pass the hash, and we already know # about the tag's existance via "tags". if not opt.force and opt.delete not in tags: log("error: tag '%s' doesn't exist\n" % opt.delete) sys.exit(1) tag_file = 'refs/tags/%s' % opt.delete git.delete_ref(tag_file) sys.exit(0) if not extra: for t in tags: print t sys.exit(0) elif len(extra) < 2: o.fatal('no commit ref or hash given.') (tag_name, commit) = extra[:2] if not tag_name: o.fatal("tag name must not be empty.")
def main(argv):
    opt = opts_from_cmdline(argv)
    if opt.verbose >= 2:
        git.verbose = opt.verbose - 1
    if opt.fanout:
        hashsplit.fanout = opt.fanout
    if opt.blobs:
        hashsplit.fanout = 0
    if opt.bwlimit:
        client.bwlimit = opt.bwlimit

    start_time = time.time()

    sys.stdout.flush()
    out = byte_stream(sys.stdout)
    stdin = byte_stream(sys.stdin)

    if opt.git_ids:
        # the input is actually a series of git object ids that we should
        # retrieve and split.
        #
        # This is a bit messy, but basically it converts from a series of
        # CatPipe.get() iterators into a series of file-type objects.  It
        # would be less ugly if either CatPipe.get() returned a file-like
        # object (not very efficient), or split_to_shalist() expected an
        # iterator instead of a file.
        cp = git.CatPipe()
        class IterToFile:
            def __init__(self, it):
                self.it = iter(it)
            def read(self, size):
                v = next(self.it, None)
                return v or b''
        def read_ids():
            while 1:
                line = stdin.readline()
                if not line:
                    break
                if line:
                    line = line.strip()
                try:
                    it = cp.get(line.strip())
                    next(it, None)  # skip the file info
                except KeyError as e:
                    add_error('error: %s' % e)
                    continue
                yield IterToFile(it)
        files = read_ids()
    else:
        # the input either comes from a series of files or from stdin.
        if opt.sources:
            files = (open(argv_bytes(fn), 'rb') for fn in opt.sources)
        else:
            files = [stdin]

    writing = not (opt.noop or opt.copy)
    remote_dest = opt.remote or opt.is_reverse

    if writing:
        git.check_repo_or_die()

    if remote_dest and writing:
        cli = repo = client.Client(opt.remote)
    else:
        cli = nullcontext()
        repo = git

    # cli creation must be last nontrivial command in each if clause above
    with cli:
        if opt.name and writing:
            refname = opt.name and b'refs/heads/%s' % opt.name
            oldref = repo.read_ref(refname)
        else:
            refname = oldref = None

        if not writing:
            pack_writer = NoOpPackWriter()
        elif not remote_dest:
            pack_writer = git.PackWriter(compression_level=opt.compress,
                                         max_pack_size=opt.max_pack_size,
                                         max_pack_objects=opt.max_pack_objects)
        else:
            pack_writer = cli.new_packwriter(
                compression_level=opt.compress,
                max_pack_size=opt.max_pack_size,
                max_pack_objects=opt.max_pack_objects)

        # packwriter creation must be last command in each if clause above
        with pack_writer:
            commit = split(opt, files, oldref, out, pack_writer)

        # pack_writer must be closed before we can update the ref
        if refname:
            repo.update_ref(refname, commit, oldref)

    secs = time.time() - start_time
    size = hashsplit.total_split
    if opt.bench:
        log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
            % (size / 1024, secs, size / 1024 / secs))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n'
            % len(saved_errors))
        sys.exit(1)
        # is inherited here
        sp = subprocess.Popen(
            [path.exe(), b'server', b'--force-repo', b'--mode=' + mode],
            stdin=p.stdout, stdout=p.stdin)
        p.stdin.close()
        p.stdout.close()
        # Demultiplex remote client's stderr (back to stdout/stderr).
        dmc = DemuxConn(p.stderr.fileno(), open(os.devnull, "wb"))
        for line in iter(dmc.readline, b''):
            out.write(line)
    finally:
        while 1:
            # if we get a signal while waiting, we have to keep waiting,
            # just in case our child doesn't die.
            try:
                ret = p.wait()
                if sp:
                    sp.wait()
                break
            except SigException as e:
                log('\nbup on: %s\n' % e)
                os.kill(p.pid, e.signum)
                ret = 84
except SigException as e:
    if ret == 0:
        ret = 99
    log('\nbup on: %s\n' % e)

sys.exit(ret)
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if not (opt.modified or \
            opt['print'] or \
            opt.status or \
            opt.update or \
            opt.check or \
            opt.clear):
        opt.update = 1
    if (opt.fake_valid or opt.fake_invalid) and not opt.update:
        o.fatal('--fake-{in,}valid are meaningless without -u')
    if opt.fake_valid and opt.fake_invalid:
        o.fatal('--fake-valid is incompatible with --fake-invalid')
    if opt.clear and opt.indexfile:
        o.fatal('cannot clear an external index (via -f)')

    # FIXME: remove this once we account for timestamp races, i.e. index;
    # touch new-file; index.  It's possible for this to happen quickly
    # enough that new-file ends up with the same timestamp as the first
    # index, and then bup will ignore it.
    tick_start = time.time()
    time.sleep(1 - (tick_start - int(tick_start)))

    git.check_repo_or_die()

    handle_ctrl_c()

    if opt.verbose is None:
        opt.verbose = 0

    if opt.indexfile:
        indexfile = argv_bytes(opt.indexfile)
    else:
        indexfile = git.repo(b'bupindex')

    if opt.check:
        log('check: starting initial check.\n')
        check_index(index.Reader(indexfile), opt.verbose)

    if opt.clear:
        log('clear: clearing index.\n')
        clear_index(indexfile, opt.verbose)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.update:
        if not extra:
            o.fatal('update mode (-u) requested but no paths given')
        extra = [argv_bytes(x) for x in extra]
        excluded_paths = parse_excludes(flags, o.fatal)
        exclude_rxs = parse_rx_excludes(flags, o.fatal)
        xexcept = index.unique_resolved_paths(extra)
        for rp, path in index.reduce_paths(extra):
            update_index(rp, excluded_paths, exclude_rxs, indexfile,
                         check=opt.check, check_device=opt.check_device,
                         xdev=opt.xdev, xdev_exceptions=xexcept,
                         fake_valid=opt.fake_valid,
                         fake_invalid=opt.fake_invalid,
                         out=out, verbose=opt.verbose)

    if opt['print'] or opt.status or opt.modified:
        extra = [argv_bytes(x) for x in extra]
        for name, ent in index.Reader(indexfile).filter(extra or [b'']):
            if (opt.modified
                and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
                continue
            line = b''
            if opt.status:
                if ent.is_deleted():
                    line += b'D '
                elif not ent.is_valid():
                    if ent.sha == index.EMPTY_SHA:
                        line += b'A '
                    else:
                        line += b'M '
                else:
                    line += b'  '
            if opt.hash:
                line += hexlify(ent.sha) + b' '
            if opt.long:
                line += b'%7s %7s ' % (oct(ent.mode).encode('ascii'),
                                       oct(ent.gitmode).encode('ascii'))
            out.write(line + (name or b'./') + b'\n')

    if opt.check and (opt['print'] or opt.status
                      or opt.modified or opt.update):
        log('check: starting final check.\n')
        check_index(index.Reader(indexfile), opt.verbose)

    if saved_errors:
        log('WARNING: %d errors encountered.\n' % len(saved_errors))
        sys.exit(1)
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.verbose is None:
        opt.verbose = 0

    if not (opt.blobs or opt.tree or opt.commit or opt.name
            or opt.noop or opt.copy):
        o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
    if opt.copy and (opt.blobs or opt.tree):
        o.fatal('--copy is incompatible with -b, -t')
    if (opt.noop or opt.copy) and (opt.commit or opt.name):
        o.fatal('--noop and --copy are incompatible with -c, -n')
    if opt.blobs and (opt.tree or opt.commit or opt.name):
        o.fatal('-b is incompatible with -t, -c, -n')
    if extra and opt.git_ids:
        o.fatal("don't provide filenames when using --git-ids")

    if opt.verbose >= 2:
        git.verbose = opt.verbose - 1
        opt.bench = 1

    max_pack_size = None
    if opt.max_pack_size:
        max_pack_size = parse_num(opt.max_pack_size)
    max_pack_objects = None
    if opt.max_pack_objects:
        max_pack_objects = parse_num(opt.max_pack_objects)

    if opt.fanout:
        hashsplit.fanout = parse_num(opt.fanout)
    if opt.blobs:
        hashsplit.fanout = 0
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    # Hack around lack of nonlocal vars in python 2
    total_bytes = [0]
    def prog(filenum, nbytes):
        total_bytes[0] += nbytes
        if filenum > 0:
            qprogress('Splitting: file #%d, %d kbytes\r'
                      % (filenum + 1, total_bytes[0] // 1024))
        else:
            qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")
    start_time = time.time()

    if opt.name and not valid_save_name(opt.name):
        o.fatal("'%r' is not a valid branch name." % opt.name)
    refname = opt.name and b'refs/heads/%s' % opt.name or None

    if opt.noop or opt.copy:
        cli = pack_writer = oldref = None
    elif opt.remote or is_reverse:
        git.check_repo_or_die()
        cli = client.Client(opt.remote)
        oldref = refname and cli.read_ref(refname) or None
        pack_writer = cli.new_packwriter(compression_level=opt.compress,
                                         max_pack_size=max_pack_size,
                                         max_pack_objects=max_pack_objects)
    else:
        git.check_repo_or_die()
        cli = None
        oldref = refname and git.read_ref(refname) or None
        pack_writer = git.PackWriter(compression_level=opt.compress,
                                     max_pack_size=max_pack_size,
                                     max_pack_objects=max_pack_objects)

    input = byte_stream(sys.stdin)

    if opt.git_ids:
        # the input is actually a series of git object ids that we should
        # retrieve and split.
        #
        # This is a bit messy, but basically it converts from a series of
        # CatPipe.get() iterators into a series of file-type objects.  It
        # would be less ugly if either CatPipe.get() returned a file-like
        # object (not very efficient), or split_to_shalist() expected an
        # iterator instead of a file.
        cp = git.CatPipe()
        class IterToFile:
            def __init__(self, it):
                self.it = iter(it)
            def read(self, size):
                v = next(self.it, None)
                return v or b''
        def read_ids():
            while 1:
                line = input.readline()
                if not line:
                    break
                if line:
                    line = line.strip()
                try:
                    it = cp.get(line.strip())
                    next(it, None)  # skip the file info
                except KeyError as e:
                    add_error('error: %s' % e)
                    continue
                yield IterToFile(it)
        files = read_ids()
    else:
        # the input either comes from a series of files or from stdin.
        files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) \
                or [input]

    if pack_writer:
        new_blob = pack_writer.new_blob
        new_tree = pack_writer.new_tree
    elif opt.blobs or opt.tree:
        # --noop mode
        new_blob = lambda content: git.calc_hash(b'blob', content)
        new_tree = lambda shalist: git.calc_hash(b'tree',
                                                 git.tree_encode(shalist))

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.blobs:
        shalist = hashsplit.split_to_blobs(new_blob, files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog)
        for (sha, size, level) in shalist:
            out.write(hexlify(sha) + b'\n')
            reprogress()
    elif opt.tree or opt.commit or opt.name:
        if opt.name:  # insert dummy_name which may be used as a restore target
            mode, sha = \
                hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
                                                keep_boundaries=opt.keep_boundaries,
                                                progress=prog)
            splitfile_name = git.mangle_name(b'data',
                                             hashsplit.GIT_MODE_FILE, mode)
            shalist = [(mode, splitfile_name, sha)]
        else:
            shalist = hashsplit.split_to_shalist(
                new_blob, new_tree, files,
                keep_boundaries=opt.keep_boundaries,
                progress=prog)
        tree = new_tree(shalist)
    else:
        last = 0
        it = hashsplit.hashsplit_iter(files,
                                      keep_boundaries=opt.keep_boundaries,
                                      progress=prog)
        for (blob, level) in it:
            hashsplit.total_split += len(blob)
            if opt.copy:
                sys.stdout.write(str(blob))
            megs = hashsplit.total_split // 1024 // 1024
            if not opt.quiet and last != megs:
                last = megs

    if opt.verbose:
        log('\n')
    if opt.tree:
        out.write(hexlify(tree) + b'\n')
    if opt.commit or opt.name:
        msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
        ref = opt.name and (b'refs/heads/%s' % opt.name) or None
        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = pack_writer.new_commit(tree, oldref, userline, date, None,
                                        userline, date, None, msg)
        if opt.commit:
            out.write(hexlify(commit) + b'\n')

    if pack_writer:
        pack_writer.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    secs = time.time() - start_time
    size = hashsplit.total_split
    if opt.bench:
        log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
            % (size / 1024, secs, size / 1024 / secs))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n'
            % len(saved_errors))
        sys.exit(1)
def _do_midx(outdir, outfilename, infilenames, prefixstr,
             auto=False, force=False):
    global _first
    if not outfilename:
        assert(outdir)
        sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
        outfilename = b'%s/midx-%s.midx' % (outdir, sum)

    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])

        if not _first:
            _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (auto and (total < 1024 and len(infilenames) < 3)) \
           or ((auto or force) and len(infilenames) < 2) \
           or (force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write(b'MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)
            count = merge_into(fmap, bits, total, inp)
            del fmap  # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write(b'\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None

    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        log(repr(p.idxnames) + '\n')
        assert(len(p) == total)
        pi = iter(p)
        for i in git.idxmerge(inp, final_progress=False):
            pin = next(pi)
            assert(i == pin)
            assert(p.exists(i))

    return total, outfilename
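# A minimal sketch (hypothetical helper) of the fanout sizing used above:
# pick the smallest power-of-two table such that each bucket covers roughly
# SHA_PER_PAGE entries, matching the pages/bits/entries computation in
# _do_midx().
def midx_table_bits(total_shas, sha_per_page):
    pages = int(total_shas / sha_per_page) or 1
    return int(math.ceil(math.log(pages, 2)))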
        try:
            m.uid = int(opt.set_uid)
        except ValueError:
            o.fatal("uid must be an integer")

        if opt.set_gid is not None:
            try:
                m.gid = int(opt.set_gid)
            except ValueError:
                o.fatal("gid must be an integer")

        if unset_user:
            m.user = ''
        elif opt.set_user is not None:
            m.user = opt.set_user

        if unset_group:
            m.group = ''
        elif opt.set_group is not None:
            m.group = opt.set_group

        m.write(output_file)
    finally:
        f.close()

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)
else:
    sys.exit(0)
elif cmd == b'get':
    if len(words) not in [2, 3]:
        rv = 1
        raise Exception('Usage: get <filename> [localname]')
    rname = words[1]
    (dir, base) = os.path.split(rname)
    lname = len(words) > 2 and words[2] or base
    res = vfs.resolve(repo, rname, parent=pwd)
    _, leaf_item = res[-1]
    if not leaf_item:
        raise Exception('%s does not exist'
                        % path_msg(b'/'.join(name for name, item in res)))
    with vfs.fopen(repo, leaf_item) as srcfile:
        with open(lname, 'wb') as destfile:
            log('Saving %s\n' % path_msg(lname))
            write_to_file(srcfile, destfile)
elif cmd == b'mget':
    for parm in words[1:]:
        dir, base = os.path.split(parm)
        res = vfs.resolve(repo, dir, parent=pwd)
        _, dir_item = res[-1]
        if not dir_item:
            raise Exception('%s does not exist' % path_msg(dir))
        for name, item in vfs.contents(repo, dir_item):
            if name == b'.':
                continue
            if fnmatch.fnmatch(name, base):
                if stat.S_ISLNK(vfs.item_mode(item)):
                    deref = vfs.resolve(repo, name, parent=res)
def split(opt, files, parent, out, pack_writer):
    # Hack around lack of nonlocal vars in python 2
    total_bytes = [0]
    def prog(filenum, nbytes):
        total_bytes[0] += nbytes
        if filenum > 0:
            qprogress('Splitting: file #%d, %d kbytes\r'
                      % (filenum + 1, total_bytes[0] // 1024))
        else:
            qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))

    new_blob = pack_writer.new_blob
    new_tree = pack_writer.new_tree
    if opt.blobs:
        shalist = hashsplit.split_to_blobs(new_blob, files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog)
        for sha, size, level in shalist:
            out.write(hexlify(sha) + b'\n')
            reprogress()
    elif opt.tree or opt.commit or opt.name:
        if opt.name:  # insert dummy_name which may be used as a restore target
            mode, sha = \
                hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
                                                keep_boundaries=opt.keep_boundaries,
                                                progress=prog)
            splitfile_name = git.mangle_name(b'data',
                                             hashsplit.GIT_MODE_FILE, mode)
            shalist = [(mode, splitfile_name, sha)]
        else:
            shalist = \
                hashsplit.split_to_shalist(new_blob, new_tree, files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog)
        tree = new_tree(shalist)
    else:
        last = 0
        it = hashsplit.hashsplit_iter(files,
                                      keep_boundaries=opt.keep_boundaries,
                                      progress=prog)
        for blob, level in it:
            hashsplit.total_split += len(blob)
            if opt.copy:
                sys.stdout.write(str(blob))
            megs = hashsplit.total_split // 1024 // 1024
            if not opt.quiet and last != megs:
                last = megs

    if opt.verbose:
        log('\n')
    if opt.tree:
        out.write(hexlify(tree) + b'\n')

    commit = None
    if opt.commit or opt.name:
        msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = pack_writer.new_commit(tree, parent, userline, opt.date,
                                        None, userline, opt.date, None, msg)
        if opt.commit:
            out.write(hexlify(commit) + b'\n')

    return commit
def main(argv):
    target_filename = b''
    active_fields = metadata.all_fields

    o = options.Options(optspec)
    (opt, flags, remainder) = o.parse_bytes(argv[1:])

    atime_resolution = parse_timestamp_arg('atime', opt.atime_resolution)
    mtime_resolution = parse_timestamp_arg('mtime', opt.mtime_resolution)
    ctime_resolution = parse_timestamp_arg('ctime', opt.ctime_resolution)

    treat_include_fields_as_definitive = True
    for flag, value in flags:
        if flag == '--exclude-fields':
            exclude_fields = frozenset(value.split(','))
            for f in exclude_fields:
                if not f in metadata.all_fields:
                    o.fatal(f + ' is not a valid field name')
            active_fields = active_fields - exclude_fields
            treat_include_fields_as_definitive = False
        elif flag == '--include-fields':
            include_fields = frozenset(value.split(','))
            for f in include_fields:
                if not f in metadata.all_fields:
                    o.fatal(f + ' is not a valid field name')
            if treat_include_fields_as_definitive:
                active_fields = include_fields
                treat_include_fields_as_definitive = False
            else:
                active_fields = active_fields | include_fields

    opt.verbose = opt.verbose or 0
    opt.quiet = opt.quiet or 0
    metadata.verbose = opt.verbose - opt.quiet

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    first_path = True
    for path in remainder:
        path = argv_bytes(path)
        try:
            m = metadata.from_path(path, archive_path=path)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                add_error(e)
                continue
            else:
                raise
        if metadata.verbose >= 0:
            if not first_path:
                out.write(b'\n')
            if atime_resolution != 1:
                m.atime = (m.atime // atime_resolution) * atime_resolution
            if mtime_resolution != 1:
                m.mtime = (m.mtime // mtime_resolution) * mtime_resolution
            if ctime_resolution != 1:
                m.ctime = (m.ctime // ctime_resolution) * ctime_resolution
            out.write(metadata.detailed_bytes(m, active_fields))
            out.write(b'\n')
            first_path = False

    if saved_errors:
        log('WARNING: %d errors encountered.\n' % len(saved_errors))
        sys.exit(1)
    else:
        sys.exit(0)
def main():
    handle_ctrl_c()
    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    opt = parse_args(compat.argv)
    git.check_repo_or_die()
    if opt.source:
        opt.source = argv_bytes(opt.source)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if is_reverse and opt.remote:
        misuse("don't use -r in reverse mode; it's automatic")
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.remote or is_reverse:
        dest_repo = RemoteRepo(opt.remote)
    else:
        dest_repo = LocalRepo()

    with dest_repo as dest_repo:
        with LocalRepo(repo_dir=opt.source) as src_repo:
            with dest_repo.new_packwriter(compression_level=opt.compress) \
                 as writer:
                # Resolve and validate all sources and destinations,
                # implicit or explicit, and do it up-front, so we can
                # fail before we start writing (for any obviously
                # broken cases).
                target_items = resolve_targets(opt.target_specs,
                                               src_repo, dest_repo)

                updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))
                no_ref_info = (None, None)

                handlers = {'ff': handle_ff,
                            'append': handle_append,
                            'force-pick': handle_pick,
                            'pick': handle_pick,
                            'new-tag': handle_new_tag,
                            'replace': handle_replace,
                            'unnamed': handle_unnamed}

                for item in target_items:
                    debug1('get-spec: %r\n' % (item.spec,))
                    debug1('get-src: %s\n' % loc_desc(item.src))
                    debug1('get-dest: %s\n' % loc_desc(item.dest))
                    dest_path = item.dest and item.dest.path
                    if dest_path:
                        if dest_path.startswith(b'/.tag/'):
                            dest_ref = b'refs/tags/%s' % dest_path[6:]
                        else:
                            dest_ref = b'refs/heads/%s' % dest_path[1:]
                    else:
                        dest_ref = None

                    dest_hash = item.dest and item.dest.hash
                    orig_ref, cur_ref = updated_refs.get(dest_ref,
                                                         no_ref_info)
                    orig_ref = orig_ref or dest_hash
                    cur_ref = cur_ref or dest_hash

                    handler = handlers[item.spec.method]
                    item_result = handler(item, src_repo, writer, opt)
                    if len(item_result) > 1:
                        new_id, tree = item_result
                    else:
                        new_id, tree = item_result[0], None

                    if not dest_ref:
                        log_item(item.spec.src, item.src.type, opt)
                    else:
                        updated_refs[dest_ref] = (orig_ref, new_id)
                        if dest_ref.startswith(b'refs/tags/'):
                            log_item(item.spec.src, item.src.type, opt,
                                     tag=new_id)
                        else:
                            log_item(item.spec.src, item.src.type, opt,
                                     tree=tree, commit=new_id)

            # Only update the refs at the very end, once the writer is
            # closed, so that if something goes wrong above, the old refs
            # will be undisturbed.
            for ref_name, info in items(updated_refs):
                orig_ref, new_ref = info
                try:
                    dest_repo.update_ref(ref_name, new_ref, orig_ref)
                    if opt.verbose:
                        new_hex = hexlify(new_ref)
                        if orig_ref:
                            orig_hex = hexlify(orig_ref)
                            log('updated %r (%s -> %s)\n'
                                % (ref_name, orig_hex, new_hex))
                        else:
                            log('updated %r (%s)\n' % (ref_name, new_hex))
                except (git.GitError, client.ClientError) as ex:
                    add_error('unable to update ref %r: %s' % (ref_name, ex))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n'
            % len(saved_errors))
        sys.exit(1)
except ImportError:
    print('error: cannot find the python "fuse" module; please install it',
          file=sys.stderr)
    sys.exit(2)
if not hasattr(fuse, '__version__'):
    print('error: fuse module is too old for fuse.__version__',
          file=sys.stderr)
    sys.exit(2)
fuse.fuse_python_api = (0, 2)

if sys.version_info[0] > 2:
    try:
        fuse_ver = fuse.__version__.split('.')
        fuse_ver_maj = int(fuse_ver[0])
    except:
        print('error: cannot determine the fuse major version; please report',
              file=sys.stderr)
        sys.exit(2)
    if len(fuse_ver) < 3 or fuse_ver_maj < 1:
        print("error: fuse module can't handle binary data; please upgrade to 1.0+\n",
              file=sys.stderr)
        sys.exit(2)

from bup import options, git, vfs, xstat
from bup.compat import argv_bytes, fsdecode, py_maj
from bup.helpers import log
from bup.repo import LocalRepo

# FIXME: self.meta and want_meta?

# The path handling is just wrong, but the current fuse module can't
def refresh(self, skip_midx=False):
    """Refresh the index list.
    This method verifies if .midx files were superseded (e.g. all of
    its contents are in another, bigger .midx file) and removes the
    superseded files.

    If skip_midx is True, all work on .midx files will be skipped
    and .midx files will be removed from the list.

    The instance variable 'ignore_midx' can force this function to
    always act as if skip_midx was True.
    """
    if self.bloom is not None:
        self.bloom.close()
    self.bloom = None  # Always reopen the bloom as it may have been replaced
    self.do_bloom = False
    skip_midx = skip_midx or self.ignore_midx
    d = dict((p.name, p) for p in self.packs
             if not skip_midx or not isinstance(p, midx.PackMidx))
    if os.path.exists(self.dir):
        if not skip_midx:
            midxl = []
            midxes = set(glob.glob(os.path.join(self.dir, b'*.midx')))
            # remove any *.midx files from our list that no longer exist
            for ix in list(d.values()):
                if not isinstance(ix, midx.PackMidx):
                    continue
                if ix.name in midxes:
                    continue
                # remove the midx
                del d[ix.name]
                ix.close()
                self.packs.remove(ix)
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in midxes:
                if not d.get(full):
                    mx = midx.PackMidx(full)
                    (mxd, mxf) = os.path.split(mx.name)
                    broken = False
                    for n in mx.idxnames:
                        if not os.path.exists(os.path.join(mxd, n)):
                            log(('warning: index %s missing\n'
                                 ' used by %s\n')
                                % (path_msg(n), path_msg(mxf)))
                            broken = True
                    if broken:
                        mx.close()
                        del mx
                        unlink(full)
                    else:
                        midxl.append(mx)
            midxl.sort(key=lambda ix:
                       (-len(ix), -xstat.stat(ix.name).st_mtime))
            for ix in midxl:
                any_needed = False
                for sub in ix.idxnames:
                    found = d.get(os.path.join(self.dir, sub))
                    if not found or isinstance(found, PackIdx):
                        # doesn't exist, or exists but not in a midx
                        any_needed = True
                        break
                if any_needed:
                    d[ix.name] = ix
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % path_msg(os.path.basename(ix.name)))
                    ix.close()
                    unlink(ix.name)
        for full in glob.glob(os.path.join(self.dir, b'*.idx')):
            if not d.get(full):
                try:
                    ix = open_idx(full)
                except GitError as e:
                    add_error(e)
                    continue
                d[full] = ix
        bfull = os.path.join(self.dir, b'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        self.packs.sort(reverse=True, key=lambda x: len(x))
        if self.bloom and self.bloom.valid() \
           and len(self.bloom) >= len(self):
            self.do_bloom = True
        else:
            self.bloom = None
    debug1('PackIdxList: using %d index%s.\n'
           % (len(self.packs), len(self.packs) != 1 and 'es' or ''))
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

git.check_repo_or_die()

repo = LocalRepo()
pwd = vfs.resolve(repo, '/')
rv = 0

if extra:
    lines = extra
else:
    try:
        import readline
    except ImportError:
        log('* readline module not available: line editing disabled.\n')
        readline = None

    if readline:
        readline.set_completer_delims(' \t\n\r/')
        readline.set_completer(completer)
        if sys.platform.startswith('darwin'):
            # MacOS uses a slightly incompatible clone of libreadline
            readline.parse_and_bind('bind ^I rl_complete')
        readline.parse_and_bind('tab: complete')
        init_readline_vars()
    lines = inputiter()

for line in lines:
    if not line.strip():
        continue