def connect(rhost, port, subcmd, stderr=None):
    """Start the bup subcommand 'subcmd' on 'rhost' and return the Popen.

    When 'rhost' is None or b'-' the local bup executable is run
    directly; otherwise the command is run through ssh (using 'port'
    when given).  The child gets piped stdin/stdout, the supplied
    'stderr', and its own session.
    """
    assert not re.search(br'[^\w-]', subcmd)

    run_locally = rhost is None or rhost == b'-'
    if run_locally:
        argv = [path.exe(), subcmd]
    else:
        debug_level = helpers.atoi(environ.get(b'BUP_DEBUG'))
        tty_flag = helpers.atoi(environ.get(b'BUP_FORCE_TTY'))
        template = b""" sh -c 'BUP_DEBUG=%d BUP_FORCE_TTY=%d bup %s' """
        remote_cmd = (template % (debug_level, tty_flag, subcmd)).strip()
        argv = [b'ssh']
        if port:
            argv += [b'-p', port]
        argv += [rhost, b'--', remote_cmd]

    popen_args = dict(stdin=subprocess.PIPE,
                      stdout=subprocess.PIPE,
                      stderr=stderr)
    # Detach the child into a new session; the mechanism depends on
    # the major Python version.
    if sys.version_info[0] < 3:
        popen_args['preexec_fn'] = lambda: os.setsid()
    else:
        popen_args['start_new_session'] = True
    return subprocess.Popen(argv, **popen_args)
def test_utc_offset_str():
    """Check utc_offset_str() under a series of TZ environment settings.

    Each case sets TZ in the environment and compares the formatted
    offset for a given timestamp with the expected b'+HHMM' / b'-HHMM'
    value.  The caller's TZ is restored afterwards, whether or not an
    assertion failed.
    """
    with no_lingering_errors():
        tz = environ.get(b'TZ')
        try:
            environ[b'TZ'] = b'FOO+0:00'
            WVPASSEQ(utc_offset_str(0), b'+0000')
            environ[b'TZ'] = b'FOO+1:00'
            WVPASSEQ(utc_offset_str(0), b'-0100')
            environ[b'TZ'] = b'FOO-1:00'
            WVPASSEQ(utc_offset_str(0), b'+0100')
            environ[b'TZ'] = b'FOO+3:3'
            WVPASSEQ(utc_offset_str(0), b'-0303')
            environ[b'TZ'] = b'FOO-3:3'
            WVPASSEQ(utc_offset_str(0), b'+0303')
            # Offset is not an integer number of minutes
            environ[b'TZ'] = b'FOO+3:3:3'
            WVPASSEQ(utc_offset_str(1), b'-0303')
            environ[b'TZ'] = b'FOO-3:3:3'
            WVPASSEQ(utc_offset_str(1), b'+0303')
            WVPASSEQ(utc_offset_str(314159), b'+0303')
        finally:
            # Distinguish "TZ was unset" from "TZ was set but empty":
            # a truthiness test would delete an empty TZ instead of
            # putting it back.
            if tz is not None:
                environ[b'TZ'] = tz
            else:
                environ.pop(b'TZ', None)
def test_utc_offset_str():
    """Check utc_offset_str() for several TZ values applied via set_tz().

    Every table row names a TZ value and the (timestamp, expected
    offset) pairs to verify while it is in effect.  The TZ seen on
    entry is reinstated on the way out.
    """
    saved_tz = environ.get(b'TZ')
    tzset()
    cases = (
        (b'FOO+0:00', ((0, b'+0000'),)),
        (b'FOO+1:00', ((0, b'-0100'),)),
        (b'FOO-1:00', ((0, b'+0100'),)),
        (b'FOO+3:3', ((0, b'-0303'),)),
        (b'FOO-3:3', ((0, b'+0303'),)),
        # Offset is not an integer number of minutes
        (b'FOO+3:3:3', ((1, b'-0303'),)),
        (b'FOO-3:3:3', ((1, b'+0303'), (314159, b'+0303'))),
    )
    try:
        for zone, checks in cases:
            set_tz(zone)
            for when, expected in checks:
                WVPASSEQ(utc_offset_str(when), expected)
    finally:
        if not saved_tz:
            try:
                set_tz(None)
            except KeyError:
                pass
        else:
            set_tz(saved_tz)
def require_suitable_git(ver_str=None):
    """Ensure the available git is acceptable, remembering a positive result.

    A previous decision cached in the module-level _git_great, or a
    BUP_GIT_VERSION_IS_FINE value of yes/true/1, short-circuits the
    check.  Otherwise 'ver_str' is examined when provided, else the
    output of "git --version".  Raises GitError for unrecognized
    version output, and logs an error and exits with status 1 when
    the version is insufficient.
    """
    global _git_great

    if _git_great is not None:
        return

    override = environ.get(b'BUP_GIT_VERSION_IS_FINE', b'').lower()
    if override in (b'yes', b'true', b'1'):
        _git_great = True
        return

    if not ver_str:
        ver_str, _unused_a, _unused_b = _git_exo([b'git', b'--version'])

    verdict = is_suitable_git(ver_str)
    if verdict == 'suitable':
        _git_great = True
        return
    if verdict == 'unrecognized':
        raise GitError('Unexpected git --version output: %r' % ver_str)
    if verdict == 'insufficient':
        log('error: git version must be at least 1.5.6\n')
        sys.exit(1)
    # is_suitable_git() produced a status this function does not know.
    assert False
def connect(rhost, port, subcmd, stderr=None):
    """Connect to 'rhost' and execute the bup subcommand 'subcmd' on it.

    An 'rhost' of b'-' (or any false value) means "run locally": bup
    is started directly with the executable's directory prepended to
    PATH.  Otherwise the command is run via ssh (with -p 'port' when
    given), with the remote PATH extended the same way.

    Returns the subprocess.Popen object; stdin and stdout are pipes,
    stderr is whatever was passed in, and the child runs in its own
    session.
    """
    assert not re.search(br'[^\w-]', subcmd)
    nicedir = re.sub(b':', b'_', path.exedir())
    if rhost == b'-':
        rhost = None
    if not rhost:
        argv = [b'bup', subcmd]
    else:
        # WARNING: shell quoting security holes are possible here, so we
        # have to be super careful.  We have to use 'sh -c' because
        # csh-derived shells can't handle PATH= notation.  We can't
        # set PATH in advance, because ssh probably replaces it.  We
        # can't exec *safely* using argv, because *both* ssh and 'sh -c'
        # allow shellquoting.  So we end up having to double-shellquote
        # stuff here.
        escapedir = re.sub(br'([^\w/])', br'\\\\\\\1', nicedir)
        buglvl = helpers.atoi(environ.get(b'BUP_DEBUG'))
        force_tty = helpers.atoi(environ.get(b'BUP_FORCE_TTY'))
        # The two levels are formatted with %d: bytes %-formatting on
        # Python 3 rejects an int for %s (assumes helpers.atoi yields
        # an int -- TODO confirm).
        cmd = b""" sh -c PATH=%s:'$PATH BUP_DEBUG=%d BUP_FORCE_TTY=%d bup %s' """ % (
            escapedir, buglvl, force_tty, subcmd)
        argv = [b'ssh']
        if port:
            argv.extend((b'-p', port))
        argv.extend((rhost, b'--', cmd.strip()))
        #helpers.log('argv is: %r\n' % argv)
    if rhost:
        env = environ
    else:
        # Local run: make sure the bup next to this executable is
        # found first, without touching our own environment.
        envpath = environ.get(b'PATH')
        env = environ.copy()
        env[b'PATH'] = nicedir if not envpath else nicedir + b':' + envpath
    if sys.version_info[0] < 3:
        return subprocess.Popen(argv,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                stderr=stderr,
                                env=env,
                                preexec_fn=lambda: os.setsid())
    else:
        return subprocess.Popen(argv,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                stderr=stderr,
                                env=env,
                                start_new_session=True)
def from_opts(opt, reverse=True):
    """Build and return the repository selected by the parsed options.

    Optional attributes consulted on 'opt' (a missing one counts as
    None): compress, max_pack_size, max_pack_objects, remote.  When
    'reverse' is true, the BUP_SERVER_REVERSE environment variable is
    honoured as well; combining it with a remote logs an error and
    exits with status 97.  A client.ClientError while creating the
    repository is logged and turned into exit status 1.
    """
    git.check_repo_or_die()

    is_reverse = environ.get(b'BUP_SERVER_REVERSE') if reverse else False
    if is_reverse and opt.remote:
        log("error: don't use -r in reverse mode; it's automatic")
        sys.exit(97)

    def _parsed_number(attr):
        # Size-like option run through parse_num, or None when the
        # option is absent or empty.
        try:
            raw = getattr(opt, attr)
            return parse_num(raw) if raw else None
        except (KeyError, AttributeError):
            return None

    try:
        compress = opt.compress
    except (KeyError, AttributeError):
        compress = None

    settings = {
        'compression_level': compress,
        'max_pack_size': _parsed_number('max_pack_size'),
        'max_pack_objects': _parsed_number('max_pack_objects'),
    }

    try:
        if opt.remote:
            return make_repo(opt.remote, **settings)
        if is_reverse:
            return make_repo(b'reverse://%s' % is_reverse, **settings)
        return LocalRepo(**settings)
    except client.ClientError as e:
        log('error: %s' % e)
        sys.exit(1)
def __init__(self, remote, create=False):
    """Open a connection to the repository described by 'remote'.

    In reverse mode (BUP_SERVER_REVERSE set) 'remote' must be empty and
    the connection uses the already-open file descriptors 3 and 4.
    Otherwise the remote is parsed and reached either through
    ssh.connect() (ssh and file protocols) or a TCP socket (bup
    protocol, port 1982 unless one is given).

    Once connected, the server must offer init-dir and set-dir; the
    remote directory, if any, is initialised when 'create' is true and
    selected otherwise, and then the indexes are synchronised.

    Any exception during setup closes the client via self.close()
    inside pending_raise(ex).
    """
    self.closed = False
    self._busy = self.conn = None
    self.sock = self.p = self.pout = self.pin = None
    # Initialised up front like the other handles so the attribute
    # exists even when socket setup fails part-way.
    self.sockw = None
    try:
        is_reverse = environ.get(b'BUP_SERVER_REVERSE')
        if is_reverse:
            assert(not remote)
            remote = b'%s:' % is_reverse
        (self.protocol, self.host, self.port, self.dir) = parse_remote(remote)
        # The b'None' here matches python2's behavior of b'%s' % None == 'None',
        # python3 will (as of version 3.7.5) do the same for str ('%s' % None),
        # but crashes instead when doing b'%s' % None.
        cachehost = b'None' if self.host is None else self.host
        cachedir = b'None' if self.dir is None else self.dir
        self.cachedir = git.repo(b'index-cache/%s'
                                 % re.sub(br'[^@\w]',
                                          b'_',
                                          b'%s:%s' % (cachehost, cachedir)))
        if is_reverse:
            self.pout = os.fdopen(3, 'rb')
            self.pin = os.fdopen(4, 'wb')
            self.conn = Conn(self.pout, self.pin)
        else:
            if self.protocol in (b'ssh', b'file'):
                try:
                    # FIXME: ssh and file shouldn't use the same module
                    self.p = ssh.connect(self.host, self.port, b'server')
                    self.pout = self.p.stdout
                    self.pin = self.p.stdin
                    self.conn = Conn(self.pout, self.pin)
                except OSError as e:
                    reraise(ClientError('connect: %s' % e))
            elif self.protocol == b'bup':
                self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                self.sock.connect((self.host,
                                   1982 if self.port is None else int(self.port)))
                self.sockw = self.sock.makefile('wb')
                self.conn = DemuxConn(self.sock.fileno(), self.sockw)
        self._available_commands = self._get_available_commands()
        self._require_command(b'init-dir')
        self._require_command(b'set-dir')
        if self.dir:
            # Line breaks would end the one-line command sent below.
            # The replacement must be bytes: a str replacement with a
            # bytes pattern raises TypeError on Python 3 when it matches.
            self.dir = re.sub(br'[\r\n]', b' ', self.dir)
            if create:
                self.conn.write(b'init-dir %s\n' % self.dir)
            else:
                self.conn.write(b'set-dir %s\n' % self.dir)
            self.check_ok()
        self.sync_indexes()
    except BaseException as ex:
        with pending_raise(ex):
            self.close()
def guess_repo(path=None):
    """Choose a value for the module-level "repodir" without validating it.

    Precedence: an explicit 'path', then whatever repodir already
    holds, then the BUP_DIR environment variable, and finally ~/.bup.
    No check is made that a repository actually exists there; code
    that needs one would normally use check_repo_or_die() instead.
    """
    global repodir
    if path:
        repodir = path
    if repodir:
        return
    repodir = environ.get(b'BUP_DIR') or os.path.expanduser(b'~/.bup')
def opts_from_cmdline(argv):
    """Parse 'argv' into a validated, normalised options object.

    The positional arguments are stored as opt.sources.  name and
    remote are converted with argv_bytes(), verbose defaults to 0, the
    size-like options are run through parse_num(), date becomes a
    parsed date (or the current time), and opt.is_reverse is taken from
    BUP_SERVER_REVERSE.  Invalid option combinations are reported
    through the parser's fatal().
    """
    parser = options.Options(optspec)
    opt, flags, extra = parser.parse_bytes(argv[1:])
    fatal = parser.fatal
    opt.sources = extra

    for attr in ('name', 'remote'):
        raw = getattr(opt, attr)
        if raw:
            setattr(opt, attr, argv_bytes(raw))
    if opt.verbose is None:
        opt.verbose = 0

    # (problem detected?, message) pairs, reported in this order.
    ref_output = opt.commit or opt.name
    problems = (
        (not (opt.blobs or opt.tree or ref_output or opt.noop or opt.copy),
         "use one or more of -b, -t, -c, -n, --noop, --copy"),
        (opt.copy and (opt.blobs or opt.tree),
         '--copy is incompatible with -b, -t'),
        ((opt.noop or opt.copy) and ref_output,
         '--noop and --copy are incompatible with -c, -n'),
        (opt.blobs and (opt.tree or ref_output),
         '-b is incompatible with -t, -c, -n'),
        (extra and opt.git_ids,
         "don't provide filenames when using --git-ids"),
    )
    for failed, message in problems:
        if failed:
            fatal(message)

    if opt.verbose >= 2:
        git.verbose = opt.verbose - 1
        opt.bench = 1

    for attr in ('max_pack_size', 'max_pack_objects', 'fanout', 'bwlimit'):
        raw = getattr(opt, attr)
        if raw:
            setattr(opt, attr, parse_num(raw))

    opt.date = parse_date_or_fatal(opt.date, fatal) if opt.date else time.time()

    opt.is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if opt.is_reverse and opt.remote:
        fatal("don't use -r in reverse mode; it's automatic")

    if opt.name and not valid_save_name(opt.name):
        fatal("'%r' is not a valid branch name." % opt.name)

    return opt
def main(argv):
    """Run the save command described by 'argv'.

    Parses the options, walks the index entries selected by the given
    paths, and writes blobs and trees (plus a .bupm metadata entry per
    directory) through a pack writer that is either local or obtained
    from a client connection.  Depending on the options, the resulting
    tree and/or commit id is written to stdout and the branch ref is
    updated.  Exits with status 1 if any errors were recorded in
    saved_errors.
    """

    # Hack around lack of nonlocal vars in python 2
    _nonlocal = {}

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if opt.indexfile:
        opt.indexfile = argv_bytes(opt.indexfile)
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.strip_path:
        opt.strip_path = argv_bytes(opt.strip_path)

    git.check_repo_or_die()
    if not (opt.tree or opt.commit or opt.name):
        o.fatal("use one or more of -t, -c, -n")
    if not extra:
        o.fatal("no filenames given")

    extra = [argv_bytes(x) for x in extra]

    opt.progress = (istty2 and not opt.quiet)
    opt.smaller = parse_num(opt.smaller or 0)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)

    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    if opt.strip and opt.strip_path:
        o.fatal("--strip is incompatible with --strip-path")

    # Each graft point is a (resolved old path, resolved new path) pair
    # taken from a --graft old_path=new_path flag.
    graft_points = []
    if opt.graft:
        if opt.strip:
            o.fatal("--strip is incompatible with --graft")

        if opt.strip_path:
            o.fatal("--strip-path is incompatible with --graft")

        for (option, parameter) in flags:
            if option == "--graft":
                parameter = argv_bytes(parameter)
                splitted_parameter = parameter.split(b'=')
                if len(splitted_parameter) != 2:
                    o.fatal(
                        "a graft point must be of the form old_path=new_path")
                old_path, new_path = splitted_parameter
                if not (old_path and new_path):
                    o.fatal("a graft point cannot be empty")
                graft_points.append(
                    (resolve_parent(old_path), resolve_parent(new_path)))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")

    name = opt.name
    if name and not valid_save_name(name):
        o.fatal("'%s' is not a valid branch name" % path_msg(name))
    refname = name and b'refs/heads/%s' % name or None
    # Pick the pack writer and the current value of the ref: through a
    # client when a remote (or reverse mode) is in use, otherwise from
    # the local git module.
    if opt.remote or is_reverse:
        try:
            cli = client.Client(opt.remote)
        except client.ClientError as e:
            log('error: %s' % e)
            sys.exit(1)
        oldref = refname and cli.read_ref(refname) or None
        w = cli.new_packwriter(compression_level=opt.compress)
    else:
        cli = None
        oldref = refname and git.read_ref(refname) or None
        w = git.PackWriter(compression_level=opt.compress)

    handle_ctrl_c()

    # Metadata is stored in a file named .bupm in each directory.  The
    # first metadata entry will be the metadata for the current directory.
    # The remaining entries will be for each of the other directory
    # elements, in the order they're listed in the index.
    #
    # Since the git tree elements are sorted according to
    # git.shalist_item_sort_key, the metalist items are accumulated as
    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
    # created.  The sort_key should have been computed using the element's
    # mangled name and git mode (after hashsplitting), but the code isn't
    # actually doing that but rather uses the element's real name and mode.
    # This makes things a bit more difficult when reading it back, see
    # vfs.ordered_tree_entries().

    # Maintain a stack of information representing the current location in
    # the archive being constructed.  The current path is recorded in
    # parts, which will be something like ['', 'home', 'someuser'], and
    # the accumulated content and metadata for of the dirs in parts is
    # stored in parallel stacks in shalists and metalists.

    parts = []  # Current archive position (stack of dir names).
    shalists = []  # Hashes for each dir in paths.
    metalists = []  # Metadata for each dir in paths.

    def _push(part, metadata):
        # Enter a new archive directory -- make it the current directory.
        parts.append(part)
        shalists.append([])
        metalists.append([(b'', metadata)])  # This dir's metadata (no name).

    def _pop(force_tree, dir_metadata=None):
        # Leave the current archive directory and add its tree to its parent.
        # Returns the tree id: force_tree when given, otherwise the id
        # of a newly written tree that includes a .bupm entry.
        assert (len(parts) >= 1)
        part = parts.pop()
        shalist = shalists.pop()
        metalist = metalists.pop()
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if force_tree:
            tree = force_tree
        else:
            # Drop entries whose name was already seen, together with
            # the matching metadata entry for non-directories.
            names_seen = set()
            clean_list = []
            metaidx = 1  # entry at 0 is for the dir
            for x in shalist:
                name = x[1]
                if name in names_seen:
                    parent_path = b'/'.join(parts) + b'/'
                    add_error('error: ignoring duplicate path %s in %s'
                              % (path_msg(name), path_msg(parent_path)))
                    if not stat.S_ISDIR(x[0]):
                        del metalist[metaidx]
                else:
                    names_seen.add(name)
                    clean_list.append(x)
                    if not stat.S_ISDIR(x[0]):
                        metaidx += 1

            if dir_metadata:  # Override the original metadata pushed for this dir.
                metalist = [(b'', dir_metadata)] + metalist[1:]
            sorted_metalist = sorted(metalist, key=lambda x: x[0])
            metadata = b''.join([m[1].encode() for m in sorted_metalist])
            metadata_f = BytesIO(metadata)
            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
                                                       [metadata_f],
                                                       keep_boundaries=False)
            clean_list.append((mode, b'.bupm', id))

            tree = w.new_tree(clean_list)
        if shalists:
            shalists[-1].append((GIT_MODE_TREE,
                                 git.mangle_name(part,
                                                 GIT_MODE_TREE, GIT_MODE_TREE),
                                 tree))
        return tree

    # Progress counters shared with progress_report(): 'count' is
    # advanced per finished entry in the main loop, 'subcount' by the
    # progress callback, 'lastremain' is the last remaining-time
    # estimate that was kept.
    _nonlocal['count'] = 0
    _nonlocal['subcount'] = 0
    _nonlocal['lastremain'] = None

    def progress_report(n):
        # Account for n more units of progress and display the
        # percentage, byte and file counts; rate and remaining time are
        # left blank while less than 30 seconds have elapsed.
        _nonlocal['subcount'] += n
        cc = _nonlocal['count'] + _nonlocal['subcount']
        pct = total and (cc * 100.0 / total) or 0
        now = time.time()
        elapsed = now - tstart
        kps = elapsed and int(cc / 1024. / elapsed)
        kps_frac = 10**int(math.log(kps + 1, 10) - 1)
        kps = int(kps / kps_frac) * kps_frac
        if cc:
            remain = elapsed * 1.0 / cc * (total - cc)
        else:
            remain = 0.0
        # Keep the previous estimate when the new one is larger by less
        # than 5%.
        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain'])
                and ((remain - _nonlocal['lastremain'])
                     / _nonlocal['lastremain'] < 0.05)):
            remain = _nonlocal['lastremain']
        else:
            _nonlocal['lastremain'] = remain
        hours = int(remain / 60 / 60)
        mins = int(remain / 60 - hours * 60)
        secs = int(remain - hours * 60 * 60 - mins * 60)
        if elapsed < 30:
            remainstr = ''
            kpsstr = ''
        else:
            kpsstr = '%dk/s' % kps
            if hours:
                remainstr = '%dh%dm' % (hours, mins)
            elif mins:
                remainstr = '%dm%d' % (mins, secs)
            else:
                remainstr = '%ds' % secs
        qprogress(
            'Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r' %
            (pct, cc / 1024, total / 1024, fcount, ftotal, remainstr, kpsstr))

    indexfile = opt.indexfile or git.repo(b'bupindex')
    r = index.Reader(indexfile)
    try:
        msr = index.MetaStoreReader(indexfile + b'.meta')
    except IOError as ex:
        # Only a permission problem gets the friendly message; any
        # other IOError propagates.
        if ex.errno != EACCES:
            raise
        log('error: cannot access %r; have you run bup index?'
            % path_msg(indexfile))
        sys.exit(1)
    hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')

    def already_saved(ent):
        # Yields the entry's sha when the entry is valid and the writer
        # reports that object as existing; otherwise a false value.
        return ent.is_valid() and w.exists(ent.sha) and ent.sha

    def wantrecurse_pre(ent):
        return not already_saved(ent)

    def wantrecurse_during(ent):
        return not already_saved(ent) or ent.sha_missing()

    def find_hardlink_target(hlink_db, ent):
        # First path recorded for the entry's (dev, ino) when the entry
        # is a non-directory with more than one link; None otherwise.
        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
            if link_paths:
                return link_paths[0]

    total = ftotal = 0
    if opt.progress:
        # Preliminary pass over the index: count the entries and add up
        # the sizes that still need saving, for the progress display.
        for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
            if not (ftotal % 10024):
                qprogress('Reading index: %d\r' % ftotal)
            exists = ent.exists()
            hashvalid = already_saved(ent)
            ent.set_sha_missing(not hashvalid)
            if not opt.smaller or ent.size < opt.smaller:
                if exists and not hashvalid:
                    total += ent.size
            ftotal += 1
        progress('Reading index: %d, done.\n' % ftotal)
        hashsplit.progress_callback = progress_report

    # Root collisions occur when strip or graft options map more than one
    # path to the same directory (paths which originally had separate
    # parents).  When that situation is detected, use empty metadata for
    # the parent.  Otherwise, use the metadata for the common parent.
    # Collision example: "bup save ... --strip /foo /foo/bar /bar".

    # FIXME: Add collision tests, or handle collisions some other way.

    # FIXME: Detect/handle strip/graft name collisions (other than root),
    # i.e. if '/foo/bar' and '/bar' both map to '/'.

    first_root = None
    root_collision = None
    tstart = time.time()
    fcount = 0
    lastskip_name = None
    lastdir = b''
    for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_during):
        (dir, file) = os.path.split(ent.name)
        exists = (ent.flags & index.IX_EXISTS)
        hashvalid = already_saved(ent)
        wasmissing = ent.sha_missing()
        oldsize = ent.size
        if opt.verbose:
            # Status letter: D when the entry no longer exists, A when
            # its sha is the empty sha, M when it otherwise needs
            # saving, blank when already saved.
            if not exists:
                status = 'D'
            elif not hashvalid:
                if ent.sha == index.EMPTY_SHA:
                    status = 'A'
                else:
                    status = 'M'
            else:
                status = ' '
            if opt.verbose >= 2:
                log('%s %-70s\n' % (status, path_msg(ent.name)))
            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
                if not lastdir.startswith(dir):
                    log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b''))))
                lastdir = dir

        if opt.progress:
            progress_report(0)
        fcount += 1

        if not exists:
            continue
        if opt.smaller and ent.size >= opt.smaller:
            if exists and not hashvalid:
                if opt.verbose:
                    log('skipping large file "%s"\n' % path_msg(ent.name))
                lastskip_name = ent.name
            continue

        assert (dir.startswith(b'/'))
        if opt.strip:
            dirp = stripped_path_components(dir, extra)
        elif opt.strip_path:
            dirp = stripped_path_components(dir, [opt.strip_path])
        elif graft_points:
            dirp = grafted_path_components(graft_points, dir)
        else:
            dirp = path_components(dir)

        # At this point, dirp contains a representation of the archive
        # path that looks like [(archive_dir_name, real_fs_path), ...].
        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
        # might look like this at some point:
        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].

        # This dual representation supports stripping/grafting, where the
        # archive path may not have a direct correspondence with the
        # filesystem.  The root directory is represented by an initial
        # component named '', and any component that doesn't have a
        # corresponding filesystem directory (due to grafting, for
        # example) will have a real_fs_path of None, i.e. [('', None),
        # ...].

        if first_root == None:
            first_root = dirp[0]
        elif first_root != dirp[0]:
            root_collision = True

        # If switching to a new sub-tree, finish the current sub-tree.
        while parts > [x[0] for x in dirp]:
            _pop(force_tree=None)

        # If switching to a new sub-tree, start a new sub-tree.
        for path_component in dirp[len(parts):]:
            dir_name, fs_path = path_component
            # Not indexed, so just grab the FS metadata or use empty metadata.
            try:
                meta = metadata.from_path(fs_path, normalized=True) \
                    if fs_path else metadata.Metadata()
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = dir_name
                meta = metadata.Metadata()
            _push(dir_name, meta)

        if not file:
            if len(parts) == 1:
                continue  # We're at the top level -- keep the current root dir
            # Since there's no filename, this is a subdir -- finish it.
            oldtree = already_saved(ent)  # may be None
            newtree = _pop(force_tree=oldtree)
            if not oldtree:
                # An entry skipped beneath this directory leaves the
                # directory entry invalidated rather than validated.
                if lastskip_name and lastskip_name.startswith(ent.name):
                    ent.invalidate()
                else:
                    ent.validate(GIT_MODE_TREE, newtree)
                ent.repack()
            if exists and wasmissing:
                _nonlocal['count'] += oldsize
            continue

        # it's not a directory
        if hashvalid:
            # Already stored: reuse the recorded sha and the metadata
            # kept in the metastore.
            id = ent.sha
            git_name = git.mangle_name(file, ent.mode, ent.gitmode)
            git_info = (ent.gitmode, git_name, id)
            shalists[-1].append(git_info)
            sort_key = git.shalist_item_sort_key((ent.mode, file, id))
            meta = msr.metadata_at(ent.meta_ofs)
            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
            # Restore the times that were cleared to 0 in the metastore.
            (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
            metalists[-1].append((sort_key, meta))
        else:
            id = None
            hlink = find_hardlink_target(hlink_db, ent)
            try:
                meta = metadata.from_path(
                    ent.name, hardlink_target=hlink, normalized=True,
                    after_stat=after_nondir_metadata_stat)
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = ent.name
                continue
            if stat.S_IFMT(ent.mode) != stat.S_IFMT(meta.mode):
                # The mode changed since we indexed the file, this is bad.
                # This can cause two issues:
                # 1) We e.g. think the file is a regular file, but now it's
                #    something else (a device, socket, FIFO or symlink, etc.)
                #    and _read_ from it when we shouldn't.
                # 2) We then record it as valid, but don't update the index
                #    metadata, and on a subsequent save it has 'hashvalid'
                #    but is recorded as the file type from the index, when
                #    the content is something else ...
                # Avoid all of these consistency issues by just skipping such
                # things - it really ought to not happen anyway.
                add_error("%s: mode changed since indexing, skipping." % path_msg(ent.name))
                lastskip_name = ent.name
                continue
            if stat.S_ISREG(ent.mode):
                try:
                    # If the file changes while we're reading it, then our reading
                    # may stop at some point, but the stat() above may have gotten
                    # a different size already. Recalculate the meta size so that
                    # the repository records the accurate size in the metadata, even
                    # if the other stat() data might be slightly older than the file
                    # content (which we can't fix, this is inherently racy, but we
                    # can prevent the size mismatch.)
                    meta.size = 0
                    def new_blob(data):
                        meta.size += len(data)
                        return w.new_blob(data)
                    before_saving_regular_file(ent.name)
                    with hashsplit.open_noatime(ent.name) as f:
                        (mode, id) = hashsplit.split_to_blob_or_tree(
                            new_blob, w.new_tree, [f],
                            keep_boundaries=False)
                except (IOError, OSError) as e:
                    add_error('%s: %s' % (ent.name, e))
                    lastskip_name = ent.name
            elif stat.S_ISDIR(ent.mode):
                assert (0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                mode, id = (GIT_MODE_SYMLINK, w.new_blob(meta.symlink_target))
            else:
                # Everything else should be fully described by its
                # metadata, so just record an empty blob, so the paths
                # in the tree and .bupm will match up.
                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))

            # id is still None when reading a regular file failed above;
            # such entries are neither validated nor added to the tree.
            if id:
                ent.validate(mode, id)
                ent.repack()
                git_name = git.mangle_name(file, ent.mode, ent.gitmode)
                git_info = (mode, git_name, id)
                shalists[-1].append(git_info)
                sort_key = git.shalist_item_sort_key((ent.mode, file, id))
                metalists[-1].append((sort_key, meta))

        if exists and wasmissing:
            _nonlocal['count'] += oldsize
            _nonlocal['subcount'] = 0

    if opt.progress:
        pct = total and _nonlocal['count'] * 100.0 / total or 100
        progress(
            'Saving: %.2f%% (%d/%dk, %d/%d files), done. \n' %
            (pct, _nonlocal['count'] / 1024, total / 1024, fcount, ftotal))

    while len(parts) > 1:  # _pop() all the parts above the root
        _pop(force_tree=None)
    assert (len(shalists) == 1)
    assert (len(metalists) == 1)

    # Finish the root directory.
    tree = _pop(
        force_tree=None,
        # When there's a collision, use empty metadata for the root.
        dir_metadata=metadata.Metadata() if root_collision else None)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.tree:
        out.write(hexlify(tree))
        out.write(b'\n')
    if opt.commit or name:
        if compat.py_maj > 2:
            # Strip b prefix from python 3 bytes reprs to preserve previous format
            msgcmd = b'[%s]' % b', '.join(
                [repr(argv_bytes(x))[1:].encode('ascii') for x in argv])
        else:
            msgcmd = repr(argv)
        msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
        userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
        # The same user line and date are passed for both the author
        # and the committer slots.
        commit = w.new_commit(tree, oldref, userline, date, None, userline, date, None, msg)
        if opt.commit:
            out.write(hexlify(commit))
            out.write(b'\n')

    msr.close()
    w.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
        sys.exit(1)
def main():
    """Run the get command using the process arguments (compat.argv).

    Resolves every target spec against the source and destination
    repositories, dispatches each one to the handler for its method
    while writing through the destination's pack writer, and then
    updates the affected refs.  Exits with status 1 if any errors were
    recorded in saved_errors.
    """
    handle_ctrl_c()
    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    opt = parse_args(compat.argv)
    git.check_repo_or_die()
    if opt.source:
        opt.source = argv_bytes(opt.source)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if is_reverse and opt.remote:
        misuse("don't use -r in reverse mode; it's automatic")
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.remote or is_reverse:
        dest_repo = RemoteRepo(opt.remote)
    else:
        dest_repo = LocalRepo()

    with dest_repo as dest_repo:
        with LocalRepo(repo_dir=opt.source) as src_repo:
            with dest_repo.new_packwriter(compression_level=opt.compress) as writer:
                # Resolve and validate all sources and destinations,
                # implicit or explicit, and do it up-front, so we can
                # fail before we start writing (for any obviously
                # broken cases).
                target_items = resolve_targets(opt.target_specs,
                                               src_repo, dest_repo)

                updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))
                no_ref_info = (None, None)

                # Maps a spec's method name to the function that
                # carries it out; both pick variants share one handler.
                handlers = {'ff': handle_ff,
                            'append': handle_append,
                            'force-pick': handle_pick,
                            'pick': handle_pick,
                            'new-tag': handle_new_tag,
                            'replace': handle_replace,
                            'unnamed': handle_unnamed}

                for item in target_items:
                    debug1('get-spec: %r\n' % (item.spec,))
                    debug1('get-src: %s\n' % loc_desc(item.src))
                    debug1('get-dest: %s\n' % loc_desc(item.dest))
                    # Translate the destination path into a ref name:
                    # paths under /.tag/ become tags, any other path a
                    # branch head.
                    dest_path = item.dest and item.dest.path
                    if dest_path:
                        if dest_path.startswith(b'/.tag/'):
                            dest_ref = b'refs/tags/%s' % dest_path[6:]
                        else:
                            dest_ref = b'refs/heads/%s' % dest_path[1:]
                    else:
                        dest_ref = None

                    dest_hash = item.dest and item.dest.hash
                    orig_ref, cur_ref = updated_refs.get(dest_ref, no_ref_info)
                    orig_ref = orig_ref or dest_hash
                    cur_ref = cur_ref or dest_hash

                    handler = handlers[item.spec.method]
                    item_result = handler(item, src_repo, writer, opt)
                    # Handlers return either (new_id, tree) or a
                    # one-element sequence holding just new_id.
                    # NOTE(review): in the one-element case 'tree' keeps
                    # its value from an earlier iteration (or is unbound
                    # on the first); confirm that case never reaches
                    # the tree= logging below.
                    if len(item_result) > 1:
                        new_id, tree = item_result
                    else:
                        new_id = item_result[0]

                    if not dest_ref:
                        log_item(item.spec.src, item.src.type, opt)
                    else:
                        updated_refs[dest_ref] = (orig_ref, new_id)
                        if dest_ref.startswith(b'refs/tags/'):
                            log_item(item.spec.src, item.src.type, opt, tag=new_id)
                        else:
                            log_item(item.spec.src, item.src.type, opt,
                                     tree=tree, commit=new_id)

            # Only update the refs at the very end, once the writer is
            # closed, so that if something goes wrong above, the old refs
            # will be undisturbed.
            for ref_name, info in items(updated_refs):
                orig_ref, new_ref = info
                try:
                    dest_repo.update_ref(ref_name, new_ref, orig_ref)
                    if opt.verbose:
                        new_hex = hexlify(new_ref)
                        if orig_ref:
                            orig_hex = hexlify(orig_ref)
                            log('updated %r (%s -> %s)\n' % (ref_name, orig_hex, new_hex))
                        else:
                            log('updated %r (%s)\n' % (ref_name, new_hex))
                except (git.GitError, client.ClientError) as ex:
                    # A failed ref update is recorded and the remaining
                    # refs are still attempted.
                    add_error('unable to update ref %r: %s' % (ref_name, ex))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
        sys.exit(1)
if opt.strip_path: o.fatal("--strip-path is incompatible with --graft") for (option, parameter) in flags: if option == "--graft": parameter = argv_bytes(parameter) splitted_parameter = parameter.split(b'=') if len(splitted_parameter) != 2: o.fatal("a graft point must be of the form old_path=new_path") old_path, new_path = splitted_parameter if not (old_path and new_path): o.fatal("a graft point cannot be empty") graft_points.append((resolve_parent(old_path), resolve_parent(new_path))) is_reverse = environ.get(b'BUP_SERVER_REVERSE') if is_reverse and opt.remote: o.fatal("don't use -r in reverse mode; it's automatic") name = opt.name if name and not valid_save_name(name): o.fatal("'%s' is not a valid branch name" % path_msg(name)) refname = name and b'refs/heads/%s' % name or None if opt.remote or is_reverse: try: cli = client.Client(opt.remote) except client.ClientError as e: log('error: %s' % e) sys.exit(1) oldref = refname and cli.read_ref(refname) or None w = cli.new_packwriter(compression_level=opt.compress)
'subtree-id' : subtree_id,
'tree-0-id' : tree_0_id,
'tree-1-id' : tree_1_id,
'tree-2-id' : tree_2_id,
'commit-0-id' : commit_0_id,
'commit-1-id' : commit_1_id,
'commit-2-id' : commit_2_id,
'save-1' : save_1,
'save-2' : save_2,
'subtree-path' : subtree_path,
'subtree-vfs-path' : subtree_vfs_path}

# FIXME: this fails in a strange way:
#   WVPASS given nothing get --ff not-there

# Dispositions exercised by test_get; the extra ones are only enabled
# when BUP_TEST_LEVEL is at least 11.
dispositions_to_test = ('get',)

if int(environ.get(b'BUP_TEST_LEVEL', b'0')) >= 11:
    dispositions_to_test += ('get-on', 'get-to')

# Each category name selects the module-level function named
# '_test_' + category.
categories = ('replace', 'universal', 'ff', 'append', 'pick_force', 'pick_noforce', 'new_tag', 'unnamed')

@pytest.mark.parametrize("disposition,category",
                         product(dispositions_to_test, categories))
def test_get(tmpdir, disposition, category):
    """Run one category of get tests for one disposition.

    Works inside 'tmpdir': builds the source data with
    create_get_src(), then calls the '_test_<category>' function found
    in this module's globals with the disposition and that data.  The
    working directory is changed back to 'top' afterwards, pass or
    fail.
    """
    chdir(tmpdir)
    try:
        src_info = create_get_src()
        globals().get('_test_' + category)(disposition, src_info)
    finally:
        chdir(top)
def test_prune_older(tmpdir):
    """Exercise "bup prune-older" against a randomly generated history.

    A repository with a configurable number of random saves is created
    under 'tmpdir'.  The test then checks that prune-older refuses to
    run without keep arguments, and that for randomly generated period
    specs the surviving saves match expected_retentions(), both with
    --no-gc and with garbage collection.  The random seed and the
    population/cycle counts can be overridden through BUP_TEST_*
    environment variables.
    """
    identity = (
        (b'GIT_AUTHOR_NAME', b'bup test'),
        (b'GIT_COMMITTER_NAME', b'bup test'),
        (b'GIT_AUTHOR_EMAIL', b'bup@a425bc70a02811e49bdf73ee56450e6f'),
        (b'GIT_COMMITTER_EMAIL', b'bup@a425bc70a02811e49bdf73ee56450e6f'),
    )
    for var, value in identity:
        environ[var] = value

    seed = int(environ.get(b'BUP_TEST_SEED', time()))
    random.seed(seed)
    print('random seed:', seed, file=stderr)

    save_population = int(environ.get(b'BUP_TEST_PRUNE_OLDER_SAVES', 2000))
    prune_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_CYCLES', 20))
    prune_gc_cycles = int(environ.get(b'BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10))

    bup_cmd = bup.path.exe()

    for var in (b'BUP_DIR', b'GIT_DIR'):
        environ[var] = tmpdir + b'/work/.git'
    now = int(time())
    three_years_ago = now - (60 * 60 * 24 * 366 * 3)
    chdir(tmpdir)
    ex([b'git', b'init', b'work'])
    ex([b'git', b'config', b'gc.autoDetach', b'false'])

    wvstart('generating %d random saves' % save_population)
    chdir(tmpdir + b'/work')
    save_utcs = create_older_random_saves(save_population, three_years_ago, now)
    chdir(tmpdir)
    test_set_hash = exo([b'git', b'show-ref', b'-s', b'master']).out.rstrip()
    ls_saves = exo((bup_cmd, b'ls', b'master')).out.splitlines()
    wvpasseq(save_population + 1, len(ls_saves))

    def reset_to_test_set():
        # Put the branch back to the full set of generated saves.
        ex([b'git', b'reset', b'--hard', test_set_hash])

    # Common leading arguments of every prune-older invocation.
    prune_prefix = (bup_cmd, b'prune-older', b'-v', b'--unsafe')
    wrt_args = (b'--wrt', b'%d' % now)
    branch = (b'master',)

    wvstart('ensure everything kept, if no keep arguments')
    reset_to_test_set()
    proc = ex(prune_prefix + (b'--no-gc',) + wrt_args + branch,
              stdout=None, stderr=PIPE, check=False)
    wvpassne(proc.rc, 0)
    wvpass(b'at least one keep argument is required' in proc.err)
    check_prune_result(save_utcs)

    wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
                                                               save_population))
    # Make it more likely we'll have some outside the save range.
    no_gc_specs = unique_period_specs(prune_cycles,
                                      three_years_ago - period_scale[b'm'],
                                      now)
    for spec in no_gc_specs:
        reset_to_test_set()
        expected = sorted(expected_retentions(save_utcs, now, spec))
        ex(prune_prefix + (b'--no-gc',) + wrt_args
           + period_spec_to_period_args(spec)
           + branch)
        check_prune_result(expected)

    # More expensive because we have to recreate the repo each time
    wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
                                                            save_population))
    reset_to_test_set()
    copytree(b'work/.git', b'clean-test-repo', symlinks=True)
    # Make it more likely we'll have some outside the save range.
    gc_specs = unique_period_specs(prune_gc_cycles,
                                   three_years_ago - period_scale[b'm'],
                                   now)
    for spec in gc_specs:
        rmtree(b'work/.git')
        copytree(b'clean-test-repo', b'work/.git')
        expected = sorted(expected_retentions(save_utcs, now, spec))
        ex(prune_prefix + wrt_args
           + period_spec_to_period_args(spec)
           + branch)
        check_prune_result(expected)
def test_commit_parsing():
    """Check git.get_commit_items() against what git itself reports.

    Creates a scratch repository, makes a commit with a known author and
    date, and compares the parsed commit fields with the expected values
    and with the output of 'git show'.  A second commit then verifies
    that the parent is reported.

    The working directory and every environment variable touched here
    (the GIT_AUTHOR_*/GIT_COMMITTER_* identity as well as GIT_DIR and
    BUP_DIR) are restored afterwards, so no reference to the removed
    temporary repository leaks out of the test.
    """
    touched_env_vars = (b'GIT_AUTHOR_NAME', b'GIT_AUTHOR_EMAIL',
                        b'GIT_COMMITTER_NAME', b'GIT_COMMITTER_EMAIL',
                        b'GIT_DIR', b'BUP_DIR')

    def restore_env_var(name, val):
        if val is None:
            # pop() rather than del: if the test failed early the
            # variable may never have been set, and a KeyError raised
            # during cleanup would mask the real failure.
            environ.pop(name, None)
        else:
            environ[name] = val

    def showval(commit, val):
        # Ask git for a single --pretty=format field of the commit.
        return readpipe([b'git', b'show', b'-s',
                         b'--pretty=format:%s' % val, commit]).strip()

    with no_lingering_errors():
        with test_tempdir(b'bup-tgit-') as tmpdir:
            orig_cwd = os.getcwd()
            workdir = tmpdir + b'/work'
            repodir = workdir + b'/.git'
            orig_env = [(name, environ.get(name))
                        for name in touched_env_vars]
            try:
                environ[b'GIT_AUTHOR_NAME'] = b'bup test'
                environ[b'GIT_COMMITTER_NAME'] = environ[b'GIT_AUTHOR_NAME']
                environ[b'GIT_AUTHOR_EMAIL'] = \
                    b'bup@a425bc70a02811e49bdf73ee56450e6f'
                environ[b'GIT_COMMITTER_EMAIL'] = environ[b'GIT_AUTHOR_EMAIL']

                readpipe([b'git', b'init', workdir])
                environ[b'GIT_DIR'] = environ[b'BUP_DIR'] = repodir
                git.check_repo_or_die(repodir)
                os.chdir(workdir)
                with open('foo', 'w') as f:
                    print('bar', file=f)
                readpipe([b'git', b'add', b'.'])
                readpipe([b'git', b'commit', b'-am', b'Do something',
                          b'--author', b'Someone <someone@somewhere>',
                          b'--date', b'Sat Oct 3 19:48:49 2009 -0400'])
                commit = readpipe([b'git', b'show-ref', b'-s',
                                   b'master']).strip()
                tree = showval(commit, b'%T')
                cname = showval(commit, b'%cn')
                cmail = showval(commit, b'%ce')
                cdate = showval(commit, b'%ct')
                # %ci ends in the UTC offset as [+-]HHMM; turn that into
                # signed seconds.
                coffs = showval(commit, b'%ci')
                coffs = coffs[-5:]
                coff = (int(coffs[-4:-2]) * 60 * 60) + (int(coffs[-2:]) * 60)
                if bytes_from_byte(coffs[-5]) == b'-':
                    coff = - coff
                commit_items = git.get_commit_items(commit, git.cp())
                WVPASSEQ(commit_items.parents, [])
                WVPASSEQ(commit_items.tree, tree)
                WVPASSEQ(commit_items.author_name, b'Someone')
                WVPASSEQ(commit_items.author_mail, b'someone@somewhere')
                WVPASSEQ(commit_items.author_sec, 1254613729)
                WVPASSEQ(commit_items.author_offset, -(4 * 60 * 60))
                WVPASSEQ(commit_items.committer_name, cname)
                WVPASSEQ(commit_items.committer_mail, cmail)
                WVPASSEQ(commit_items.committer_sec, int(cdate))
                WVPASSEQ(commit_items.committer_offset, coff)
                WVPASSEQ(commit_items.message, b'Do something\n')

                with open(b'bar', 'wb') as f:
                    f.write(b'baz\n')
                readpipe([b'git', b'add', b'.'])
                readpipe([b'git', b'commit', b'-am', b'Do something else'])
                child = readpipe([b'git', b'show-ref', b'-s',
                                  b'master']).strip()
                commit_items = git.get_commit_items(child, git.cp())
                WVPASSEQ(commit_items.parents, [commit])
            finally:
                os.chdir(orig_cwd)
                for name, val in orig_env:
                    restore_env_var(name, val)
def defaultrepo():
    """Return the repository path to use when none is given explicitly.

    This is the value of the BUP_DIR environment variable when it is set
    to a non-empty value, and the expansion of ~/.bup otherwise.
    """
    return environ.get(b'BUP_DIR') or os.path.expanduser(b'~/.bup')
# Work out how to run the requested subcommand: as a built-in bup.cmd.*
# module when one can be imported, otherwise as an external bup-<name>
# program in cmdpath.
subcmd_name = subcmd[0]
if not subcmd_name:
    usage()

try:
    cmd_module = import_module('bup.cmd.'
                               + subcmd_name.decode('ascii').replace('-', '_'))
except (ModuleNotFoundError, UnicodeDecodeError):
    # A name that is not plain ASCII cannot name a built-in module.
    # Treat it like any other missing module so that the external
    # command lookup and the normal "unknown command" error below apply,
    # instead of dying with a traceback.
    cmd_module = None

if not cmd_module:
    subcmd[0] = os.path.join(cmdpath, b'bup-' + subcmd_name)
    if not os.path.exists(subcmd[0]):
        usage('error: unknown command "%s"' % path_msg(subcmd_name))

# A non-zero BUP_FORCE_TTY in the environment, or one of the subcommands
# listed here, disables the tty fix-up computed below.
already_fixed = int(environ.get(b'BUP_FORCE_TTY', 0))
if subcmd_name in (b'mux', b'ftp', b'help'):
    already_fixed = True
fix_stdout = not already_fixed and os.isatty(1)
fix_stderr = not already_fixed and os.isatty(2)

if fix_stdout or fix_stderr:
    # BUP_FORCE_TTY is a bit mask: 1 means stdout is a tty, 2 means
    # stderr is a tty.
    tty_env = merge_dict(environ,
                         {b'BUP_FORCE_TTY': (b'%d'
                                             % ((1 if fix_stdout else 0)
                                                + (2 if fix_stderr else 0))),
                          b'BUP_TTY_WIDTH': b'%d' % _tty_width(), })
else:
    tty_env = environ
def main(argv):
    """Split the input into chunks according to the options in argv.

    The input is stdin, the files named on the command line, or (with
    --git-ids) the git objects whose ids are read from stdin.  Depending
    on the options the result is written as blob ids (-b), a tree id
    (-t) or a commit id (-c) on stdout, stored under a branch (-n),
    copied verbatim to stdout (--copy), or only split (--noop).

    Invalid option combinations are reported through the option
    parser's fatal().  If any errors were saved while processing, a
    warning is logged and the process exits with status 1.
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.verbose is None:
        opt.verbose = 0

    if not (opt.blobs or opt.tree or opt.commit or opt.name or
            opt.noop or opt.copy):
        o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
    if opt.copy and (opt.blobs or opt.tree):
        o.fatal('--copy is incompatible with -b, -t')
    if (opt.noop or opt.copy) and (opt.commit or opt.name):
        o.fatal('--noop and --copy are incompatible with -c, -n')
    if opt.blobs and (opt.tree or opt.commit or opt.name):
        o.fatal('-b is incompatible with -t, -c, -n')
    if extra and opt.git_ids:
        o.fatal("don't provide filenames when using --git-ids")

    if opt.verbose >= 2:
        git.verbose = opt.verbose - 1
        opt.bench = 1

    max_pack_size = None
    if opt.max_pack_size:
        max_pack_size = parse_num(opt.max_pack_size)
    max_pack_objects = None
    if opt.max_pack_objects:
        max_pack_objects = parse_num(opt.max_pack_objects)

    if opt.fanout:
        hashsplit.fanout = parse_num(opt.fanout)
    if opt.blobs:
        hashsplit.fanout = 0
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    # Hack around lack of nonlocal vars in python 2
    total_bytes = [0]

    def prog(filenum, nbytes):
        # Progress callback handed to the hashsplit functions.
        total_bytes[0] += nbytes
        if filenum > 0:
            qprogress('Splitting: file #%d, %d kbytes\r'
                      % (filenum + 1, total_bytes[0] // 1024))
        else:
            qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")
    start_time = time.time()

    if opt.name and not valid_save_name(opt.name):
        o.fatal("'%r' is not a valid branch name." % opt.name)
    refname = (b'refs/heads/%s' % opt.name) if opt.name else None

    if opt.noop or opt.copy:
        cli = pack_writer = oldref = None
    elif opt.remote or is_reverse:
        git.check_repo_or_die()
        cli = client.Client(opt.remote)
        oldref = (cli.read_ref(refname) or None) if refname else None
        pack_writer = cli.new_packwriter(compression_level=opt.compress,
                                         max_pack_size=max_pack_size,
                                         max_pack_objects=max_pack_objects)
    else:
        git.check_repo_or_die()
        cli = None
        oldref = (git.read_ref(refname) or None) if refname else None
        pack_writer = git.PackWriter(compression_level=opt.compress,
                                     max_pack_size=max_pack_size,
                                     max_pack_objects=max_pack_objects)

    stdin_stream = byte_stream(sys.stdin)

    if opt.git_ids:
        # the input is actually a series of git object ids that we should
        # retrieve and split.
        #
        # This is a bit messy, but basically it converts from a series of
        # CatPipe.get() iterators into a series of file-type objects.
        # It would be less ugly if either CatPipe.get() returned a
        # file-like object (not very efficient), or split_to_shalist()
        # expected an iterator instead of a file.
        cp = git.CatPipe()

        class IterToFile:
            def __init__(self, it):
                self.it = iter(it)

            def read(self, size):
                # size is ignored; each call returns the next item, or
                # b'' once the iterator is exhausted.
                v = next(self.it, None)
                return v or b''

        def read_ids():
            while 1:
                line = stdin_stream.readline()
                if not line:
                    break
                try:
                    it = cp.get(line.strip())
                    next(it, None)  # skip the file info
                except KeyError as e:
                    add_error('error: %s' % e)
                    continue
                yield IterToFile(it)
        files = read_ids()
    else:
        # the input either comes from a series of files or from stdin.
        if extra:
            files = (open(argv_bytes(fn), 'rb') for fn in extra)
        else:
            files = [stdin_stream]

    if pack_writer:
        new_blob = pack_writer.new_blob
        new_tree = pack_writer.new_tree
    elif opt.blobs or opt.tree:
        # --noop mode: compute the ids without storing anything
        def new_blob(content):
            return git.calc_hash(b'blob', content)

        def new_tree(shalist):
            return git.calc_hash(b'tree', git.tree_encode(shalist))

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.blobs:
        shalist = hashsplit.split_to_blobs(new_blob, files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog)
        for (sha, size, level) in shalist:
            out.write(hexlify(sha) + b'\n')
            reprogress()
    elif opt.tree or opt.commit or opt.name:
        if opt.name:  # insert dummy_name which may be used as a restore target
            mode, sha = \
                hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
                                                keep_boundaries=opt.keep_boundaries,
                                                progress=prog)
            splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE,
                                             mode)
            shalist = [(mode, splitfile_name, sha)]
        else:
            shalist = hashsplit.split_to_shalist(
                new_blob, new_tree, files,
                keep_boundaries=opt.keep_boundaries, progress=prog)
        tree = new_tree(shalist)
    else:
        it = hashsplit.hashsplit_iter(files,
                                      keep_boundaries=opt.keep_boundaries,
                                      progress=prog)
        for (blob, level) in it:
            hashsplit.total_split += len(blob)
            if opt.copy:
                # Write the raw chunk to the byte stream.  Going through
                # str() and the text-mode sys.stdout would, on Python 3,
                # emit the repr of the chunk instead of its contents.
                out.write(blob)

    if opt.verbose:
        log('\n')
    if opt.tree:
        out.write(hexlify(tree) + b'\n')
    if opt.commit or opt.name:
        msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = pack_writer.new_commit(tree, oldref, userline, date, None,
                                        userline, date, None, msg)
        if opt.commit:
            out.write(hexlify(commit) + b'\n')

    if pack_writer:
        pack_writer.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    secs = time.time() - start_time
    size = hashsplit.total_split
    if opt.bench:
        log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n'
            % (size / 1024, secs, size / 1024 / secs))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n'
            % len(saved_errors))
        sys.exit(1)
def opts_from_cmdline(argv):
    """Parse argv[1:] against optspec and return the validated options.

    On top of the parsed values, the returned object has path-like
    options converted with argv_bytes(), date resolved to a timestamp
    (the current time when --date is absent), and the additional
    attributes progress, smaller, sources, grafts and is_reverse.
    Invalid or conflicting options are reported through the parser's
    fatal().
    """
    parser = options.Options(optspec)
    opt, flags, extra = parser.parse_bytes(argv[1:])
    fatal = parser.fatal

    for attr in ('indexfile', 'name', 'remote', 'strip_path'):
        given = getattr(opt, attr)
        if given:
            setattr(opt, attr, argv_bytes(given))

    if not (opt.tree or opt.commit or opt.name):
        fatal("use one or more of -t, -c, -n")
    if not extra:
        fatal("no filenames given")

    opt.date = (parse_date_or_fatal(opt.date, fatal) if opt.date
                else time.time())

    opt.progress = (istty2 and not opt.quiet)
    opt.smaller = parse_num(opt.smaller or 0)
    if opt.bwlimit:
        opt.bwlimit = parse_num(opt.bwlimit)

    if opt.strip and opt.strip_path:
        fatal("--strip is incompatible with --strip-path")

    opt.sources = [argv_bytes(path) for path in extra]

    graft_points = []
    if opt.graft:
        if opt.strip:
            fatal("--strip is incompatible with --graft")
        if opt.strip_path:
            fatal("--strip-path is incompatible with --graft")

        # --graft may be given several times, so collect every
        # occurrence from the raw flag list.
        for flag, value in flags:
            if flag != "--graft":
                continue
            pieces = argv_bytes(value).split(b'=')
            if len(pieces) != 2:
                fatal("a graft point must be of the form old_path=new_path")
            old_path, new_path = pieces
            if not old_path or not new_path:
                fatal("a graft point cannot be empty")
            graft_points.append((resolve_parent(old_path),
                                 resolve_parent(new_path)))
    opt.grafts = graft_points

    opt.is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if opt.is_reverse and opt.remote:
        fatal("don't use -r in reverse mode; it's automatic")

    if opt.name and not valid_save_name(opt.name):
        fatal("'%s' is not a valid branch name" % path_msg(opt.name))

    return opt