def test_restore_over_existing_target(tmpdir):
    """Exercise create_path() when the target path already exists.

    Metadata is captured for the same path once as a directory and once
    as a regular file, and each is then restored over the other kind.
    Restoring over a non-empty directory is expected to raise.
    """
    target = tmpdir + b'/foo'

    # Capture the metadata while the target is a directory...
    os.mkdir(target)
    dir_meta = metadata.from_path(target, archive_path=target,
                                  save_symlinks=True)
    os.rmdir(target)
    # ...and again while it is an empty regular file.
    with open(target, 'w'):
        pass
    file_meta = metadata.from_path(target, archive_path=target,
                                   save_symlinks=True)

    # The sequence matters: dir over file, dir over dir, file over dir,
    # file over file.
    restore_steps = ((dir_meta, stat.S_ISDIR),
                     (dir_meta, stat.S_ISDIR),
                     (file_meta, stat.S_ISREG),
                     (file_meta, stat.S_ISREG))
    for meta, has_expected_type in restore_steps:
        WVPASSEQ(meta.create_path(target, create_symlinks=True), None)
        WVPASS(has_expected_type(os.stat(target).st_mode))

    child = target + b'/bar'

    # Restore file over non-empty dir.
    os.remove(target)
    os.mkdir(target)
    with open(child, 'w'):
        pass
    WVEXCEPT(Exception, file_meta.create_path, target, create_symlinks=True)

    # Restore dir over non-empty dir.
    os.remove(child)
    os.mkdir(child)
    WVEXCEPT(Exception, dir_meta.create_path, target, create_symlinks=True)
def test_restore_over_existing_target():
    """Exercise create_path() when the target path already exists.

    The scratch directory is only removed when no new wvtest failures
    were recorded during the test.
    """
    failures_before = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tmetadata-')
    target = tmpdir + '/foo'

    # Capture the metadata while the target is a directory...
    os.mkdir(target)
    dir_meta = metadata.from_path(target, archive_path=target,
                                  save_symlinks=True)
    os.rmdir(target)
    # ...and again while it is an empty regular file.
    with open(target, 'w'):
        pass
    file_meta = metadata.from_path(target, archive_path=target,
                                   save_symlinks=True)

    # The sequence matters: dir over file, dir over dir, file over dir,
    # file over file.
    restore_steps = ((dir_meta, stat.S_ISDIR),
                     (dir_meta, stat.S_ISDIR),
                     (file_meta, stat.S_ISREG),
                     (file_meta, stat.S_ISREG))
    for meta, has_expected_type in restore_steps:
        WVPASSEQ(meta.create_path(target, create_symlinks=True), None)
        WVPASS(has_expected_type(os.stat(target).st_mode))

    child = target + '/bar'

    # Restore file over non-empty dir.
    os.remove(target)
    os.mkdir(target)
    with open(child, 'w'):
        pass
    WVEXCEPT(Exception, file_meta.create_path, target, create_symlinks=True)

    # Restore dir over non-empty dir.
    os.remove(child)
    os.mkdir(child)
    WVEXCEPT(Exception, dir_meta.create_path, target, create_symlinks=True)

    # Leave the directory behind for inspection if anything failed.
    if wvfailure_count() == failures_before:
        subprocess.call(['rm', '-rf', tmpdir])
def test_restore_over_existing_target():
    """Exercise create_path() when the target path already exists.

    Works in a fresh temporary directory which is always removed
    afterwards.
    """
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        target = tmpdir + '/foo'

        # Capture the metadata while the target is a directory...
        os.mkdir(target)
        dir_meta = metadata.from_path(target, archive_path=target,
                                      save_symlinks=True)
        os.rmdir(target)
        # ...and again while it is an empty regular file.
        with open(target, 'w'):
            pass
        file_meta = metadata.from_path(target, archive_path=target,
                                       save_symlinks=True)

        # The sequence matters: dir over file, dir over dir, file over
        # dir, file over file.
        restore_steps = ((dir_meta, stat.S_ISDIR),
                         (dir_meta, stat.S_ISDIR),
                         (file_meta, stat.S_ISREG),
                         (file_meta, stat.S_ISREG))
        for meta, has_expected_type in restore_steps:
            WVPASSEQ(meta.create_path(target, create_symlinks=True), None)
            WVPASS(has_expected_type(os.stat(target).st_mode))

        child = target + '/bar'

        # Restore file over non-empty dir.
        os.remove(target)
        os.mkdir(target)
        with open(child, 'w'):
            pass
        WVEXCEPT(Exception, file_meta.create_path, target,
                 create_symlinks=True)

        # Restore dir over non-empty dir.
        os.remove(child)
        os.mkdir(child)
        WVEXCEPT(Exception, dir_meta.create_path, target,
                 create_symlinks=True)
    finally:
        subprocess.call(['rm', '-rf', tmpdir])
def test_restore_over_existing_target():
    """Exercise create_path() when the target path already exists.

    Runs inside no_lingering_errors() and a test_tempdir() scratch
    directory.
    """
    with no_lingering_errors(), test_tempdir('bup-tmetadata-') as tmpdir:
        target = tmpdir + '/foo'

        # Capture the metadata while the target is a directory...
        os.mkdir(target)
        dir_meta = metadata.from_path(target, archive_path=target,
                                      save_symlinks=True)
        os.rmdir(target)
        # ...and again while it is an empty regular file.
        with open(target, 'w'):
            pass
        file_meta = metadata.from_path(target, archive_path=target,
                                       save_symlinks=True)

        # The sequence matters: dir over file, dir over dir, file over
        # dir, file over file.
        restore_steps = ((dir_meta, stat.S_ISDIR),
                         (dir_meta, stat.S_ISDIR),
                         (file_meta, stat.S_ISREG),
                         (file_meta, stat.S_ISREG))
        for meta, has_expected_type in restore_steps:
            WVPASSEQ(meta.create_path(target, create_symlinks=True), None)
            WVPASS(has_expected_type(os.stat(target).st_mode))

        child = target + '/bar'

        # Restore file over non-empty dir.
        os.remove(target)
        os.mkdir(target)
        with open(child, 'w'):
            pass
        WVEXCEPT(Exception, file_meta.create_path, target,
                 create_symlinks=True)

        # Restore dir over non-empty dir.
        os.remove(child)
        os.mkdir(child)
        WVEXCEPT(Exception, dir_meta.create_path, target,
                 create_symlinks=True)
def test_from_path_error(tmpdir):
    """Check the error saved by from_path() for a mode-000 directory.

    Does nothing for the superuser or under fakeroot.  The saved-error
    checks only run when metadata.get_linux_file_attr is available.
    """
    if is_superuser() or detect_fakeroot():
        return
    target = tmpdir + b'/foo'
    os.mkdir(target)
    meta = metadata.from_path(target, archive_path=target,
                              save_symlinks=True)
    WVPASSEQ(meta.path, target)

    # Remove every permission bit and read the metadata again.
    os.chmod(target, 0o000)
    metadata.from_path(target, archive_path=target, save_symlinks=True)

    if not metadata.get_linux_file_attr:
        return
    print('saved_errors:', helpers.saved_errors, file=sys.stderr)
    WVPASS(len(helpers.saved_errors) == 1)
    first_error = _first_err()
    WVPASS(first_error.startswith('read Linux attr'))
    clear_errors()
def test_restore_restricted_user_group():
    """Check the errors saved when restoring uid 0 / gid 0 unprivileged.

    Does nothing for the superuser or under fakeroot.
    """
    if is_superuser() or detect_fakeroot():
        return
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        target = tmpdir + '/foo'
        os.mkdir(target)
        meta = metadata.from_path(target, archive_path=target,
                                  save_symlinks=True)
        WVPASSEQ(meta.path, target)
        WVPASSEQ(meta.apply_to_path(target), None)
        saved_uid = meta.uid

        # Try to hand the path to uid 0.
        meta.uid = 0
        meta.apply_to_path(target, restore_numeric_ids=True)
        WVPASS(len(helpers.saved_errors) == 1)
        uid_error = _first_err()
        WVPASS(uid_error.startswith('lchown: '))
        clear_errors()

        # Put the uid back and try gid 0 instead.
        meta.uid = saved_uid
        meta.gid = 0
        meta.apply_to_path(target, restore_numeric_ids=True)
        WVPASS(len(helpers.saved_errors) == 1)
        gid_error = _first_err()
        WVPASS(gid_error.startswith('lchown: ')
               or os.stat(target).st_gid == meta.gid)
        clear_errors()
    finally:
        subprocess.call(['rm', '-rf', tmpdir])
def test_item_mode():
    """Check vfs.item_mode() for an integer meta and a Metadata meta."""
    with no_lingering_errors():
        dir_mode = S_IFDIR | 0o755
        cwd_meta = metadata.from_path('.')
        null_oid = '\0' * 20

        # meta given as a bare mode.
        mode_item = vfs.Item(oid=null_oid, meta=dir_mode)
        wvpasseq(dir_mode, vfs.item_mode(mode_item))

        # meta given as the object returned by metadata.from_path().
        meta_item = vfs.Item(oid=null_oid, meta=cwd_meta)
        wvpasseq(cwd_meta.mode, vfs.item_mode(meta_item))
def update_path_in_index(path, tstart):
    """Refresh the index entry for path from the filesystem.

    When get_current() has no entry for path, the work is delegated to
    add_path_to_index() and its result is returned.  Otherwise the
    entry is updated from a fresh stat, the hardlink table is adjusted,
    and the entry is repacked.
    """
    if opt.verbose >= 2:
        print('updating {0} in the index'.format(path))

    entry = get_current(path)
    if entry is None:
        # Not in the index yet, so add it instead.
        return add_path_to_index(path)

    st = drecurse.OsFile(path).stat()
    meta = metadata.from_path(path, statinfo=st)

    # Drop the old hardlink record, then register the current one.
    old_is_dir = stat.S_ISDIR(entry.mode)
    if not old_is_dir and entry.nlink > 1:
        _hlinks.del_path(entry.name)
    new_is_dir = stat.S_ISDIR(st.st_mode)
    if not new_is_dir and st.st_nlink > 1:
        _hlinks.add_path(path, st.st_dev, st.st_ino)

    # Zero the times so they don't bloat the store -- they are already
    # in the index (they vary a lot and are fixed length).  "tmax" may
    # mangle the index mtime and ctime into fake values that must not
    # end up in a .bupm, but that should not be possible here: (1) when
    # "save" validates the index entry it always reads the metadata
    # from the filesystem; (2) metadata is only read/used from the
    # index if hashvalid is true; (3) index always invalidates "faked"
    # entries, because "old != new" in from_stat().
    meta.ctime = 0
    meta.mtime = 0
    meta.atime = 0

    meta_ofs = _msw.store(meta)
    entry.from_stat(st, meta_ofs, tstart, check_device=opt.check_device)
    entry.repack()
def test_handling_of_incorrect_existing_linux_xattrs():
    """Check that apply_to_path() resets xattrs to the saved set.

    After each disturbance (extra attribute, changed value, removed
    attribute) the path must again carry exactly user.foo == bar.
    Skipped unless running as the real superuser.
    """
    if not is_superuser() or detect_fakeroot():
        WVMSG("skipping test -- not superuser")
        return
    setup_testfs()
    for leftover in glob.glob("testfs/*"):
        ex("rm", "-rf", leftover)

    target = "testfs/foo"
    with open(target, "w"):
        pass
    xattr.set(target, "foo", "bar", namespace=xattr.NS_USER)
    meta = metadata.from_path(target, archive_path=target,
                              save_symlinks=True)

    disturbances = (
        lambda: xattr.set(target, "baz", "bax", namespace=xattr.NS_USER),
        lambda: xattr.set(target, "foo", "baz", namespace=xattr.NS_USER),
        lambda: xattr.remove(target, "foo", namespace=xattr.NS_USER),
    )
    for disturb in disturbances:
        disturb()
        meta.apply_to_path(target, restore_numeric_ids=False)
        WVPASSEQ(xattr.list(target), ["user.foo"])
        WVPASSEQ(xattr.get(target, "user.foo"), "bar")

    os.chdir(start_dir)
    cleanup_testfs()
def test_apply_to_path_restricted_access():
    """Check the errors saved by apply_to_path() under a mode-000 parent.

    Does nothing for the superuser, under fakeroot, or on cygwin.
    """
    # On cygwin chmod 000 isn't effective.
    if (is_superuser() or detect_fakeroot()
            or sys.platform.startswith('cygwin')):
        return
    with no_lingering_errors(), test_tempdir('bup-tmetadata-') as tmpdir:
        parent = tmpdir + '/foo'
        target = parent + '/bar'
        os.mkdir(parent)
        os.mkdir(target)
        clear_errors()
        meta = metadata.from_path(target, archive_path=target,
                                  save_symlinks=True)
        WVPASSEQ(meta.path, target)

        # Make the parent inaccessible before applying the metadata.
        os.chmod(parent, 000)
        meta.apply_to_path(target)
        print >> sys.stderr, 'saved_errors:', helpers.saved_errors

        # One error prefix is expected per operation attempted.
        expected_errors = ['utime: ']
        if meta.linux_attr and _linux_attr_supported(tmpdir):
            expected_errors.append('Linux chattr: ')
        if metadata.xattr and meta.linux_xattr:
            expected_errors.append("xattr.set '")

        WVPASS(len(helpers.saved_errors) == len(expected_errors))
        for i, prefix in enumerate(expected_errors):
            WVPASS(str(helpers.saved_errors[i]).startswith(prefix))
        clear_errors()
def test_apply_to_path_restricted_access():
    """Check the errors saved by apply_to_path() under a mode-000 parent.

    Does nothing for the superuser, under fakeroot, or on cygwin.  The
    scratch directory is only removed when no new wvtest failures were
    recorded.
    """
    failures_before = wvfailure_count()
    # On cygwin chmod 000 isn't effective.
    if (is_superuser() or detect_fakeroot()
            or sys.platform.startswith('cygwin')):
        return
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tmetadata-')
    parent = tmpdir + '/foo'
    target = parent + '/bar'
    os.mkdir(parent)
    os.mkdir(target)
    clear_errors()
    meta = metadata.from_path(target, archive_path=target,
                              save_symlinks=True)
    WVPASSEQ(meta.path, target)

    # Make the parent inaccessible before applying the metadata.
    os.chmod(parent, 000)
    meta.apply_to_path(target)
    print >> sys.stderr, helpers.saved_errors

    # One error prefix is expected per operation attempted.
    expected_errors = ['utime: ']
    if meta.linux_attr and _linux_attr_supported(tmpdir):
        expected_errors.append('Linux chattr: ')
    if metadata.xattr and meta.linux_xattr:
        expected_errors.append('xattr.set: ')

    WVPASS(len(helpers.saved_errors) == len(expected_errors))
    for i, prefix in enumerate(expected_errors):
        WVPASS(str(helpers.saved_errors[i]).startswith(prefix))
    clear_errors()

    if wvfailure_count() == failures_before:
        # Give access back first, then delete the tree.
        for cleanup_cmd in (['chmod', '-R', 'u+rwX', tmpdir],
                            ['rm', '-rf', tmpdir]):
            subprocess.call(cleanup_cmd)
def test_handling_of_incorrect_existing_linux_xattrs():
    """Check that apply_to_path() resets xattrs to the saved set.

    After each disturbance (extra attribute, changed value, removed
    attribute) the path must again carry exactly user.foo == bar.
    Skipped unless running as the real superuser.
    """
    if not is_superuser() or detect_fakeroot():
        WVMSG('skipping test -- not superuser')
        return
    setup_testfs()
    for leftover in glob.glob('testfs/*'):
        ex('rm', '-rf', leftover)

    target = 'testfs/foo'
    with open(target, 'w'):
        pass
    xattr.set(target, 'foo', 'bar', namespace=xattr.NS_USER)
    meta = metadata.from_path(target, archive_path=target,
                              save_symlinks=True)

    disturbances = (
        lambda: xattr.set(target, 'baz', 'bax', namespace=xattr.NS_USER),
        lambda: xattr.set(target, 'foo', 'baz', namespace=xattr.NS_USER),
        lambda: xattr.remove(target, 'foo', namespace=xattr.NS_USER),
    )
    for disturb in disturbances:
        disturb()
        meta.apply_to_path(target, restore_numeric_ids=False)
        WVPASSEQ(xattr.list(target), ['user.foo'])
        WVPASSEQ(xattr.get(target, 'user.foo'), 'bar')

    os.chdir(start_dir)
    cleanup_testfs()
def test_from_path_error():
    """Check the error saved by from_path() for a mode-000 directory.

    Does nothing when running as root or under fakeroot.  The temporary
    directory is always removed afterwards.
    """
    if os.geteuid() == 0 or detect_fakeroot():
        return
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        path = tmpdir + '/foo'
        subprocess.call(['mkdir', path])
        m = metadata.from_path(path, archive_path=path, save_symlinks=True)
        WVPASSEQ(m.path, path)
        # Remove every permission bit and read the metadata again.
        subprocess.call(['chmod', '000', path])
        metadata.from_path(path, archive_path=path, save_symlinks=True)
        # Stringify the entry before calling startswith(), as the other
        # tests in this file do; it is not guaranteed to be a str.
        errmsg = str(helpers.saved_errors[0]) if helpers.saved_errors else ''
        WVPASS(errmsg.startswith('read Linux attr'))
        clear_errors()
    finally:
        # path was left at mode 000; give the owner access back so the
        # removal cannot trip over it.
        subprocess.call(['chmod', '-R', 'u+rwX', tmpdir])
        subprocess.call(['rm', '-rf', tmpdir])
def test_apply_to_path_restricted_access():
    """Check the errors saved by apply_to_path() under a mode-000 parent.

    Does nothing for the superuser, under fakeroot, or on cygwin.  The
    scratch directory is only removed when no new wvtest failures were
    recorded.
    """
    failures_before = wvfailure_count()
    # On cygwin chmod 000 isn't effective.
    if (is_superuser() or detect_fakeroot()
            or sys.platform.startswith('cygwin')):
        return
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tmetadata-')
    parent = tmpdir + '/foo'
    target = parent + '/bar'
    os.mkdir(parent)
    os.mkdir(target)
    clear_errors()
    meta = metadata.from_path(target, archive_path=target,
                              save_symlinks=True)
    WVPASSEQ(meta.path, target)

    # Make the parent inaccessible before applying the metadata.
    os.chmod(parent, 000)
    meta.apply_to_path(target)
    print >> sys.stderr, helpers.saved_errors

    # One error prefix is expected per operation attempted.
    expected_errors = ['utime: ']
    if meta.linux_attr and _linux_attr_supported(tmpdir):
        expected_errors.append('Linux chattr: ')
    if metadata.xattr and meta.linux_xattr:
        expected_errors.append("xattr.set '")

    WVPASS(len(helpers.saved_errors) == len(expected_errors))
    for i, prefix in enumerate(expected_errors):
        WVPASS(str(helpers.saved_errors[i]).startswith(prefix))
    clear_errors()

    if wvfailure_count() == failures_before:
        # Give access back first, then delete the tree.
        for cleanup_cmd in (['chmod', '-R', 'u+rwX', tmpdir],
                            ['rm', '-rf', tmpdir]):
            subprocess.call(cleanup_cmd)
def test_apply_to_path_restricted_access():
    """Check the errors saved by apply_to_path() under a mode-000 parent.

    Does nothing for the superuser, under fakeroot, or on cygwin.  The
    temporary directory is always made accessible again and removed.
    """
    # On cygwin chmod 000 isn't effective.
    if (is_superuser() or detect_fakeroot()
            or sys.platform.startswith("cygwin")):
        return
    tmpdir = tempfile.mkdtemp(prefix="bup-tmetadata-")
    try:
        parent = tmpdir + "/foo"
        target = parent + "/bar"
        os.mkdir(parent)
        os.mkdir(target)
        clear_errors()
        meta = metadata.from_path(target, archive_path=target,
                                  save_symlinks=True)
        WVPASSEQ(meta.path, target)

        # Make the parent inaccessible before applying the metadata.
        os.chmod(parent, 000)
        meta.apply_to_path(target)
        print >>sys.stderr, helpers.saved_errors

        # One error prefix is expected per operation attempted.
        expected_errors = ["utime: "]
        if meta.linux_attr and _linux_attr_supported(tmpdir):
            expected_errors.append("Linux chattr: ")
        if metadata.xattr and meta.linux_xattr:
            expected_errors.append("xattr.set: ")

        WVPASS(len(helpers.saved_errors) == len(expected_errors))
        for i, prefix in enumerate(expected_errors):
            WVPASS(str(helpers.saved_errors[i]).startswith(prefix))
        clear_errors()
    finally:
        # Give access back first, then delete the tree.
        for cleanup_cmd in (["chmod", "-R", "u+rwX", tmpdir],
                            ["rm", "-rf", tmpdir]):
            subprocess.call(cleanup_cmd)
def test_from_path_error():
    """Check the error saved by from_path() for a mode-000 directory.

    Does nothing for the superuser or under fakeroot.  The saved-error
    checks only run when metadata.get_linux_file_attr is available.
    """
    if is_superuser() or detect_fakeroot():
        return
    with no_lingering_errors(), test_tempdir('bup-tmetadata-') as tmpdir:
        target = tmpdir + '/foo'
        os.mkdir(target)
        meta = metadata.from_path(target, archive_path=target,
                                  save_symlinks=True)
        WVPASSEQ(meta.path, target)

        # Remove every permission bit and read the metadata again.
        os.chmod(target, 000)
        metadata.from_path(target, archive_path=target, save_symlinks=True)

        if not metadata.get_linux_file_attr:
            return
        print >> sys.stderr, 'saved_errors:', helpers.saved_errors
        WVPASS(len(helpers.saved_errors) == 1)
        first_error = _first_err()
        WVPASS(first_error.startswith('read Linux attr'))
        clear_errors()
def test_apply_to_path_restricted_access(tmpdir):
    """Check the errors saved by apply_to_path() under a mode-000 parent.

    Does nothing for the superuser, under fakeroot, or on cygwin.
    Saved errors are always cleared on the way out.
    """
    if is_superuser() or detect_fakeroot():
        return
    if sys.platform.startswith('cygwin'):
        return # chmod 000 isn't effective.
    try:
        parent = tmpdir + b'/foo'
        path = parent + b'/bar'
        os.mkdir(parent)
        os.mkdir(path)
        clear_errors()
        if metadata.xattr:
            try:
                metadata.xattr.set(path, b'user.buptest', b'bup')
            except Exception:
                # Best effort only -- maybe the FS cannot do it.  Catch
                # Exception rather than everything so that Ctrl-C and
                # SystemExit still propagate.
                print("failed to set test xattr")
        m = metadata.from_path(path, archive_path=path, save_symlinks=True)
        WVPASSEQ(m.path, path)
        # Make the parent inaccessible before applying the metadata.
        os.chmod(parent, 0o000)
        m.apply_to_path(path)
        # The label is a str so it is not printed as b'saved_errors:'.
        print('saved_errors:', helpers.saved_errors, file=sys.stderr)
        # One error prefix is expected per operation attempted.
        expected_errors = ['utime: ']
        if m.linux_attr and _linux_attr_supported(tmpdir):
            expected_errors.append('Linux chattr: ')
        if metadata.xattr and m.linux_xattr:
            expected_errors.append("xattr.set ")
        WVPASS(len(helpers.saved_errors) == len(expected_errors))
        for i, prefix in enumerate(expected_errors):
            assert str(helpers.saved_errors[i]).startswith(prefix)
    finally:
        clear_errors()
def test_handling_of_incorrect_existing_linux_xattrs():
    """Check that apply_to_path() resets xattrs to the saved set.

    After each disturbance (extra attribute, changed value, removed
    attribute) the path must again carry exactly user.foo == bar, once
    the listing has been passed through remove_selinux().  Skipped
    unless running as the real superuser and setup_testfs() succeeds.
    """
    # pytest.skip() raises, so no return is needed after it.
    if not is_superuser() or detect_fakeroot():
        pytest.skip('skipping test -- not superuser')
    if not setup_testfs():
        pytest.skip('unable to load loop module; skipping dependent tests')
    try:
        for f in glob.glob(b'testfs/*'):
            ex(b'rm', b'-rf', f)
        path = b'testfs/foo'
        open(path, 'w').close()
        xattr.set(path, b'foo', b'bar', namespace=xattr.NS_USER)
        m = metadata.from_path(path, archive_path=path, save_symlinks=True)

        # Extra attribute.
        xattr.set(path, b'baz', b'bax', namespace=xattr.NS_USER)
        m.apply_to_path(path, restore_numeric_ids=False)
        WVPASSEQ(remove_selinux(xattr.list(path)), [b'user.foo'])
        WVPASSEQ(xattr.get(path, b'user.foo'), b'bar')

        # Changed value.
        xattr.set(path, b'foo', b'baz', namespace=xattr.NS_USER)
        m.apply_to_path(path, restore_numeric_ids=False)
        WVPASSEQ(remove_selinux(xattr.list(path)), [b'user.foo'])
        WVPASSEQ(xattr.get(path, b'user.foo'), b'bar')

        # Removed attribute.
        xattr.remove(path, b'foo', namespace=xattr.NS_USER)
        m.apply_to_path(path, restore_numeric_ids=False)
        WVPASSEQ(remove_selinux(xattr.list(path)), [b'user.foo'])
        WVPASSEQ(xattr.get(path, b'user.foo'), b'bar')
    finally:
        # Run the cleanup even when a check above raises.
        cleanup_testfs()
def test_misc():
    """Exercise assorted vfs helpers against a freshly saved repository.

    Covers readlink, item_size, augment_item_meta (via
    run_augment_item_meta_tests) and copy_item.
    """
    def ls_tree_item(name):
        # Build a vfs.Item for NAME from "git ls-tree test NAME".
        listing = exo((b'git', b'ls-tree', b'test', name)).out
        mode, typ, oidx, found_name = listing.strip().split(None, 3)
        assert found_name == name
        return vfs.Item(oid=unhexlify(oidx), meta=int(mode, 8))

    with no_lingering_errors(), test_tempdir(b'bup-tvfs-') as tmpdir:
        # Point bup and git at a scratch repository.
        bup_dir = tmpdir + b'/bup'
        environ[b'GIT_DIR'] = bup_dir
        environ[b'BUP_DIR'] = bup_dir
        git.repodir = bup_dir

        # Source tree: one regular file and a symlink to it.
        data_path = tmpdir + b'/src'
        os.mkdir(data_path)
        with open(data_path + b'/file', 'wb+') as tmpfile:
            tmpfile.write(b'canary\n')
        symlink(b'file', data_path + b'/symlink')

        ex((bup_path, b'init'))
        ex((bup_path, b'index', b'-v', data_path))
        ex((bup_path, b'save', b'-d', b'100000', b'-tvvn', b'test',
            b'--strip', data_path))
        repo = LocalRepo()

        wvstart('readlink')
        link_item = ls_tree_item(b'symlink')
        wvpasseq(b'file', vfs.readlink(repo, link_item))
        file_item = ls_tree_item(b'file')
        wvexcept(Exception, vfs.readlink, repo, file_item)

        wvstart('item_size')
        wvpasseq(4, vfs.item_size(repo, link_item))
        wvpasseq(7, vfs.item_size(repo, file_item))
        sized_meta = metadata.from_path(fsencode(__file__))
        sized_meta.size = 42
        fake_item = file_item._replace(meta=sized_meta)
        wvpasseq(42, vfs.item_size(repo, fake_item))

        _, fakelink_item = vfs.resolve(repo, b'/test/latest',
                                       follow=False)[-1]
        wvpasseq(17, vfs.item_size(repo, fakelink_item))

        wvstart('augment_item_meta')
        run_augment_item_meta_tests(repo,
                                    b'/test/latest/file', 7,
                                    b'/test/latest/symlink', b'file')

        wvstart('copy_item')
        # FIXME: this caused StopIteration
        #_, file_item = vfs.resolve(repo, '/file')[-1]
        _, file_item = vfs.resolve(repo, b'/test/latest/file')[-1]
        file_copy = vfs.copy_item(file_item)
        wvpass(file_copy is not file_item)
        wvpass(file_copy.meta is not file_item.meta)
        wvpass(isinstance(file_copy, tuple))
        wvpass(file_item.meta.user)
        wvpass(file_copy.meta.user)
        # Changing the copy's meta must leave the original untouched.
        file_copy.meta.user = None
        wvpass(file_item.meta.user)
def test_from_path_error():
    """Check the error saved by from_path() for a mode-000 directory.

    Does nothing for the superuser or under fakeroot.  The saved-error
    check only runs when metadata.get_linux_file_attr is available.
    The temporary directory is always removed afterwards.
    """
    if is_superuser() or detect_fakeroot():
        return
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        path = tmpdir + '/foo'
        os.mkdir(path)
        m = metadata.from_path(path, archive_path=path, save_symlinks=True)
        WVPASSEQ(m.path, path)
        # Remove every permission bit and read the metadata again.
        os.chmod(path, 000)
        metadata.from_path(path, archive_path=path, save_symlinks=True)
        if metadata.get_linux_file_attr:
            errmsg = _first_err()
            WVPASS(errmsg.startswith('read Linux attr'))
            clear_errors()
    finally:
        # path was left at mode 000; give the owner access back so the
        # removal cannot trip over it.
        subprocess.call(['chmod', '-R', 'u+rwX', tmpdir])
        subprocess.call(['rm', '-rf', tmpdir])
def test_from_path_error():
    """Check the error saved by from_path() for a mode-000 directory.

    Does nothing when running as root or under fakeroot.  The
    saved-error check only runs when metadata.get_linux_file_attr is
    available.  The temporary directory is always removed afterwards.
    """
    if os.geteuid() == 0 or detect_fakeroot():
        return
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        path = tmpdir + '/foo'
        os.mkdir(path)
        m = metadata.from_path(path, archive_path=path, save_symlinks=True)
        WVPASSEQ(m.path, path)
        # Remove every permission bit and read the metadata again.
        os.chmod(path, 000)
        metadata.from_path(path, archive_path=path, save_symlinks=True)
        if metadata.get_linux_file_attr:
            errmsg = _first_err()
            WVPASS(errmsg.startswith('read Linux attr'))
            clear_errors()
    finally:
        # path was left at mode 000; give the owner access back so the
        # removal cannot trip over it.
        subprocess.call(['chmod', '-R', 'u+rwX', tmpdir])
        subprocess.call(['rm', '-rf', tmpdir])
def test_misc():
    """Exercise assorted vfs helpers against a freshly saved repository.

    Covers readlink, item_size, augment_item_meta (via
    run_augment_item_meta_tests) and copy_item.
    """
    def ls_tree_item(name):
        # Build a vfs.Item for NAME from "git ls-tree test NAME".
        listing = exo(('git', 'ls-tree', 'test', name)).out
        mode, typ, oidx, found_name = listing.strip().split(None, 3)
        assert found_name == name
        return vfs.Item(oid=oidx.decode('hex'), meta=int(mode, 8))

    with no_lingering_errors(), test_tempdir('bup-tvfs-') as tmpdir:
        # Point bup and git at a scratch repository.
        bup_dir = tmpdir + '/bup'
        environ['GIT_DIR'] = bup_dir
        environ['BUP_DIR'] = bup_dir
        git.repodir = bup_dir

        # Source tree: one regular file and a symlink to it.
        data_path = tmpdir + '/src'
        os.mkdir(data_path)
        with open(data_path + '/file', 'w+') as tmpfile:
            tmpfile.write(b'canary\n')
        symlink('file', data_path + '/symlink')

        ex((bup_path, 'init'))
        ex((bup_path, 'index', '-v', data_path))
        ex((bup_path, 'save', '-d', '100000', '-tvvn', 'test',
            '--strip', data_path))
        repo = LocalRepo()

        wvstart('readlink')
        link_item = ls_tree_item('symlink')
        wvpasseq('file', vfs.readlink(repo, link_item))
        file_item = ls_tree_item('file')
        wvexcept(Exception, vfs.readlink, repo, file_item)

        wvstart('item_size')
        wvpasseq(4, vfs.item_size(repo, link_item))
        wvpasseq(7, vfs.item_size(repo, file_item))
        sized_meta = metadata.from_path(__file__)
        sized_meta.size = 42
        fake_item = file_item._replace(meta=sized_meta)
        wvpasseq(42, vfs.item_size(repo, fake_item))

        wvstart('augment_item_meta')
        run_augment_item_meta_tests(repo,
                                    '/test/latest/file', 7,
                                    '/test/latest/symlink', 'file')

        wvstart('copy_item')
        # FIXME: this caused StopIteration
        #_, file_item = vfs.resolve(repo, '/file')[-1]
        _, file_item = vfs.resolve(repo, '/test/latest/file')[-1]
        file_copy = vfs.copy_item(file_item)
        wvpass(file_copy is not file_item)
        wvpass(file_copy.meta is not file_item.meta)
        wvpass(isinstance(file_copy, tuple))
        wvpass(file_item.meta.user)
        wvpass(file_copy.meta.user)
        # Changing the copy's meta must leave the original untouched.
        file_copy.meta.user = None
        wvpass(file_item.meta.user)
def test_from_path_error():
    """Check the error saved by from_path() for a mode-000 directory.

    Does nothing for the superuser or under fakeroot.  The scratch
    directory is only removed when no new wvtest failures were
    recorded.
    """
    failures_before = wvfailure_count()
    if is_superuser() or detect_fakeroot():
        return
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tmetadata-')
    target = tmpdir + '/foo'
    os.mkdir(target)
    meta = metadata.from_path(target, archive_path=target,
                              save_symlinks=True)
    WVPASSEQ(meta.path, target)

    # Remove every permission bit and read the metadata again.
    os.chmod(target, 000)
    metadata.from_path(target, archive_path=target, save_symlinks=True)

    if metadata.get_linux_file_attr:
        WVPASS(len(helpers.saved_errors) == 1)
        first_error = _first_err()
        WVPASS(first_error.startswith('read Linux attr'))
        clear_errors()

    if wvfailure_count() == failures_before:
        # Give access back first, then delete the tree.
        for cleanup_cmd in (['chmod', '-R', 'u+rwX', tmpdir],
                            ['rm', '-rf', tmpdir]):
            subprocess.call(cleanup_cmd)
def test_from_path_error():
    """Check the error saved by from_path() for a mode-000 directory.

    Does nothing for the superuser or under fakeroot.  The temporary
    directory is always made accessible again and removed.
    """
    if is_superuser() or detect_fakeroot():
        return
    tmpdir = tempfile.mkdtemp(prefix="bup-tmetadata-")
    try:
        target = tmpdir + "/foo"
        os.mkdir(target)
        meta = metadata.from_path(target, archive_path=target,
                                  save_symlinks=True)
        WVPASSEQ(meta.path, target)

        # Remove every permission bit and read the metadata again.
        os.chmod(target, 000)
        metadata.from_path(target, archive_path=target, save_symlinks=True)

        if metadata.get_linux_file_attr:
            WVPASS(len(helpers.saved_errors) == 1)
            first_error = _first_err()
            WVPASS(first_error.startswith("read Linux attr"))
            clear_errors()
    finally:
        # Give access back first, then delete the tree.
        for cleanup_cmd in (["chmod", "-R", "u+rwX", tmpdir],
                            ["rm", "-rf", tmpdir]):
            subprocess.call(cleanup_cmd)
def add_path_to_index(path):
    """Add a new index entry for path from a fresh stat.

    The path's metadata is stored with its timestamps zeroed, the entry
    is added to the index writer, and a non-directory with more than
    one link is registered in the hardlink table.
    """
    if opt.verbose >= 2:
        print('adding {0} to the index'.format(path))

    st = drecurse.OsFile(path).stat()
    meta = metadata.from_path(path, statinfo=st)

    # Zeroed for the same reason as the matching assignment in
    # update_path_in_index().
    meta.atime = 0
    meta.mtime = 0
    meta.ctime = 0

    meta_ofs = _msw.store(meta)
    _wi.add(path, st, meta_ofs)

    is_dir = stat.S_ISDIR(st.st_mode)
    if not is_dir and st.st_nlink > 1:
        _hlinks.add_path(path, st.st_dev, st.st_ino)
def test_restore_nonexistent_user_group():
    """Check apply_to_path() with an owner and group name that don't exist.

    The names are built by appending 'x' to the longest existing user
    and group name, so they cannot match any existing entry.
    """
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        path = tmpdir + '/foo'
        subprocess.call(['mkdir', path])
        m = metadata.from_path(path, archive_path=path, save_symlinks=True)
        WVPASSEQ(m.path, path)
        m.owner = max([x.pw_name for x in pwd.getpwall()], key=len) + 'x'
        m.group = max([x.gr_name for x in grp.getgrall()], key=len) + 'x'
        WVPASSEQ(m.apply_to_path(path, restore_numeric_ids=True), None)
        WVPASSEQ(os.stat(path).st_uid, m.uid)
        WVPASSEQ(os.stat(path).st_gid, m.gid)
        WVPASSEQ(m.apply_to_path(path, restore_numeric_ids=False), None)
        WVPASSEQ(os.stat(path).st_uid, os.geteuid())
        # This used to compare st_uid with os.getgid(), so the group was
        # never checked.  Compare st_gid with the saved gid, which the
        # numeric restore above has already been checked against.
        WVPASSEQ(os.stat(path).st_gid, m.gid)
    finally:
        subprocess.call(['rm', '-rf', tmpdir])
def test_apply_to_path_restricted_access():
    """Check the error saved by apply_to_path() under a mode-000 parent.

    Does nothing when running as root or under fakeroot.  The temporary
    directory is always made accessible again and removed.
    """
    if os.geteuid() == 0 or detect_fakeroot():
        return
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        path = tmpdir + '/foo'
        os.mkdir(path)
        clear_errors()
        m = metadata.from_path(path, archive_path=path, save_symlinks=True)
        WVPASSEQ(m.path, path)
        # Make the containing directory inaccessible before applying.
        os.chmod(tmpdir, 000)
        m.apply_to_path(path)
        errmsg = _first_err()
        WVPASS(errmsg.startswith('utime: '))
        clear_errors()
    finally:
        # tmpdir itself was left at mode 000 and still contains foo, so
        # rm could not list it; give the owner access back first.
        subprocess.call(['chmod', '-R', 'u+rwX', tmpdir])
        subprocess.call(['rm', '-rf', tmpdir])
def test_apply_to_path_restricted_access():
    """Check the error saved by apply_to_path() under a mode-000 parent.

    Does nothing for the superuser or under fakeroot.  The temporary
    directory is always made accessible again and removed.
    """
    if is_superuser() or detect_fakeroot():
        return
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        path = tmpdir + '/foo'
        os.mkdir(path)
        clear_errors()
        m = metadata.from_path(path, archive_path=path, save_symlinks=True)
        WVPASSEQ(m.path, path)
        # Make the containing directory inaccessible before applying.
        os.chmod(tmpdir, 000)
        m.apply_to_path(path)
        errmsg = _first_err()
        WVPASS(errmsg.startswith('utime: '))
        clear_errors()
    finally:
        # tmpdir itself was left at mode 000 and still contains foo, so
        # rm could not list it; give the owner access back first.
        subprocess.call(['chmod', '-R', 'u+rwX', tmpdir])
        subprocess.call(['rm', '-rf', tmpdir])
def test_restore_nonexistent_user_group():
    """Check apply_to_path() with an owner and group name that don't exist.

    The names are built by appending 'x' to the longest existing user
    and group name, so they cannot match any existing entry.  With and
    without numeric ids the path must end up with the saved uid/gid.
    """
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        target = tmpdir + '/foo'
        os.mkdir(target)
        meta = metadata.from_path(target, archive_path=target,
                                  save_symlinks=True)
        WVPASSEQ(meta.path, target)

        # (length, name + 'x') pairs, so max() picks the longest name.
        user_candidates = [(len(pw.pw_name), pw.pw_name + 'x')
                           for pw in pwd.getpwall()]
        meta.owner = max(user_candidates)[1]
        group_candidates = [(len(gr.gr_name), gr.gr_name + 'x')
                            for gr in grp.getgrall()]
        meta.group = max(group_candidates)[1]

        for numeric_ids in (True, False):
            WVPASSEQ(meta.apply_to_path(target,
                                        restore_numeric_ids=numeric_ids),
                     None)
            WVPASSEQ(os.stat(target).st_uid, meta.uid)
            WVPASSEQ(os.stat(target).st_gid, meta.gid)
    finally:
        subprocess.call(['rm', '-rf', tmpdir])
def test_apply_to_path_restricted_access():
    """Check the error saved by apply_to_path() under a mode-000 parent.

    Does nothing for the superuser, under fakeroot, or on cygwin.  The
    temporary directory is always made accessible again and removed.
    """
    if is_superuser() or detect_fakeroot():
        return
    if sys.platform.startswith('cygwin'):
        return # chmod 000 isn't effective.
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        path = tmpdir + '/foo'
        os.mkdir(path)
        clear_errors()
        m = metadata.from_path(path, archive_path=path, save_symlinks=True)
        WVPASSEQ(m.path, path)
        # Make the containing directory inaccessible before applying.
        os.chmod(tmpdir, 000)
        m.apply_to_path(path)
        WVPASS(len(helpers.saved_errors) == 1)
        errmsg = _first_err()
        WVPASS(errmsg.startswith('utime: '))
        clear_errors()
    finally:
        # tmpdir itself was left at mode 000 and still contains foo, so
        # rm could not list it; give the owner access back first.
        subprocess.call(['chmod', '-R', 'u+rwX', tmpdir])
        subprocess.call(['rm', '-rf', tmpdir])
def test_handling_of_incorrect_existing_linux_xattrs():
    """Check that apply_to_path() resets xattrs to the saved set.

    After each disturbance (extra attribute, changed value, removed
    attribute) the path must again carry exactly user.foo == bar.
    Does nothing unless running as real root.
    """
    if os.geteuid() != 0 or detect_fakeroot():
        return
    setup_testfs()
    subprocess.check_call('rm -rf testfs/*', shell=True)

    target = 'testfs/foo'
    with open(target, 'w'):
        pass
    xattr.set(target, 'foo', 'bar', namespace=xattr.NS_USER)
    meta = metadata.from_path(target, archive_path=target,
                              save_symlinks=True)

    disturbances = (
        lambda: xattr.set(target, 'baz', 'bax', namespace=xattr.NS_USER),
        lambda: xattr.set(target, 'foo', 'baz', namespace=xattr.NS_USER),
        lambda: xattr.remove(target, 'foo', namespace=xattr.NS_USER),
    )
    for disturb in disturbances:
        disturb()
        meta.apply_to_path(target, restore_numeric_ids=False)
        WVPASSEQ(xattr.list(target), ['user.foo'])
        WVPASSEQ(xattr.get(target, 'user.foo'), 'bar')

    os.chdir(top_dir)
    cleanup_testfs()
def test_restore_restricted_user_group():
    """Check the errors saved when restoring uid 0 / gid 0 unprivileged.

    Does nothing when running as root or under fakeroot.
    """
    if os.geteuid() == 0 or detect_fakeroot():
        return
    tmpdir = tempfile.mkdtemp(prefix='bup-tmetadata-')
    try:
        target = tmpdir + '/foo'
        subprocess.call(['mkdir', target])
        meta = metadata.from_path(target, archive_path=target,
                                  save_symlinks=True)
        WVPASSEQ(meta.path, target)
        WVPASSEQ(meta.apply_to_path(target), None)
        saved_uid = meta.uid

        # Try to hand the path to uid 0.
        meta.uid = 0
        meta.apply_to_path(target, restore_numeric_ids=True)
        if helpers.saved_errors:
            uid_error = str(helpers.saved_errors[0])
        else:
            uid_error = ''
        WVPASS(uid_error.startswith('lchown: '))
        clear_errors()

        # Put the uid back and try gid 0 instead.
        meta.uid = saved_uid
        meta.gid = 0
        meta.apply_to_path(target, restore_numeric_ids=True)
        if helpers.saved_errors:
            gid_error = str(helpers.saved_errors[0])
        else:
            gid_error = ''
        WVPASS(gid_error.startswith('lchown: '))
        clear_errors()
    finally:
        subprocess.call(['rm', '-rf', tmpdir])
if not f in metadata.all_fields: o.fatal(f + ' is not a valid field name') if treat_include_fields_as_definitive: active_fields = include_fields treat_include_fields_as_definitive = False else: active_fields = active_fields | include_fields opt.verbose = opt.verbose or 0 opt.quiet = opt.quiet or 0 metadata.verbose = opt.verbose - opt.quiet first_path = True for path in remainder: try: m = metadata.from_path(path, archive_path = path) except (OSError,IOError), e: if e.errno == errno.ENOENT: add_error(e) continue else: raise if metadata.verbose >= 0: if not first_path: print if atime_resolution != 1: m.atime = (m.atime / atime_resolution) * atime_resolution if mtime_resolution != 1: m.mtime = (m.mtime / mtime_resolution) * mtime_resolution if ctime_resolution != 1: m.ctime = (m.ctime / ctime_resolution) * ctime_resolution
def update_index(top, excluded_paths, exclude_rxs): # tmax and start must be epoch nanoseconds. tmax = (time.time() - 1) * 10**9 ri = index.Reader(indexfile) msw = index.MetaStoreWriter(indexfile + '.meta') wi = index.Writer(indexfile, msw, tmax) rig = IterHelper(ri.iter(name=top)) tstart = int(time.time()) * 10**9 hlinks = hlinkdb.HLinkDB(indexfile + '.hlink') hashgen = None if opt.fake_valid: def hashgen(name): return (GIT_MODE_FILE, index.FAKE_SHA) total = 0 bup_dir = os.path.abspath(git.repo()) index_start = time.time() for (path, pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev, bup_dir=bup_dir, excluded_paths=excluded_paths, exclude_rxs=exclude_rxs): if opt.verbose >= 2 or (opt.verbose == 1 and stat.S_ISDIR(pst.st_mode)): sys.stdout.write('%s\n' % path) sys.stdout.flush() elapsed = time.time() - index_start paths_per_sec = total / elapsed if elapsed else 0 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec)) elif not (total % 128): elapsed = time.time() - index_start paths_per_sec = total / elapsed if elapsed else 0 qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec)) total += 1 while rig.cur and rig.cur.name > path: # deleted paths if rig.cur.exists(): rig.cur.set_deleted() rig.cur.repack() if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode): hlinks.del_path(rig.cur.name) rig.next() if rig.cur and rig.cur.name == path: # paths that already existed try: meta = metadata.from_path(path, statinfo=pst) except (OSError, IOError), e: add_error(e) rig.next() continue if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1: hlinks.del_path(rig.cur.name) if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1: hlinks.add_path(path, pst.st_dev, pst.st_ino) # Clear these so they don't bloat the store -- they're # already in the index (since they vary a lot and they're # fixed length). 
If you've noticed "tmax", you might # wonder why it's OK to do this, since that code may # adjust (mangle) the index mtime and ctime -- producing # fake values which must not end up in a .bupm. However, # it looks like that shouldn't be possible: (1) When # "save" validates the index entry, it always reads the # metadata from the filesytem. (2) Metadata is only # read/used from the index if hashvalid is true. (3) index # always invalidates "faked" entries, because "old != new" # in from_stat(). meta.ctime = meta.mtime = meta.atime = 0 meta_ofs = msw.store(meta) rig.cur.from_stat(pst, meta_ofs, tstart, check_device=opt.check_device) if not (rig.cur.flags & index.IX_HASHVALID): if hashgen: (rig.cur.gitmode, rig.cur.sha) = hashgen(path) rig.cur.flags |= index.IX_HASHVALID if opt.fake_invalid: rig.cur.invalidate() rig.cur.repack() rig.next() else: # new paths try: meta = metadata.from_path(path, statinfo=pst) except (OSError, IOError), e: add_error(e) continue # See same assignment to 0, above, for rationale. meta.atime = meta.mtime = meta.ctime = 0 meta_ofs = msw.store(meta) wi.add(path, pst, meta_ofs, hashgen=hashgen) if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1: hlinks.add_path(path, pst.st_dev, pst.st_ino)
def test_item_mode():
    """Check vfs.item_mode() for both kinds of value held in Item.meta."""
    oid = b'\0' * 20
    dir_mode = S_IFDIR | 0o755
    cwd_meta = metadata.from_path(b'.')

    # meta given as a plain integer mode.
    int_item = vfs.Item(oid=oid, meta=dir_mode)
    wvpasseq(dir_mode, vfs.item_mode(int_item))

    # meta given as a Metadata object; its .mode is the expected result.
    meta_item = vfs.Item(oid=oid, meta=cwd_meta)
    wvpasseq(cwd_meta.mode, vfs.item_mode(meta_item))
if not f in metadata.all_fields: o.fatal(f + ' is not a valid field name') if treat_include_fields_as_definitive: active_fields = include_fields treat_include_fields_as_definitive = False else: active_fields = active_fields | include_fields opt.verbose = opt.verbose or 0 opt.quiet = opt.quiet or 0 metadata.verbose = opt.verbose - opt.quiet first_path = True for path in remainder: try: m = metadata.from_path(path, archive_path=path) except (OSError, IOError), e: if e.errno == errno.ENOENT: add_error(e) continue else: raise if metadata.verbose >= 0: if not first_path: print print metadata.detailed_str(m, active_fields) first_path = False if saved_errors: log('WARNING: %d errors encountered.\n' % len(saved_errors)) sys.exit(1)
def update_index(top, excluded_paths, exclude_rxs):
    """Walk the tree under `top` and reconcile it with the existing index.

    Every path produced by drecurse.recursive_dirlist() is compared with
    the entries read from `indexfile`: index entries that sort after the
    current path are marked deleted, entries for the same path are
    refreshed from the new stat data, and unknown paths are added to the
    new index writer.  Hard-link bookkeeping is kept in the '.hlink'
    database and metadata records in the '.meta' store.

    top -- root path to walk.
    excluded_paths, exclude_rxs -- passed through to recursive_dirlist().

    Paths whose metadata cannot be read (OSError/IOError) are reported via
    add_error() and skipped.
    """
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    hashgen = None
    if opt.fake_valid:
        def hashgen(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    for (path, pst) in drecurse.recursive_dirlist([top],
                                                  xdev=opt.xdev,
                                                  bup_dir=bup_dir,
                                                  excluded_paths=excluded_paths,
                                                  exclude_rxs=exclude_rxs):
        show_path = (opt.verbose >= 2
                     or (opt.verbose == 1 and stat.S_ISDIR(pst.st_mode)))
        if show_path:
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
        # Refresh the progress line whenever a path was printed, and
        # otherwise only once every 128 paths.
        if show_path or not (total % 128):
            qprogress('Indexing: %d\r' % total)
        total += 1

        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()

        if rig.cur and rig.cur.name == path:  # paths that already existed
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                rig.next()
                continue
            if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                hlinks.del_path(rig.cur.name)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
            # Clear these so they don't bloat the store -- they're
            # already in the index (since they vary a lot and they're
            # fixed length).  If you've noticed "tmax", you might
            # wonder why it's OK to do this, since that code may
            # adjust (mangle) the index mtime and ctime -- producing
            # fake values which must not end up in a .bupm.  However,
            # it looks like that shouldn't be possible:  (1) When
            # "save" validates the index entry, it always reads the
            # metadata from the filesystem. (2) Metadata is only
            # read/used from the index if hashvalid is true. (3) index
            # always invalidates "faked" entries, because "old != new"
            # in from_stat().
            meta.ctime = meta.mtime = meta.atime = 0
            meta_ofs = msw.store(meta)
            rig.cur.from_stat(pst, meta_ofs, tstart,
                              check_device=opt.check_device)
            if not (rig.cur.flags & index.IX_HASHVALID):
                if hashgen:
                    (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                    rig.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                rig.cur.invalidate()
            rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=hashgen)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
if first_root == None: first_root = dirp[0] elif first_root != dirp[0]: root_collision = True # If switching to a new sub-tree, finish the current sub-tree. while parts > [x[0] for x in dirp]: _pop(force_tree = None) # If switching to a new sub-tree, start a new sub-tree. for path_component in dirp[len(parts):]: dir_name, fs_path = path_component # Not indexed, so just grab the FS metadata or use empty metadata. try: meta = metadata.from_path(fs_path, normalized=True) \ if fs_path else metadata.Metadata() except (OSError, IOError) as e: add_error(e) lastskip_name = dir_name meta = metadata.Metadata() _push(dir_name, meta) if not file: if len(parts) == 1: continue # We're at the top level -- keep the current root dir # Since there's no filename, this is a subdir -- finish it. oldtree = already_saved(ent) # may be None newtree = _pop(force_tree = oldtree) if not oldtree: if lastskip_name and lastskip_name.startswith(ent.name):
dirp = stripped_path_components(dir, extra) elif opt.strip_path: dirp = stripped_path_components(dir, [opt.strip_path]) elif graft_points: dirp = grafted_path_components(graft_points, dir) else: dirp = path_components(dir) while parts > [x[0] for x in dirp]: _pop(force_tree = None) if dir != '/': for path_component in dirp[len(parts):]: dir_name, fs_path = path_component if fs_path: meta = metadata.from_path(fs_path) else: meta = metadata.Metadata() _push(dir_name, meta) if not file: # no filename portion means this is a subdir. But # sub/parentdirectories already handled in the pop/push() part above. oldtree = already_saved(ent) # may be None newtree = _pop(force_tree = oldtree) if not oldtree: if lastskip_name and lastskip_name.startswith(ent.name): ent.invalidate() else: ent.validate(GIT_MODE_TREE, newtree) ent.repack()
def main(argv):
    """Entry point of the save command.

    Parses the options in `argv`, walks the selected index entries, writes
    blobs/trees (and optionally a commit) through a local or remote pack
    writer, and updates the branch ref when a name was given.  Exits with
    status 1 if any errors were recorded in saved_errors.
    """

    # Hack around lack of nonlocal vars in python 2
    _nonlocal = {}

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if opt.indexfile:
        opt.indexfile = argv_bytes(opt.indexfile)
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.strip_path:
        opt.strip_path = argv_bytes(opt.strip_path)

    git.check_repo_or_die()
    if not (opt.tree or opt.commit or opt.name):
        o.fatal("use one or more of -t, -c, -n")
    if not extra:
        o.fatal("no filenames given")

    extra = [argv_bytes(x) for x in extra]

    opt.progress = (istty2 and not opt.quiet)
    opt.smaller = parse_num(opt.smaller or 0)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)

    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    if opt.strip and opt.strip_path:
        o.fatal("--strip is incompatible with --strip-path")

    graft_points = []
    if opt.graft:
        if opt.strip:
            o.fatal("--strip is incompatible with --graft")

        if opt.strip_path:
            o.fatal("--strip-path is incompatible with --graft")

        for (option, parameter) in flags:
            if option == "--graft":
                parameter = argv_bytes(parameter)
                splitted_parameter = parameter.split(b'=')
                if len(splitted_parameter) != 2:
                    o.fatal(
                        "a graft point must be of the form old_path=new_path")
                old_path, new_path = splitted_parameter
                if not (old_path and new_path):
                    o.fatal("a graft point cannot be empty")
                graft_points.append(
                    (resolve_parent(old_path), resolve_parent(new_path)))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")

    name = opt.name
    if name and not valid_save_name(name):
        o.fatal("'%s' is not a valid branch name" % path_msg(name))
    refname = name and b'refs/heads/%s' % name or None
    if opt.remote or is_reverse:
        try:
            cli = client.Client(opt.remote)
        except client.ClientError as e:
            log('error: %s' % e)
            sys.exit(1)
        oldref = refname and cli.read_ref(refname) or None
        w = cli.new_packwriter(compression_level=opt.compress)
    else:
        cli = None
        oldref = refname and git.read_ref(refname) or None
        w = git.PackWriter(compression_level=opt.compress)

    handle_ctrl_c()

    # Metadata is stored in a file named .bupm in each directory.  The
    # first metadata entry will be the metadata for the current directory.
    # The remaining entries will be for each of the other directory
    # elements, in the order they're listed in the index.
    #
    # Since the git tree elements are sorted according to
    # git.shalist_item_sort_key, the metalist items are accumulated as
    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
    # created.  The sort_key should have been computed using the element's
    # mangled name and git mode (after hashsplitting), but the code isn't
    # actually doing that but rather uses the element's real name and mode.
    # This makes things a bit more difficult when reading it back, see
    # vfs.ordered_tree_entries().

    # Maintain a stack of information representing the current location in
    # the archive being constructed.  The current path is recorded in
    # parts, which will be something like ['', 'home', 'someuser'], and
    # the accumulated content and metadata for each of the dirs in parts is
    # stored in parallel stacks in shalists and metalists.

    parts = []  # Current archive position (stack of dir names).
    shalists = []  # Hashes for each dir in paths.
    metalists = []  # Metadata for each dir in paths.

    def _push(part, metadata):
        # Enter a new archive directory -- make it the current directory.
        parts.append(part)
        shalists.append([])
        metalists.append([(b'', metadata)])  # This dir's metadata (no name).

    def _pop(force_tree, dir_metadata=None):
        # Leave the current archive directory and add its tree to its parent.
        assert (len(parts) >= 1)
        part = parts.pop()
        shalist = shalists.pop()
        metalist = metalists.pop()
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if force_tree:
            tree = force_tree
        else:
            names_seen = set()
            clean_list = []
            metaidx = 1  # entry at 0 is for the dir
            for x in shalist:
                name = x[1]
                if name in names_seen:
                    parent_path = b'/'.join(parts) + b'/'
                    add_error('error: ignoring duplicate path %s in %s'
                              % (path_msg(name), path_msg(parent_path)))
                    if not stat.S_ISDIR(x[0]):
                        # Drop the duplicate's metadata so the remaining
                        # entries stay aligned with clean_list.
                        del metalist[metaidx]
                else:
                    names_seen.add(name)
                    clean_list.append(x)
                    if not stat.S_ISDIR(x[0]):
                        metaidx += 1

            if dir_metadata:
                # Override the original metadata pushed for this dir.
                metalist = [(b'', dir_metadata)] + metalist[1:]
            sorted_metalist = sorted(metalist, key=lambda x: x[0])
            # Named bupm_data (not "metadata") so the metadata module is
            # not shadowed inside this helper.
            bupm_data = b''.join([m[1].encode() for m in sorted_metalist])
            metadata_f = BytesIO(bupm_data)
            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
                                                       [metadata_f],
                                                       keep_boundaries=False)
            clean_list.append((mode, b'.bupm', id))

            tree = w.new_tree(clean_list)
        if shalists:
            shalists[-1].append((GIT_MODE_TREE,
                                 git.mangle_name(part,
                                                 GIT_MODE_TREE, GIT_MODE_TREE),
                                 tree))
        return tree

    _nonlocal['count'] = 0
    _nonlocal['subcount'] = 0
    _nonlocal['lastremain'] = None

    def progress_report(n):
        # Reads total, tstart, fcount and ftotal from the enclosing scope.
        _nonlocal['subcount'] += n
        cc = _nonlocal['count'] + _nonlocal['subcount']
        pct = total and (cc * 100.0 / total) or 0
        now = time.time()
        elapsed = now - tstart
        kps = elapsed and int(cc / 1024. / elapsed)
        kps_frac = 10**int(math.log(kps + 1, 10) - 1)
        kps = int(kps / kps_frac) * kps_frac
        if cc:
            remain = elapsed * 1.0 / cc * (total - cc)
        else:
            remain = 0.0
        # Keep showing the previous estimate when the new one is larger by
        # less than 5%.
        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain'])
                and ((remain - _nonlocal['lastremain'])
                     / _nonlocal['lastremain'] < 0.05)):
            remain = _nonlocal['lastremain']
        else:
            _nonlocal['lastremain'] = remain
        hours = int(remain / 60 / 60)
        mins = int(remain / 60 - hours * 60)
        secs = int(remain - hours * 60 * 60 - mins * 60)
        if elapsed < 30:
            remainstr = ''
            kpsstr = ''
        else:
            kpsstr = '%dk/s' % kps
            if hours:
                remainstr = '%dh%dm' % (hours, mins)
            elif mins:
                remainstr = '%dm%d' % (mins, secs)
            else:
                remainstr = '%ds' % secs
        qprogress(
            'Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
            % (pct, cc / 1024, total / 1024, fcount, ftotal,
               remainstr, kpsstr))

    indexfile = opt.indexfile or git.repo(b'bupindex')
    r = index.Reader(indexfile)
    try:
        msr = index.MetaStoreReader(indexfile + b'.meta')
    except IOError as ex:
        if ex.errno != EACCES:
            raise
        log('error: cannot access %r; have you run bup index?'
            % path_msg(indexfile))
        sys.exit(1)
    hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')

    def already_saved(ent):
        return ent.is_valid() and w.exists(ent.sha) and ent.sha

    def wantrecurse_pre(ent):
        return not already_saved(ent)

    def wantrecurse_during(ent):
        return not already_saved(ent) or ent.sha_missing()

    def find_hardlink_target(hlink_db, ent):
        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
            if link_paths:
                return link_paths[0]

    total = ftotal = 0
    if opt.progress:
        for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
            if not (ftotal % 10024):
                qprogress('Reading index: %d\r' % ftotal)
            exists = ent.exists()
            hashvalid = already_saved(ent)
            ent.set_sha_missing(not hashvalid)
            if not opt.smaller or ent.size < opt.smaller:
                if exists and not hashvalid:
                    total += ent.size
            ftotal += 1
        progress('Reading index: %d, done.\n' % ftotal)
        hashsplit.progress_callback = progress_report

    # Root collisions occur when strip or graft options map more than one
    # path to the same directory (paths which originally had separate
    # parents).  When that situation is detected, use empty metadata for
    # the parent.  Otherwise, use the metadata for the common parent.
    # Collision example: "bup save ... --strip /foo /foo/bar /bar".

    # FIXME: Add collision tests, or handle collisions some other way.

    # FIXME: Detect/handle strip/graft name collisions (other than root),
    # i.e. if '/foo/bar' and '/bar' both map to '/'.

    first_root = None
    root_collision = None
    tstart = time.time()
    fcount = 0
    lastskip_name = None
    lastdir = b''
    for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_during):
        (dir, file) = os.path.split(ent.name)
        exists = (ent.flags & index.IX_EXISTS)
        hashvalid = already_saved(ent)
        wasmissing = ent.sha_missing()
        oldsize = ent.size
        if opt.verbose:
            if not exists:
                status = 'D'
            elif not hashvalid:
                if ent.sha == index.EMPTY_SHA:
                    status = 'A'
                else:
                    status = 'M'
            else:
                status = ' '
            if opt.verbose >= 2:
                log('%s %-70s\n' % (status, path_msg(ent.name)))
            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
                if not lastdir.startswith(dir):
                    log('%s %-70s\n'
                        % (status, path_msg(os.path.join(dir, b''))))
                lastdir = dir

        if opt.progress:
            progress_report(0)
        fcount += 1

        if not exists:
            continue
        if opt.smaller and ent.size >= opt.smaller:
            if exists and not hashvalid:
                if opt.verbose:
                    log('skipping large file "%s"\n' % path_msg(ent.name))
                lastskip_name = ent.name
            continue

        assert (dir.startswith(b'/'))
        if opt.strip:
            dirp = stripped_path_components(dir, extra)
        elif opt.strip_path:
            dirp = stripped_path_components(dir, [opt.strip_path])
        elif graft_points:
            dirp = grafted_path_components(graft_points, dir)
        else:
            dirp = path_components(dir)

        # At this point, dirp contains a representation of the archive
        # path that looks like [(archive_dir_name, real_fs_path), ...].
        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
        # might look like this at some point:
        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].

        # This dual representation supports stripping/grafting, where the
        # archive path may not have a direct correspondence with the
        # filesystem.  The root directory is represented by an initial
        # component named '', and any component that doesn't have a
        # corresponding filesystem directory (due to grafting, for
        # example) will have a real_fs_path of None, i.e. [('', None),
        # ...].

        if first_root is None:
            first_root = dirp[0]
        elif first_root != dirp[0]:
            root_collision = True

        # If switching to a new sub-tree, finish the current sub-tree.
        while parts > [x[0] for x in dirp]:
            _pop(force_tree=None)

        # If switching to a new sub-tree, start a new sub-tree.
        for path_component in dirp[len(parts):]:
            dir_name, fs_path = path_component
            # Not indexed, so just grab the FS metadata or use empty metadata.
            try:
                meta = metadata.from_path(fs_path, normalized=True) \
                    if fs_path else metadata.Metadata()
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = dir_name
                meta = metadata.Metadata()
            _push(dir_name, meta)

        if not file:
            if len(parts) == 1:
                continue  # We're at the top level -- keep the current root dir
            # Since there's no filename, this is a subdir -- finish it.
            oldtree = already_saved(ent)  # may be None
            newtree = _pop(force_tree=oldtree)
            if not oldtree:
                if lastskip_name and lastskip_name.startswith(ent.name):
                    ent.invalidate()
                else:
                    ent.validate(GIT_MODE_TREE, newtree)
                ent.repack()
            if exists and wasmissing:
                _nonlocal['count'] += oldsize
            continue

        # it's not a directory
        if hashvalid:
            id = ent.sha
            git_name = git.mangle_name(file, ent.mode, ent.gitmode)
            git_info = (ent.gitmode, git_name, id)
            shalists[-1].append(git_info)
            sort_key = git.shalist_item_sort_key((ent.mode, file, id))
            meta = msr.metadata_at(ent.meta_ofs)
            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
            # Restore the times that were cleared to 0 in the metastore.
            (meta.atime, meta.mtime, meta.ctime) = \
                (ent.atime, ent.mtime, ent.ctime)
            metalists[-1].append((sort_key, meta))
        else:
            id = None
            hlink = find_hardlink_target(hlink_db, ent)
            try:
                meta = metadata.from_path(
                    ent.name, hardlink_target=hlink, normalized=True,
                    after_stat=after_nondir_metadata_stat)
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = ent.name
                continue
            if stat.S_IFMT(ent.mode) != stat.S_IFMT(meta.mode):
                # The mode changed since we indexed the file, this is bad.
                # This can cause two issues:
                # 1) We e.g. think the file is a regular file, but now it's
                #    something else (a device, socket, FIFO or symlink, etc.)
                #    and _read_ from it when we shouldn't.
                # 2) We then record it as valid, but don't update the index
                #    metadata, and on a subsequent save it has 'hashvalid'
                #    but is recorded as the file type from the index, when
                #    the content is something else ...
                # Avoid all of these consistency issues by just skipping such
                # things - it really ought to not happen anyway.
                add_error("%s: mode changed since indexing, skipping."
                          % path_msg(ent.name))
                lastskip_name = ent.name
                continue
            if stat.S_ISREG(ent.mode):
                try:
                    # If the file changes while we're reading it, then our
                    # reading may stop at some point, but the stat() above
                    # may have gotten a different size already.  Recalculate
                    # the meta size so that the repository records the
                    # accurate size in the metadata, even if the other
                    # stat() data might be slightly older than the file
                    # content (which we can't fix, this is inherently racy,
                    # but we can prevent the size mismatch.)
                    meta.size = 0

                    def new_blob(data):
                        meta.size += len(data)
                        return w.new_blob(data)
                    before_saving_regular_file(ent.name)
                    with hashsplit.open_noatime(ent.name) as f:
                        (mode, id) = hashsplit.split_to_blob_or_tree(
                            new_blob, w.new_tree, [f],
                            keep_boundaries=False)
                except (IOError, OSError) as e:
                    # ent.name is bytes; format it with path_msg() like
                    # every other message here instead of printing b'...'.
                    add_error('%s: %s' % (path_msg(ent.name), e))
                    lastskip_name = ent.name
            elif stat.S_ISDIR(ent.mode):
                assert (0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                mode, id = (GIT_MODE_SYMLINK, w.new_blob(meta.symlink_target))
            else:
                # Everything else should be fully described by its
                # metadata, so just record an empty blob, so the paths
                # in the tree and .bupm will match up.
                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))

            if id:
                ent.validate(mode, id)
                ent.repack()
                git_name = git.mangle_name(file, ent.mode, ent.gitmode)
                git_info = (mode, git_name, id)
                shalists[-1].append(git_info)
                sort_key = git.shalist_item_sort_key((ent.mode, file, id))
                metalists[-1].append((sort_key, meta))

        if exists and wasmissing:
            _nonlocal['count'] += oldsize
            _nonlocal['subcount'] = 0

    if opt.progress:
        pct = total and _nonlocal['count'] * 100.0 / total or 100
        progress(
            'Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
            % (pct, _nonlocal['count'] / 1024, total / 1024, fcount, ftotal))

    while len(parts) > 1:  # _pop() all the parts above the root
        _pop(force_tree=None)
    assert (len(shalists) == 1)
    assert (len(metalists) == 1)

    # Finish the root directory.
    tree = _pop(
        force_tree=None,
        # When there's a collision, use empty metadata for the root.
        dir_metadata=metadata.Metadata() if root_collision else None)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.tree:
        out.write(hexlify(tree))
        out.write(b'\n')
    if opt.commit or name:
        if compat.py_maj > 2:
            # Strip b prefix from python 3 bytes reprs to preserve previous format
            msgcmd = b'[%s]' % b', '.join(
                [repr(argv_bytes(x))[1:].encode('ascii') for x in argv])
        else:
            msgcmd = repr(argv)
        msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
        userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
        commit = w.new_commit(tree, oldref, userline, date, None,
                              userline, date, None, msg)
        if opt.commit:
            out.write(hexlify(commit))
            out.write(b'\n')

    msr.close()
    w.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n'
            % len(saved_errors))
        sys.exit(1)
# [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...]. # This dual representation supports stripping/grafting, where the # archive path may not have a direct correspondence with the # filesystem. The root directory is represented by an initial # component named '', and any component that doesn't have a # corresponding filesystem directory (due to grafting, for # example) will have a real_fs_path of None, i.e. [('', None), # ...]. if first_root == None: dir_name, fs_path = dirp[0] first_root = dirp[0] # Not indexed, so just grab the FS metadata or use empty metadata. try: meta = metadata.from_path(fs_path) if fs_path else metadata.Metadata() except (OSError, IOError), e: add_error(e) lastskip_name = dir_name else: _push(dir_name, meta) elif first_root != dirp[0]: root_collision = True # If switching to a new sub-tree, finish the current sub-tree. while parts > [x[0] for x in dirp]: _pop(force_tree = None) # If switching to a new sub-tree, start a new sub-tree. for path_component in dirp[len(parts):]: dir_name, fs_path = path_component
def main(argv):
    """Print detailed metadata for each path given on the command line.

    --exclude-fields / --include-fields adjust the set of fields shown
    (the first --include-fields replaces the default set unless an earlier
    flag already modified it).  The atime/mtime/ctime resolution options
    truncate the corresponding timestamps to a multiple of the requested
    resolution.  Paths that do not exist are recorded via add_error();
    other OSError/IOError exceptions propagate.

    Exits with status 1 if any errors were recorded, otherwise 0.
    """
    active_fields = metadata.all_fields

    o = options.Options(optspec)
    (opt, flags, remainder) = o.parse_bytes(argv[1:])

    atime_resolution = parse_timestamp_arg('atime', opt.atime_resolution)
    mtime_resolution = parse_timestamp_arg('mtime', opt.mtime_resolution)
    ctime_resolution = parse_timestamp_arg('ctime', opt.ctime_resolution)

    def parse_field_set(value):
        # Split a comma separated field list, rejecting unknown names.
        fields = frozenset(value.split(','))
        for f in fields:
            if f not in metadata.all_fields:
                o.fatal(f + ' is not a valid field name')
        return fields

    treat_include_fields_as_definitive = True
    for flag, value in flags:
        if flag == '--exclude-fields':
            exclude_fields = parse_field_set(value)
            active_fields = active_fields - exclude_fields
            treat_include_fields_as_definitive = False
        elif flag == '--include-fields':
            include_fields = parse_field_set(value)
            if treat_include_fields_as_definitive:
                active_fields = include_fields
                treat_include_fields_as_definitive = False
            else:
                active_fields = active_fields | include_fields

    opt.verbose = opt.verbose or 0
    opt.quiet = opt.quiet or 0
    metadata.verbose = opt.verbose - opt.quiet

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    first_path = True
    for path in remainder:
        path = argv_bytes(path)
        try:
            m = metadata.from_path(path, archive_path=path)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                add_error(e)
                continue
            else:
                raise
        if metadata.verbose >= 0:
            if not first_path:
                out.write(b'\n')
            # Floor division is required here: with "/" Python 3 performs
            # true division, so the timestamp would become a float and
            # would not be truncated to the requested resolution.
            if atime_resolution != 1:
                m.atime = (m.atime // atime_resolution) * atime_resolution
            if mtime_resolution != 1:
                m.mtime = (m.mtime // mtime_resolution) * mtime_resolution
            if ctime_resolution != 1:
                m.ctime = (m.ctime // ctime_resolution) * ctime_resolution
            out.write(metadata.detailed_bytes(m, active_fields))
            out.write(b'\n')
            first_path = False

    if saved_errors:
        log('WARNING: %d errors encountered.\n' % len(saved_errors))
        sys.exit(1)
    else:
        sys.exit(0)
def save_tree(opt, reader, hlink_db, msr, w):
    """Write the selected index entries as git trees and return the root tree.

    opt -- parsed options; this function reads progress, verbose, smaller,
        strip, strip_path, grafts and sources from it.
    reader -- index reader whose filter() yields (transname, entry) pairs.
    hlink_db -- hard-link database used to look up hardlink targets.
    msr -- metadata store reader (metadata_at() is used for valid entries).
    w -- pack writer providing new_blob(), new_tree() and exists().

    Problems with individual paths are recorded via add_error() and the
    path is skipped.
    """
    # Metadata is stored in a file named .bupm in each directory.  The
    # first metadata entry will be the metadata for the current directory.
    # The remaining entries will be for each of the other directory
    # elements, in the order they're listed in the index.
    #
    # Since the git tree elements are sorted according to
    # git.shalist_item_sort_key, the metalist items are accumulated as
    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
    # created.  The sort_key should have been computed using the element's
    # mangled name and git mode (after hashsplitting), but the code isn't
    # actually doing that but rather uses the element's real name and mode.
    # This makes things a bit more difficult when reading it back, see
    # vfs.ordered_tree_entries().

    # Maintain a stack of information representing the current location in
    # the archive being constructed.  The current path is recorded in
    # parts, which will be something like
    #      [StackDir(name=''), StackDir(name='home'), StackDir(name='someuser')],
    # and the accumulated content and metadata for files in the dirs is stored
    # in the .items member of the StackDir.

    stack = []

    def _push(part, metadata):
        # Enter a new archive directory -- make it the current directory.
        item = StackDir(part, metadata)
        stack.append(item)

    def _pop(force_tree=None, dir_metadata=None):
        # Leave the current archive directory and add its tree to its parent.
        item = stack.pop()
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if force_tree:
            tree = force_tree
        else:
            names_seen = set()
            clean_list = []
            for x in item.items:
                name = x.name
                if name in names_seen:
                    parent_path = b'/'.join(d.name for d in stack) + b'/'
                    add_error('error: ignoring duplicate path %s in %s'
                              % (path_msg(name), path_msg(parent_path)))
                else:
                    names_seen.add(name)
                    clean_list.append(x)

            # if set, overrides the original metadata pushed for this dir.
            if dir_metadata is None:
                dir_metadata = item.meta
            metalist = [(b'', dir_metadata)]
            metalist += [(git.shalist_item_sort_key((entry.mode, entry.name,
                                                     None)),
                          entry.meta)
                         for entry in clean_list
                         if entry.mode != GIT_MODE_TREE]
            metalist.sort(key=lambda x: x[0])
            # Named bupm_f (not "metadata") so the metadata module is not
            # shadowed inside this helper.
            bupm_f = BytesIO(b''.join(m[1].encode() for m in metalist))
            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
                                                       [bupm_f],
                                                       keep_boundaries=False)
            shalist = [(mode, b'.bupm', id)]
            shalist += [(entry.gitmode,
                         git.mangle_name(entry.name, entry.mode,
                                         entry.gitmode),
                         entry.oid)
                        for entry in clean_list]

            tree = w.new_tree(shalist)
        if stack:
            stack[-1].append(item.name, GIT_MODE_TREE, GIT_MODE_TREE, tree,
                             None)
        return tree

    # Hack around lack of nonlocal vars in python 2
    _nonlocal = {}
    _nonlocal['count'] = 0
    _nonlocal['subcount'] = 0
    _nonlocal['lastremain'] = None

    def progress_report(n):
        # Reads total, tstart, fcount and ftotal from the enclosing scope.
        _nonlocal['subcount'] += n
        cc = _nonlocal['count'] + _nonlocal['subcount']
        pct = total and (cc*100.0/total) or 0
        now = time.time()
        elapsed = now - tstart
        kps = elapsed and int(cc/1024./elapsed)
        kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
        kps = int(kps/kps_frac)*kps_frac
        if cc:
            remain = elapsed*1.0/cc * (total-cc)
        else:
            remain = 0.0
        # Keep showing the previous estimate when the new one is larger by
        # less than 5%.
        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain'])
                and ((remain - _nonlocal['lastremain'])
                     / _nonlocal['lastremain'] < 0.05)):
            remain = _nonlocal['lastremain']
        else:
            _nonlocal['lastremain'] = remain
        hours = int(remain/60/60)
        mins = int(remain/60 - hours*60)
        secs = int(remain - hours*60*60 - mins*60)
        if elapsed < 30:
            remainstr = ''
            kpsstr = ''
        else:
            kpsstr = '%dk/s' % kps
            if hours:
                remainstr = '%dh%dm' % (hours, mins)
            elif mins:
                remainstr = '%dm%d' % (mins, secs)
            else:
                remainstr = '%ds' % secs
        qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
                  % (pct, cc/1024, total/1024, fcount, ftotal,
                     remainstr, kpsstr))

    def already_saved(ent):
        return ent.is_valid() and w.exists(ent.sha) and ent.sha

    def wantrecurse_pre(ent):
        return not already_saved(ent)

    def wantrecurse_during(ent):
        return not already_saved(ent) or ent.sha_missing()

    def find_hardlink_target(hlink_db, ent):
        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
            if link_paths:
                return link_paths[0]
        return None

    total = ftotal = 0
    if opt.progress:
        for transname, ent in reader.filter(opt.sources,
                                            wantrecurse=wantrecurse_pre):
            if not (ftotal % 10024):
                qprogress('Reading index: %d\r' % ftotal)
            exists = ent.exists()
            hashvalid = already_saved(ent)
            ent.set_sha_missing(not hashvalid)
            if not opt.smaller or ent.size < opt.smaller:
                if exists and not hashvalid:
                    total += ent.size
            ftotal += 1
        progress('Reading index: %d, done.\n' % ftotal)
        hashsplit.progress_callback = progress_report

    # Root collisions occur when strip or graft options map more than one
    # path to the same directory (paths which originally had separate
    # parents).  When that situation is detected, use empty metadata for
    # the parent.  Otherwise, use the metadata for the common parent.
    # Collision example: "bup save ... --strip /foo /foo/bar /bar".

    # FIXME: Add collision tests, or handle collisions some other way.

    # FIXME: Detect/handle strip/graft name collisions (other than root),
    # i.e. if '/foo/bar' and '/bar' both map to '/'.

    first_root = None
    root_collision = None
    tstart = time.time()
    fcount = 0
    lastskip_name = None
    lastdir = b''
    for transname, ent in reader.filter(opt.sources,
                                        wantrecurse=wantrecurse_during):
        (dir, file) = os.path.split(ent.name)
        exists = (ent.flags & index.IX_EXISTS)
        hashvalid = already_saved(ent)
        wasmissing = ent.sha_missing()
        oldsize = ent.size
        if opt.verbose:
            if not exists:
                status = 'D'
            elif not hashvalid:
                if ent.sha == index.EMPTY_SHA:
                    status = 'A'
                else:
                    status = 'M'
            else:
                status = ' '
            if opt.verbose >= 2:
                log('%s %-70s\n' % (status, path_msg(ent.name)))
            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
                if not lastdir.startswith(dir):
                    log('%s %-70s\n'
                        % (status, path_msg(os.path.join(dir, b''))))
                lastdir = dir

        if opt.progress:
            progress_report(0)
        fcount += 1

        if not exists:
            continue
        if opt.smaller and ent.size >= opt.smaller:
            if exists and not hashvalid:
                if opt.verbose:
                    log('skipping large file "%s"\n' % path_msg(ent.name))
                lastskip_name = ent.name
            continue

        assert(dir.startswith(b'/'))
        if opt.strip:
            dirp = stripped_path_components(dir, opt.sources)
        elif opt.strip_path:
            dirp = stripped_path_components(dir, [opt.strip_path])
        elif opt.grafts:
            dirp = grafted_path_components(opt.grafts, dir)
        else:
            dirp = path_components(dir)

        # At this point, dirp contains a representation of the archive
        # path that looks like [(archive_dir_name, real_fs_path), ...].
        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
        # might look like this at some point:
        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].

        # This dual representation supports stripping/grafting, where the
        # archive path may not have a direct correspondence with the
        # filesystem.  The root directory is represented by an initial
        # component named '', and any component that doesn't have a
        # corresponding filesystem directory (due to grafting, for
        # example) will have a real_fs_path of None, i.e. [('', None),
        # ...].

        if first_root is None:
            first_root = dirp[0]
        elif first_root != dirp[0]:
            root_collision = True

        # If switching to a new sub-tree, finish the current sub-tree.
        while [x.name for x in stack] > [x[0] for x in dirp]:
            _pop()

        # If switching to a new sub-tree, start a new sub-tree.
        for path_component in dirp[len(stack):]:
            dir_name, fs_path = path_component
            # Not indexed, so just grab the FS metadata or use empty metadata.
            try:
                meta = metadata.from_path(fs_path, normalized=True) \
                    if fs_path else metadata.Metadata()
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = dir_name
                meta = metadata.Metadata()
            _push(dir_name, meta)

        if not file:
            if len(stack) == 1:
                continue  # We're at the top level -- keep the current root dir
            # Since there's no filename, this is a subdir -- finish it.
            oldtree = already_saved(ent)  # may be None
            newtree = _pop(force_tree=oldtree)
            if not oldtree:
                if lastskip_name and lastskip_name.startswith(ent.name):
                    ent.invalidate()
                else:
                    ent.validate(GIT_MODE_TREE, newtree)
                ent.repack()
            if exists and wasmissing:
                _nonlocal['count'] += oldsize
            continue

        # it's not a directory
        if hashvalid:
            meta = msr.metadata_at(ent.meta_ofs)
            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
            # Restore the times that were cleared to 0 in the metastore.
            (meta.atime, meta.mtime, meta.ctime) = \
                (ent.atime, ent.mtime, ent.ctime)
            stack[-1].append(file, ent.mode, ent.gitmode, ent.sha, meta)
        else:
            id = None
            hlink = find_hardlink_target(hlink_db, ent)
            try:
                meta = metadata.from_path(
                    ent.name, hardlink_target=hlink, normalized=True,
                    after_stat=after_nondir_metadata_stat)
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = ent.name
                continue
            if stat.S_IFMT(ent.mode) != stat.S_IFMT(meta.mode):
                # The mode changed since we indexed the file, this is bad.
                # This can cause two issues:
                # 1) We e.g. think the file is a regular file, but now it's
                #    something else (a device, socket, FIFO or symlink, etc.)
                #    and _read_ from it when we shouldn't.
                # 2) We then record it as valid, but don't update the index
                #    metadata, and on a subsequent save it has 'hashvalid'
                #    but is recorded as the file type from the index, when
                #    the content is something else ...
                # Avoid all of these consistency issues by just skipping such
                # things - it really ought to not happen anyway.
                add_error("%s: mode changed since indexing, skipping."
                          % path_msg(ent.name))
                lastskip_name = ent.name
                continue
            if stat.S_ISREG(ent.mode):
                try:
                    # If the file changes while we're reading it, then our
                    # reading may stop at some point, but the stat() above
                    # may have gotten a different size already.  Recalculate
                    # the meta size so that the repository records the
                    # accurate size in the metadata, even if the other
                    # stat() data might be slightly older than the file
                    # content (which we can't fix, this is inherently racy,
                    # but we can prevent the size mismatch.)
                    meta.size = 0

                    def new_blob(data):
                        meta.size += len(data)
                        return w.new_blob(data)
                    before_saving_regular_file(ent.name)
                    with hashsplit.open_noatime(ent.name) as f:
                        (mode, id) = hashsplit.split_to_blob_or_tree(
                            new_blob, w.new_tree, [f],
                            keep_boundaries=False)
                except (IOError, OSError) as e:
                    # ent.name is bytes; format it with path_msg() like
                    # every other message here instead of printing b'...'.
                    add_error('%s: %s' % (path_msg(ent.name), e))
                    lastskip_name = ent.name
            elif stat.S_ISDIR(ent.mode):
                assert(0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                mode, id = (GIT_MODE_SYMLINK, w.new_blob(meta.symlink_target))
            else:
                # Everything else should be fully described by its
                # metadata, so just record an empty blob, so the paths
                # in the tree and .bupm will match up.
                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))

            if id:
                ent.validate(mode, id)
                ent.repack()
                stack[-1].append(file, ent.mode, ent.gitmode, id, meta)

        if exists and wasmissing:
            _nonlocal['count'] += oldsize
            _nonlocal['subcount'] = 0

    if opt.progress:
        pct = total and _nonlocal['count']*100.0/total or 100
        progress('Saving: %.2f%% (%d/%dk, %d/%d files), done. \n'
                 % (pct, _nonlocal['count']/1024, total/1024, fcount, ftotal))

    while len(stack) > 1:  # _pop() all the parts above the root
        _pop()

    # Finish the root directory.
    # When there's a collision, use empty metadata for the root.
    tree = _pop(dir_metadata=metadata.Metadata() if root_collision else None)

    return tree
def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
    """Bring the index stored at ``indexfile`` in line with ``top``.

    Each ``(path, stat)`` pair yielded by ``recursive_dirlist`` is matched
    against the entries read from the existing index:

    * existing entries the walk has moved past are marked deleted,
    * entries for the same path are refreshed when they are stale,
    * paths without an entry are added to a new index writer.

    When an index already exists, the old and the newly written entries
    are merged into a fresh writer at the end; otherwise the new writer
    is simply closed.  Hard-link bookkeeping is kept in the ``.hlink``
    database and metadata in the ``.meta`` store next to the index.

    ``top`` is the root handed to both the directory walk and the index
    iterator; ``excluded_paths``, ``exclude_rxs`` and ``xdev_exceptions``
    are forwarded unchanged to ``recursive_dirlist``.  Nothing is returned.
    """
    # tmax and tstart must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    old_index = index.Reader(indexfile)
    meta_writer = index.MetaStoreWriter(indexfile + '.meta')
    new_index = index.Writer(indexfile, meta_writer, tmax)
    old_iter = IterHelper(old_index.iter(name=top))
    tstart = int(time.time()) * 10**9
    hardlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    def _fake_hash(name):
        return (GIT_MODE_FILE, index.FAKE_SHA)

    # Only hand out fake hashes when explicitly requested.
    fake_hash = _fake_hash if opt.fake_valid else None

    def _multiply_linked_file(mode, nlink):
        # True for a non-directory with more than one link.
        return not stat.S_ISDIR(mode) and nlink > 1

    def _rate(count):
        # Paths handled per second since the walk started (0 if no
        # time has elapsed yet).
        elapsed = time.time() - index_start
        return count / elapsed if elapsed else 0

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    walker = recursive_dirlist([top],
                               xdev=opt.xdev,
                               bup_dir=bup_dir,
                               excluded_paths=excluded_paths,
                               exclude_rxs=exclude_rxs,
                               xdev_exceptions=xdev_exceptions)
    for path, pst in walker:
        show_path = (opt.verbose >= 2
                     or (opt.verbose == 1 and stat.S_ISDIR(pst.st_mode)))
        if show_path:
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
        # Progress is refreshed whenever a path is printed, and
        # otherwise once every 128 paths.
        if show_path or not (total % 128):
            qprogress('Indexing: %d (%d paths/s)\r' % (total, _rate(total)))
        total += 1

        # Deleted paths: existing entries whose name compares greater
        # than the path the walk has reached.
        while old_iter.cur and old_iter.cur.name > path:
            gone = old_iter.cur
            if gone.exists():
                gone.set_deleted()
                gone.repack()
                if _multiply_linked_file(gone.mode, gone.nlink):
                    hardlinks.del_path(gone.name)
            old_iter.next()

        known = old_iter.cur
        if not (known and known.name == path):
            # New path: no matching entry in the existing index.
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as ex:
                add_error(ex)
                continue
            # Same rationale as the zeroing in the stale-entry case below.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = meta_writer.store(meta)
            new_index.add(path, pst, meta_ofs, hashgen=fake_hash)
            if _multiply_linked_file(pst.st_mode, pst.st_nlink):
                hardlinks.add_path(path, pst.st_dev, pst.st_ino)
            continue

        # Path that already had an index entry.
        need_repack = False
        if known.stale(pst, tstart, check_device=opt.check_device):
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as ex:
                add_error(ex)
                old_iter.next()
                continue
            if _multiply_linked_file(known.mode, known.nlink):
                hardlinks.del_path(known.name)
            if _multiply_linked_file(pst.st_mode, pst.st_nlink):
                hardlinks.add_path(path, pst.st_dev, pst.st_ino)
            # Zero the timestamps so they don't bloat the store -- the
            # index already holds them (they vary a lot and are fixed
            # length).  Given "tmax", which may adjust (mangle) the
            # index mtime and ctime, one might worry that such fake
            # values could end up in a .bupm.  It looks like that
            # shouldn't be possible: (1) when "save" validates the index
            # entry it always reads the metadata from the filesystem;
            # (2) metadata is only read/used from the index if hashvalid
            # is true; (3) "faked" entries will be stale(), and so get
            # invalidated right here.
            meta.ctime = meta.mtime = meta.atime = 0
            meta_ofs = meta_writer.store(meta)
            known.update_from_stat(pst, meta_ofs)
            known.invalidate()
            need_repack = True
        if not (known.flags & index.IX_HASHVALID) and fake_hash:
            known.gitmode, known.sha = fake_hash(path)
            known.flags |= index.IX_HASHVALID
            need_repack = True
        if opt.fake_invalid:
            known.invalidate()
            need_repack = True
        if need_repack:
            known.repack()
        old_iter.next()

    progress('Indexing: %d, done (%d paths/s).\n' % (total, _rate(total)))

    hardlinks.prepare_save()

    if not old_index.exists():
        # No previous index: what was just written becomes the index.
        new_index.close()
    else:
        old_index.save()
        new_index.flush()
        if new_index.count:
            fresh = new_index.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(old_index)
                log('check: before merging: newfile\n')
                check_index(fresh)
            merged = index.Writer(indexfile, meta_writer, tmax)
            for entry in index.merge(old_index, fresh):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                merged.add_ixentry(entry)
            old_index.close()
            merged.close()
            fresh.close()
        new_index.abort()

    meta_writer.close()
    hardlinks.commit_save()
# [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...]. # This dual representation supports stripping/grafting, where the # archive path may not have a direct correspondence with the # filesystem. The root directory is represented by an initial # component named '', and any component that doesn't have a # corresponding filesystem directory (due to grafting, for # example) will have a real_fs_path of None, i.e. [('', None), # ...]. if first_root == None: dir_name, fs_path = dirp[0] first_root = dirp[0] # Not indexed, so just grab the FS metadata or use empty metadata. try: meta = metadata.from_path(fs_path) if fs_path else metadata.Metadata() except (OSError, IOError), e: add_error(e) lastskip_name = dir_name else: _push(dir_name, meta) elif first_root != dirp[0]: root_collision = True # If switching to a new sub-tree, finish the current sub-tree. while parts > [x[0] for x in dirp]: _pop(force_tree=None) # If switching to a new sub-tree, start a new sub-tree. for path_component in dirp[len(parts) :]: dir_name, fs_path = path_component
def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
    """Bring the index stored at ``indexfile`` in line with ``top``.

    Each ``(path, stat)`` pair yielded by ``recursive_dirlist`` is matched
    against the entries read from the existing index:

    * existing entries the walk has moved past are marked deleted,
    * entries for the same path are refreshed when they are stale,
    * paths without an entry are added to a new index writer.

    When an index already exists, the old and the newly written entries
    are merged into a fresh writer at the end; otherwise the new writer
    is simply closed.  Hard-link bookkeeping is kept in the ``.hlink``
    database and metadata in the ``.meta`` store next to the index.

    ``top`` is the root handed to both the directory walk and the index
    iterator; ``excluded_paths``, ``exclude_rxs`` and ``xdev_exceptions``
    are forwarded unchanged to ``recursive_dirlist``.  Nothing is returned.
    """
    # tmax and tstart must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    old_index = index.Reader(indexfile)
    meta_writer = index.MetaStoreWriter(indexfile + '.meta')
    new_index = index.Writer(indexfile, meta_writer, tmax)
    old_iter = IterHelper(old_index.iter(name=top))
    tstart = int(time.time()) * 10**9
    hardlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    def _fake_hash(name):
        return (GIT_MODE_FILE, index.FAKE_SHA)

    # Only hand out fake hashes when explicitly requested.
    fake_hash = _fake_hash if opt.fake_valid else None

    def _multiply_linked_file(mode, nlink):
        # True for a non-directory with more than one link.
        return not stat.S_ISDIR(mode) and nlink > 1

    def _rate(count):
        # Paths handled per second since the walk started (0 if no
        # time has elapsed yet).
        elapsed = time.time() - index_start
        return count / elapsed if elapsed else 0

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    walker = recursive_dirlist([top],
                               xdev=opt.xdev,
                               bup_dir=bup_dir,
                               excluded_paths=excluded_paths,
                               exclude_rxs=exclude_rxs,
                               xdev_exceptions=xdev_exceptions)
    for path, pst in walker:
        show_path = (opt.verbose >= 2
                     or (opt.verbose == 1 and stat.S_ISDIR(pst.st_mode)))
        if show_path:
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
        # Progress is refreshed whenever a path is printed, and
        # otherwise once every 128 paths.
        if show_path or not (total % 128):
            qprogress('Indexing: %d (%d paths/s)\r' % (total, _rate(total)))
        total += 1

        # Deleted paths: existing entries whose name compares greater
        # than the path the walk has reached.
        while old_iter.cur and old_iter.cur.name > path:
            gone = old_iter.cur
            if gone.exists():
                gone.set_deleted()
                gone.repack()
                if _multiply_linked_file(gone.mode, gone.nlink):
                    hardlinks.del_path(gone.name)
            old_iter.next()

        known = old_iter.cur
        if not (known and known.name == path):
            # New path: no matching entry in the existing index.
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as ex:
                add_error(ex)
                continue
            # Same rationale as the zeroing in the stale-entry case below.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = meta_writer.store(meta)
            new_index.add(path, pst, meta_ofs, hashgen=fake_hash)
            if _multiply_linked_file(pst.st_mode, pst.st_nlink):
                hardlinks.add_path(path, pst.st_dev, pst.st_ino)
            continue

        # Path that already had an index entry.
        need_repack = False
        if known.stale(pst, tstart, check_device=opt.check_device):
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as ex:
                add_error(ex)
                old_iter.next()
                continue
            if _multiply_linked_file(known.mode, known.nlink):
                hardlinks.del_path(known.name)
            if _multiply_linked_file(pst.st_mode, pst.st_nlink):
                hardlinks.add_path(path, pst.st_dev, pst.st_ino)
            # Zero the timestamps so they don't bloat the store -- the
            # index already holds them (they vary a lot and are fixed
            # length).  Given "tmax", which may adjust (mangle) the
            # index mtime and ctime, one might worry that such fake
            # values could end up in a .bupm.  It looks like that
            # shouldn't be possible: (1) when "save" validates the index
            # entry it always reads the metadata from the filesystem;
            # (2) metadata is only read/used from the index if hashvalid
            # is true; (3) "faked" entries will be stale(), and so get
            # invalidated right here.
            meta.ctime = meta.mtime = meta.atime = 0
            meta_ofs = meta_writer.store(meta)
            known.update_from_stat(pst, meta_ofs)
            known.invalidate()
            need_repack = True
        if not (known.flags & index.IX_HASHVALID) and fake_hash:
            known.gitmode, known.sha = fake_hash(path)
            known.flags |= index.IX_HASHVALID
            need_repack = True
        if opt.fake_invalid:
            known.invalidate()
            need_repack = True
        if need_repack:
            known.repack()
        old_iter.next()

    progress('Indexing: %d, done (%d paths/s).\n' % (total, _rate(total)))

    hardlinks.prepare_save()

    if not old_index.exists():
        # No previous index: what was just written becomes the index.
        new_index.close()
    else:
        old_index.save()
        new_index.flush()
        if new_index.count:
            fresh = new_index.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(old_index)
                log('check: before merging: newfile\n')
                check_index(fresh)
            merged = index.Writer(indexfile, meta_writer, tmax)
            for entry in index.merge(old_index, fresh):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                merged.add_ixentry(entry)
            old_index.close()
            merged.close()
            fresh.close()
        new_index.abort()

    meta_writer.close()
    hardlinks.commit_save()