def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Check for cached metadata
    cachepath = options.cachepath
    if not os.path.exists(cachepath + '.params'):
        raise QuietError("No local metadata found.")

    param = load_params(cachepath)

    # Check revision
    if param['revision'] < CURRENT_FS_REV:
        raise QuietError('File system revision too old.')
    elif param['revision'] > CURRENT_FS_REV:
        raise QuietError('File system revision too new.')

    if os.path.exists(DBNAME):
        raise QuietError('%s exists, aborting.' % DBNAME)

    log.info('Copying database...')
    dst = tempfile.NamedTemporaryFile()
    with open(cachepath + '.db', 'rb') as src:
        shutil.copyfileobj(src, dst)
    dst.flush()
    db = Connection(dst.name)

    log.info('Scrambling...')
    md5 = lambda x: hashlib.md5(x).hexdigest()
    for (id_, name) in db.query('SELECT id, name FROM names'):
        db.execute('UPDATE names SET name=? WHERE id=?', (md5(name), id_))

    for (id_, name) in db.query('SELECT inode, target FROM symlink_targets'):
        db.execute('UPDATE symlink_targets SET target=? WHERE inode=?', (md5(name), id_))

    for (id_, name) in db.query('SELECT rowid, value FROM ext_attributes'):
        db.execute('UPDATE ext_attributes SET value=? WHERE rowid=?', (md5(name), id_))

    log.info('Saving...')
    with open(DBNAME, 'wb+') as fh:
        dump_metadata(db, fh)

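# The scrambling above relies on S3QL's Connection and dump_metadata helpers.
# The core pattern -- replacing identifying strings with their hex MD5
# digests, in place, keyed by the row id -- can be shown standalone with the
# sqlite3 module. This is an illustrative sketch, not part of the script
# above; table and column names in the example call are made up.
import hashlib
import sqlite3

def scramble_column(conn, table, key_col, val_col):
    # Overwrite every value in val_col with its MD5 hex digest, keyed by key_col.
    md5 = lambda x: hashlib.md5(x).hexdigest()
    rows = conn.execute('SELECT %s, %s FROM %s' % (key_col, val_col, table)).fetchall()
    for (id_, val) in rows:
        if isinstance(val, str):
            val = val.encode('utf-8')
        conn.execute('UPDATE %s SET %s = ? WHERE %s = ?' % (table, val_col, key_col),
                     (md5(val), id_))
    conn.commit()

# Example (hypothetical database copy): scramble_column(sqlite3.connect('copy.db'), 'names', 'id', 'name')
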
def main(args=None):
    options = parse_args(args)

    src_backend_factory = options.src_backend_factory
    dst_backend_factory = options.dst_backend_factory

    # Try to access both backends before starting threads
    try:
        src_backend_factory().lookup('s3ql_metadata')
        try:
            dst_backend_factory().lookup('some random object')
        except NoSuchObject:
            pass
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc)) from None

    queue = Queue(maxsize=options.threads)
    threads = []
    for _ in range(options.threads):
        t = AsyncFn(copy_loop, queue, src_backend_factory, dst_backend_factory)
        # Don't wait for worker threads, gives deadlock if main thread
        # terminates with exception
        t.daemon = True
        t.start()
        threads.append(t)

    with src_backend_factory() as backend:
        stamp1 = 0
        for (i, key) in enumerate(backend):
            stamp2 = time.time()
            if stamp2 - stamp1 > 1:
                stamp1 = stamp2
                sys.stdout.write('\rCopied %d objects so far...' % i)
                sys.stdout.flush()

                # Terminate early if any thread failed with an exception
                for t in threads:
                    if not t.is_alive():
                        t.join_and_raise()

            # Avoid blocking if all threads terminated
            while True:
                try:
                    queue.put(key, timeout=1)
                except QueueFull:
                    pass
                else:
                    break
                for t in threads:
                    if not t.is_alive():
                        t.join_and_raise()

    sys.stdout.write('\n')

    queue.maxsize += len(threads)
    for t in threads:
        queue.put(None)

    for t in threads:
        t.join_and_raise()

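# copy_loop is referenced above but not defined in this excerpt. The sketch
# below shows only the queue/sentinel worker pattern it relies on, using plain
# dicts in place of real S3QL backends -- an illustration under that
# assumption, not the actual implementation.
from queue import Queue
from threading import Thread

def copy_loop_sketch(queue, src, dst):
    # Pull keys until the None sentinel arrives; copy each object verbatim.
    while True:
        key = queue.get()
        if key is None:
            break
        dst[key] = src[key]

def copy_loop_sketch_demo(nthreads=3):
    # Fake "backends": ten objects in a plain dict.
    src = {'obj%d' % i: ('data%d' % i).encode() for i in range(10)}
    dst = {}
    queue = Queue(maxsize=nthreads)
    workers = [Thread(target=copy_loop_sketch, args=(queue, src, dst))
               for _ in range(nthreads)]
    for w in workers:
        w.start()
    for key in src:
        queue.put(key)
    for _ in workers:
        queue.put(None)  # one sentinel per worker, as in main() above
    for w in workers:
        w.join()
    assert dst == src
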
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Determine available backups
    backup_list = set(x for x in os.listdir('.')
                      if re.match(r'^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d$', x))

    if not os.path.exists(options.state) and len(backup_list) > 1:
        if not options.reconstruct_state:
            raise QuietError(
                'Found more than one backup but no state file! Aborting.')

        log.warning('Trying to reconstruct state file..')
        state = upgrade_to_state(backup_list)
        if not options.n:
            log.info('Saving reconstructed state..')
            with open(options.state, 'wb') as fh:
                fh.write(freeze_basic_mapping(state))
    elif not os.path.exists(options.state):
        log.warning('Creating state file..')
        state = dict()
    else:
        log.info('Reading state...')
        # Older versions used pickle to store state...
        with open(options.state, 'rb') as fh:
            proto = fh.read(2)
            fh.seek(0)
            if proto == b'\x80\x02':
                state = pickle.load(fh)
            else:
                state = thaw_basic_mapping(fh.read())

    to_delete = process_backups(backup_list, state, options.cycles)

    for x in to_delete:
        log.info('Backup %s is no longer needed, removing...', x)
        if not options.n:
            if options.use_s3qlrm:
                s3qlrm([x])
            else:
                shutil.rmtree(x)

    if options.n:
        log.info('Dry run, not saving state.')
    else:
        log.info('Saving state..')
        with open(options.state, 'wb') as fh:
            fh.write(freeze_basic_mapping(state))

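# The state-file sniffing above keys on the first two bytes: pickle protocol 2
# streams always begin with b'\x80\x02' (the PROTO opcode followed by the
# protocol number), so anything else is treated as the newer frozen-mapping
# format. A standalone version of that check, for illustration only:
import pickle

def looks_like_pickle2(path):
    # True if the file starts with the pickle protocol-2 magic bytes.
    with open(path, 'rb') as fh:
        return fh.read(2) == b'\x80\x02'

# pickle.dumps({'a': 1}, protocol=2)[:2] == b'\x80\x02'
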
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Determine available backups
    backup_list = set(x for x in os.listdir('.')
                      if re.match(r'^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d$', x))

    if not os.path.exists(options.state) and len(backup_list) > 1:
        if not options.reconstruct_state:
            raise QuietError(
                'Found more than one backup but no state file! Aborting.')

        log.warning('Trying to reconstruct state file..')
        state = upgrade_to_state(backup_list)
        if not options.n:
            log.info('Saving reconstructed state..')
            pickle.dump(state, open(options.state, 'wb'), PICKLE_PROTOCOL)
    elif not os.path.exists(options.state):
        log.warning('Creating state file..')
        state = dict()
    else:
        log.info('Reading state...')
        state = pickle.load(open(options.state, 'rb'))

    to_delete = process_backups(backup_list, state, options.cycles)

    for x in to_delete:
        log.info('Backup %s is no longer needed, removing...', x)
        if not options.n:
            if options.use_s3qlrm:
                s3qlrm([x])
            else:
                shutil.rmtree(x)

    if options.n:
        log.info('Dry run, not saving state.')
    else:
        log.info('Saving state..')
        pickle.dump(state, open(options.state, 'wb'), PICKLE_PROTOCOL)

def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    src_backends = []
    dst_backends = []
    try:
        options.storage_url = options.src_storage_url
        for _ in range(options.threads + 1):
            src_backends.append(get_backend(options, plain=True))
        options.storage_url = options.dst_storage_url
        for _ in range(options.threads):
            dst_backends.append(get_backend(options, plain=True))
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc)) from None

    queue = Queue(maxsize=options.threads)
    threads = []
    for (src_backend, dst_backend) in zip(src_backends, dst_backends):
        t = Thread(target=copy_loop, args=(queue, src_backend, dst_backend))
        t.start()
        threads.append(t)

    for (i, key) in enumerate(src_backends[-1]):
        if i % 500 == 0 and sys.stdout.isatty():
            sys.stdout.write('\rCopied %d objects so far...' % i)
            sys.stdout.flush()
        queue.put(key)

    for t in threads:
        queue.put(None)

    for t in threads:
        t.join()

    if sys.stdout.isatty():
        sys.stdout.write('\n')

def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # /dev/urandom may be slow, so we cache the data first
    log.info('Preparing test data...')
    rnd_fh = tempfile.TemporaryFile()
    with open('/dev/urandom', 'rb', 0) as src:
        copied = 0
        while copied < 50 * 1024 * 1024:
            buf = src.read(BUFSIZE)
            rnd_fh.write(buf)
            copied += len(buf)

    log.info('Measuring throughput to cache...')
    backend_dir = tempfile.mkdtemp(prefix='s3ql-benchmark-')
    mnt_dir = tempfile.mkdtemp(prefix='s3ql-mnt')
    atexit.register(shutil.rmtree, backend_dir)
    atexit.register(shutil.rmtree, mnt_dir)

    block_sizes = [2**b for b in range(12, 18)]
    for blocksize in block_sizes:
        write_time = 0
        size = 50 * 1024 * 1024
        while write_time < 3:
            log.debug('Write took %.3g seconds, retrying', write_time)
            subprocess.check_call([
                exec_prefix + 'mkfs.s3ql', '--plain', 'local://%s' % backend_dir,
                '--quiet', '--force', '--cachedir', options.cachedir
            ])
            subprocess.check_call([
                exec_prefix + 'mount.s3ql', '--threads', '1', '--quiet',
                '--cachesize', '%d' % (2 * size / 1024), '--log',
                '%s/mount.log' % backend_dir, '--cachedir', options.cachedir,
                'local://%s' % backend_dir, mnt_dir
            ])
            try:
                size *= 2
                with open('%s/bigfile' % mnt_dir, 'wb', 0) as dst:
                    rnd_fh.seek(0)
                    write_time = time.time()
                    copied = 0
                    while copied < size:
                        buf = rnd_fh.read(blocksize)
                        if not buf:
                            rnd_fh.seek(0)
                            continue
                        dst.write(buf)
                        copied += len(buf)

                write_time = time.time() - write_time
                os.unlink('%s/bigfile' % mnt_dir)
            finally:
                subprocess.check_call([exec_prefix + 'umount.s3ql', mnt_dir])

        fuse_speed = copied / write_time
        log.info('Cache throughput with %3d KiB blocks: %d KiB/sec',
                 blocksize / 1024, fuse_speed / 1024)

    # Upload random data to prevent effects of compression
    # on the network layer
    log.info('Measuring raw backend throughput..')
    try:
        backend = get_backend(options, raw=True)
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc)) from None

    upload_time = 0
    size = 512 * 1024
    while upload_time < 10:
        size *= 2

        def do_write(dst):
            rnd_fh.seek(0)
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = rnd_fh.read(BUFSIZE)
                if not buf:
                    rnd_fh.seek(0)
                    continue
                dst.write(buf)
                copied += len(buf)
            return (copied, stamp)

        (upload_size, upload_time) = backend.perform_write(do_write, 's3ql_testdata')
        upload_time = time.time() - upload_time

    backend_speed = upload_size / upload_time
    log.info('Backend throughput: %d KiB/sec', backend_speed / 1024)
    backend.delete('s3ql_testdata')

    src = options.file
    size = os.fstat(options.file.fileno()).st_size
    log.info('Test file size: %.2f MiB', (size / 1024**2))

    in_speed = dict()
    out_speed = dict()
    for alg in ALGS:
        log.info('compressing with %s-6...', alg)
        backend = ComprencBackend(
            b'pass', (alg, 6),
            Backend(argparse.Namespace(storage_url='local://' + backend_dir)))

        def do_write(dst):  # pylint: disable=E0102
            src.seek(0)
            stamp = time.time()
            while True:
                buf = src.read(BUFSIZE)
                if not buf:
                    break
                dst.write(buf)
            return (dst, stamp)

        (dst_fh, stamp) = backend.perform_write(do_write, 's3ql_testdata')
        dt = time.time() - stamp
        in_speed[alg] = size / dt
        out_speed[alg] = dst_fh.get_obj_size() / dt
        log.info('%s compression speed: %d KiB/sec per thread (in)', alg, in_speed[alg] / 1024)
        log.info('%s compression speed: %d KiB/sec per thread (out)', alg, out_speed[alg] / 1024)

    print('')
    print('With %d KiB blocks, maximum performance for different compression'
          % (block_sizes[-1] / 1024),
          'algorithms and thread counts is:', '', sep='\n')

    threads = set([1, 2, 4, 8])
    cores = os.sysconf('SC_NPROCESSORS_ONLN')
    if cores != -1:
        threads.add(cores)
    if options.threads:
        threads.add(options.threads)

    print('%-26s' % 'Threads:', ('%12d' * len(threads)) % tuple(sorted(threads)))

    for alg in ALGS:
        speeds = []
        limits = []
        for t in sorted(threads):
            if fuse_speed > t * in_speed[alg]:
                limit = 'CPU'
                speed = t * in_speed[alg]
            else:
                limit = 'S3QL/FUSE'
                speed = fuse_speed

            if speed / in_speed[alg] * out_speed[alg] > backend_speed:
                limit = 'uplink'
                speed = backend_speed * in_speed[alg] / out_speed[alg]

            limits.append(limit)
            speeds.append(speed / 1024)

        print('%-26s' % ('Max FS throughput (%s):' % alg),
              ('%7d KiB/s' * len(threads)) % tuple(speeds))
        print('%-26s' % '..limited by:', ('%12s' * len(threads)) % tuple(limits))

    print('')
    print('All numbers assume that the test file is representative and that',
          'there are enough processor cores to run all active threads in parallel.',
          'To compensate for network latency, you should use about twice as',
          'many upload threads as indicated by the above table.\n', sep='\n')
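
# The bottleneck selection above can be exercised standalone: effective
# throughput is capped either by CPU (threads * per-thread compression speed),
# by the single-threaded FUSE/cache path, or by the uplink scaled with the
# compression ratio. The numbers in the example call are hypothetical.
def max_fs_throughput(threads, fuse_speed, in_speed, out_speed, backend_speed):
    # All speeds in bytes/sec; in_speed and out_speed are per-thread rates
    # before and after compression.
    if fuse_speed > threads * in_speed:
        limit, speed = 'CPU', threads * in_speed
    else:
        limit, speed = 'S3QL/FUSE', fuse_speed
    if speed / in_speed * out_speed > backend_speed:
        limit, speed = 'uplink', backend_speed * in_speed / out_speed
    return (speed, limit)

# e.g. 4 threads, 200 MiB/s cache, 40 MiB/s in, 10 MiB/s out, 5 MiB/s uplink:
# max_fs_throughput(4, 200e6, 40e6, 10e6, 5e6) -> (20000000.0, 'uplink')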