Example #1
File: ctrl.py Project: drewlu/ossql
def main(args=None):
    '''Control a mounted S3QL File System.'''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    path = options.mountpoint
    
    if not os.path.exists(path):
        raise QuietError('Mountpoint %r does not exist' % path)
    
    ctrlfile = os.path.join(path, CTRL_NAME)
    # The control file is deliberately hidden from directory listings by the
    # file system but can still be stat()ed; both conditions together identify
    # an S3QL mountpoint.
    if not (CTRL_NAME not in llfuse.listdir(path)
            and os.path.exists(ctrlfile)):
        raise QuietError('Mountpoint is not an S3QL file system')

    if os.stat(ctrlfile).st_uid != os.geteuid() and os.geteuid() != 0:
        raise QuietError('Only root and the mounting user may run s3qlctrl.')
    
    if options.action == 'flushcache':
        llfuse.setxattr(ctrlfile, 's3ql_flushcache!', 'dummy')

    elif options.action == 'upload-meta':
        llfuse.setxattr(ctrlfile, 'upload-meta', 'dummy')
        
    elif options.action == 'log':
        llfuse.setxattr(ctrlfile, 'logging', 
                      pickle.dumps((options.level, options.modules),
                                   pickle.HIGHEST_PROTOCOL))

    elif options.action == 'cachesize':
        llfuse.setxattr(ctrlfile, 'cachesize', pickle.dumps(options.cachesize*1024))    
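The actions above are delivered to the mounted file system as setxattr() calls on the hidden control file; the file system intercepts them and performs the requested operation instead of storing an attribute. A minimal, hedged sketch of issuing such a request by hand (assuming CTRL_NAME can be imported from s3ql.common and an S3QL file system is mounted at /mnt/s3ql):

import os
import llfuse
from s3ql.common import CTRL_NAME  # assumed location of the constant

ctrlfile = os.path.join('/mnt/s3ql', CTRL_NAME)
# Ask the mounted file system to flush its cache; the attribute value itself
# is only a placeholder.
llfuse.setxattr(ctrlfile, 's3ql_flushcache!', 'dummy')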
Example #2
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    with get_backend(options.src, options.authfile,
                     options.ssl) as (src_conn, src_name):
        
        if src_name not in src_conn:
            raise QuietError("Source bucket does not exist.")
        src_bucket = src_conn.get_bucket(src_name)
        
        try:
            unlock_bucket(options.authfile, options.src, src_bucket)
        except ChecksumError:
            raise QuietError('Checksum error - incorrect password?')
        
        with get_backend(options.dest, options.authfile,
                         options.ssl) as (dest_conn, dest_name):
        
            if dest_name in dest_conn:
                raise QuietError("Bucket already exists!\n"
                                 "(you can delete an existing bucket with s3qladm --delete)\n")

            dest_bucket = dest_conn.create_bucket(dest_name, compression=None)            

            copy_objects(src_bucket, dest_bucket)
Example #3
def main(args=None):
    '''Umount S3QL file system
    
    This function writes to stdout/stderr and calls `sys.exit()` instead
    of returning.
    '''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)
    mountpoint = options.mountpoint
        
    # Check if it's a mount point
    if not posixpath.ismount(mountpoint):
        print('Not a mount point.', file=sys.stderr)
        sys.exit(1)

    # Check if it's an S3QL mountpoint
    ctrlfile = os.path.join(mountpoint, CTRL_NAME)
    if not (CTRL_NAME not in llfuse.listdir(mountpoint)
            and os.path.exists(ctrlfile)):
        print('Not an S3QL file system.', file=sys.stderr)
        sys.exit(1)

    if options.lazy:
        lazy_umount(mountpoint)
    else:
        blocking_umount(mountpoint)
Example #4
def main(args=None):
    '''Recursively delete files and directories in an S3QL file system'''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)
    
    for name in options.path:
        if not os.path.exists(name):
            raise QuietError('%r does not exist' % name)
    
        parent = os.path.dirname(os.path.abspath(name))
        fstat_p = os.stat(parent)
        fstat = os.stat(name)
    
        if fstat_p.st_dev != fstat.st_dev:
            raise QuietError('%s is a mount point itself.' % name)
        
        ctrlfile = os.path.join(parent, CTRL_NAME)
        if not (CTRL_NAME not in llfuse.listdir(parent) and os.path.exists(ctrlfile)):
            raise QuietError('%s is not on an S3QL file system' % name)

        if os.stat(ctrlfile).st_uid != os.geteuid() and os.geteuid() != 0:
            raise QuietError('Only root and the mounting user may run s3qlrm.')
    
        llfuse.setxattr(ctrlfile, 'rmtree', pickle.dumps((fstat_p.st_ino, 
                                                          os.path.basename(name)),
                                                          pickle.HIGHEST_PROTOCOL))
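For illustration, the request above is just a pickled two-tuple, so the receiving side can recover its parts with pickle again. A minimal sketch, where payload stands in for the bytes handed to llfuse.setxattr():

(parent_inode, basename) = pickle.loads(payload)
# parent_inode identifies the containing directory, basename the entry whose
# tree should be deleted recursively.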
Example #5
File: adm.py Project: drewlu/ossql
def main(args=None):
    '''Change or show S3QL file system parameters'''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Check if fs is mounted on this computer
    # This is not foolproof but should prevent common mistakes
    match = options.storage_url + ' /'
    with open('/proc/mounts', 'r') as fh:
        for line in fh:
            if line.startswith(match):
                raise QuietError('Can not work on mounted file system.')
               
    if options.action == 'clear':
        return clear(get_bucket(options, plain=True),
                     get_bucket_cachedir(options.storage_url, options.cachedir))
    
    if options.action == 'upgrade':
        return upgrade(get_possibly_old_bucket(options))
        
    bucket = get_bucket(options)
    
    if options.action == 'passphrase':
        return change_passphrase(bucket)

    if options.action == 'download-metadata':
        return download_metadata(bucket, options.storage_url)
Example #6
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    fsck = ROFsck(options.path)
    fsck.check()
Example #7
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Check for cached metadata
    cachepath = get_backend_cachedir(options.storage_url, options.cachedir)
    if not os.path.exists(cachepath + '.params'):
        raise QuietError("No local metadata found.")            

    param = pickle.load(open(cachepath + '.params', 'rb'))
            
    # Check revision
    if param['revision'] < CURRENT_FS_REV:
        raise QuietError('File system revision too old.')
    elif param['revision'] > CURRENT_FS_REV:
        raise QuietError('File system revision too new.')

    if os.path.exists(DBNAME):
        raise QuietError('%s exists, aborting.' % DBNAME)
    
    log.info('Copying database...')
    dst = tempfile.NamedTemporaryFile()
    with open(cachepath + '.db', 'rb') as src:
        shutil.copyfileobj(src, dst)
    dst.flush()
    db = Connection(dst.name)
     
    log.info('Scrambling...')
    md5 = lambda x: hashlib.md5(x).hexdigest()
    for (id_, name) in db.query('SELECT id, name FROM names'):
        db.execute('UPDATE names SET name=? WHERE id=?',
                   (md5(name), id_))

    for (id_, name) in db.query('SELECT inode, target FROM symlink_targets'):
        db.execute('UPDATE symlink_targets SET target=? WHERE inode=?',
                   (md5(name), id_))

    for (id_, name) in db.query('SELECT rowid, value FROM ext_attributes'):
        db.execute('UPDATE ext_attributes SET value=? WHERE rowid=?',
                   (md5(name), id_))
    
    log.info('Saving...')
    with open(DBNAME, 'wb+') as fh:
        dump_metadata(db, fh)
Example #8
def simulate(args):

    options = parse_args(args)
    setup_logging(options)
        
    state = dict()
    backup_list = set()
    for i in xrange(50):
        backup_list.add('backup-%2d' % i)
        delete = process_backups(backup_list, state, options.cycles)
        log.info('Deleting %s', delete)
        backup_list -= delete

        log.info('Available backups on day %d:', i)
        for x in sorted(backup_list):
            log.info(x)  
Example #9
File: cp.py Project: drewlu/ossql
def main(args=None):
    '''Efficiently copy a directory tree'''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    if not os.path.exists(options.source):
        raise QuietError('Source directory %r does not exist' % options.source)

    if os.path.exists(options.target):
        raise QuietError('Target directory %r must not yet exist.' % options.target)

    parent = os.path.dirname(os.path.abspath(options.target))
    if not os.path.exists(parent):
        raise QuietError('Target parent %r does not exist' % parent)

    fstat_s = os.stat(options.source)
    fstat_p = os.stat(parent)
    if not stat.S_ISDIR(fstat_s.st_mode):
        raise QuietError('Source %r is not a directory' % options.source)

    if not stat.S_ISDIR(fstat_p.st_mode):
        raise QuietError('Target parent %r is not a directory' % parent)

    if fstat_p.st_dev != fstat_s.st_dev:
        raise QuietError('Source and target are not on the same file system.')

    ctrlfile = os.path.join(parent, CTRL_NAME)
    if not (CTRL_NAME not in llfuse.listdir(parent) and os.path.exists(ctrlfile)):
        raise QuietError('Source and target are not on an S3QL file system')

    if os.stat(ctrlfile).st_uid != os.geteuid() and os.geteuid() != 0:
        raise QuietError('Only root and the mounting user may run s3qlcp.')
        
    try:
        os.mkdir(options.target)
    except OSError as exc:
        if exc.errno == errno.EACCES:
            raise QuietError('No permission to create target directory')
        else:
            raise
    
    fstat_t = os.stat(options.target)
    llfuse.setxattr(ctrlfile, 'copy', struct.pack('II', fstat_s.st_ino, fstat_t.st_ino))
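The 'copy' request packs the source and target inode numbers as two unsigned 32-bit integers, so the matching unpack on the receiving side is simply (buf standing in for the bytes built above):

(src_inode, target_inode) = struct.unpack('II', buf)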
Example #10
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    try:
        options.storage_url = options.src_storage_url
        src_backend = get_backend(options, plain=True)

        options.storage_url = options.dst_storage_url
        dst_backend = get_backend(options, plain=True)
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc))

    tmpfh = tempfile.TemporaryFile()
    for (i, key) in enumerate(src_backend):
        if sys.stdout.isatty():
            sys.stdout.write('\rCopied %d objects so far...' % i)
            sys.stdout.flush()
        metadata = src_backend.lookup(key)

        log.debug('reading object %s', key)
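        # The same spool file is reused for every object (and the callback may
        # run again if the backend retries the transfer), so it is rewound and
        # truncated at the start of each call.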
        def do_read(fh):
            tmpfh.seek(0)
            tmpfh.truncate()
            while True:
                buf = fh.read(BUFSIZE)
                if not buf:
                    break
                tmpfh.write(buf)
        src_backend.perform_read(do_read, key)
        
        log.debug('writing object %s', key)
        def do_write(fh):
            tmpfh.seek(0)
            while True:
                buf = tmpfh.read(BUFSIZE)
                if not buf:
                    break
                fh.write(buf)
        dst_backend.perform_write(do_write, key, metadata)

    print('')
Example #11
def main(args=None):
    '''Print file system statistics to sys.stdout'''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)
    mountpoint = options.mountpoint

    # Check if it's a mount point
    if not posixpath.ismount(mountpoint):
        raise QuietError('%s is not a mount point' % mountpoint)

    # Check if it's an S3QL mountpoint
    ctrlfile = os.path.join(mountpoint, CTRL_NAME)
    if not (CTRL_NAME not in llfuse.listdir(mountpoint)
            and os.path.exists(ctrlfile)):
        raise QuietError('%s is not a mount point' % mountpoint)

    if os.stat(ctrlfile).st_uid != os.geteuid() and os.geteuid() != 0:
        raise QuietError('Only root and the mounting user may run s3qlstat.')
    
    # Use a decent sized buffer, otherwise the statistics would have to be
    # calculated three(!) times because we would need to invoke getxattr
    # three times.
    buf = llfuse.getxattr(ctrlfile, b's3qlstat', size_guess=256)

    (entries, blocks, inodes, fs_size, dedup_size,
     compr_size, db_size) = struct.unpack('QQQQQQQ', buf)
    p_dedup = dedup_size * 100 / fs_size if fs_size else 0
    p_compr_1 = compr_size * 100 / fs_size if fs_size else 0
    p_compr_2 = compr_size * 100 / dedup_size if dedup_size else 0
    mb = 1024 ** 2
    print ('Directory entries:    %d' % entries,
           'Inodes:               %d' % inodes, 
           'Data blocks:          %d' % blocks,
           'Total data size:      %.2f MB' % (fs_size/mb),
           'After de-duplication: %.2f MB (%.2f%% of total)' 
             % (dedup_size / mb, p_dedup),
           'After compression:    %.2f MB (%.2f%% of total, %.2f%% of de-duplicated)'
             % (compr_size /mb, p_compr_1, p_compr_2),
           'Database size:        %.2f MB (uncompressed)' % (db_size / mb),
           '(some values do not take into account not-yet-uploaded dirty blocks in cache)', 
           sep='\n')
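For reference, the buffer unpacked above corresponds to a single struct.pack() call on the file-system side; a minimal sketch of that producing end (the variable names here are illustrative):

buf = struct.pack('QQQQQQQ', entries, blocks, inodes, fs_size,
                  dedup_size, compr_size, db_size)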
Example #12
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Determine available backups
    backup_list = set(x for x in os.listdir('.')
                      if re.match(r'^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d$', x))

    if not os.path.exists(options.state) and len(backup_list) > 1:
        if not options.reconstruct_state:
            raise QuietError('Found more than one backup but no state file! Aborting.')

        log.warn('Trying to reconstruct state file..')
        state = upgrade_to_state(backup_list)
        if not options.n:
            log.info('Saving reconstructed state..')
            pickle.dump(state, open(options.state, 'wb'), pickle.HIGHEST_PROTOCOL)
    elif not os.path.exists(options.state):
        log.warn('Creating state file..')
        state = dict()
    else:
        log.info('Reading state...')
        state = pickle.load(open(options.state, 'rb'))

    to_delete = process_backups(backup_list, state, options.cycles)

    for x in to_delete:
        log.info('Backup %s is no longer needed, removing...', x)
        if not options.n:
            if options.use_s3qlrm:
                s3qlrm([x])
            else:
                shutil.rmtree(x)

    if options.n:
        log.info('Dry run, not saving state.')
    else:
        log.info('Saving state..')
        pickle.dump(state, open(options.state, 'wb'), pickle.HIGHEST_PROTOCOL)
Example #13
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    pool = ('abcdefghijklmnopqrstuvwxyz',
             'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
             '0123456789')
    steps = [ len(x) / (options.processes - 1) for x in pool ]
    prefixes = list()
    for i in range(options.processes - 1):
        parts = [ x[int(i * y):int((i + 1) * y)] for (x, y) in zip(pool, steps) ]
        prefixes.append(''.join(parts))

    filters = [ '-! [%s]*' % x for x in prefixes ]

    # Catch all
    filters.append('- [%s]*' % ''.join(prefixes))

    rsync_args = [ 'rsync', '-f', '+ */' ]
    if not options.quiet:
        rsync_args.append('--out-format')
        rsync_args.append('%n%L')
    if options.a:
        rsync_args.append('-aHAX')

    processes = list()
    for filter_ in filters:
        cmd = rsync_args + [ '-f', filter_ ] + options.pps
        log.debug('Calling %s', cmd)
        processes.append(subprocess.Popen(cmd))

    if all([ c.wait() == 0 for c in processes]):
        sys.exit(0)
    else:
        sys.exit(1)
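To make the partitioning concrete, here is a worked sketch of what the code above computes for options.processes == 3 (two prefix workers plus one catch-all):

# steps    == [13.0, 13.0, 5.0]   (13, 13, 5 with integer division -- same slices either way)
# prefixes == ['abcdefghijklmABCDEFGHIJKLM01234',
#              'nopqrstuvwxyzNOPQRSTUVWXYZ56789']
# filters  == ['-! [abcdefghijklmABCDEFGHIJKLM01234]*',
#              '-! [nopqrstuvwxyzNOPQRSTUVWXYZ56789]*',
#              '- [abcdefghijklmABCDEFGHIJKLM01234nopqrstuvwxyzNOPQRSTUVWXYZ56789]*']
# Each prefix worker excludes everything that does not start with one of its
# characters; the final rsync invocation excludes everything the prefix
# workers already cover and thus picks up the remaining names.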
Example #14
File: fsck.py Project: drewlu/ossql
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)
        
    # Check if fs is mounted on this computer
    # This is not foolproof but should prevent common mistakes
    match = options.storage_url + ' /'
    with open('/proc/mounts', 'r') as fh:
        for line in fh:
            if line.startswith(match):
                raise QuietError('Can not check mounted file system.')
    

    bucket = get_bucket(options)
    
    cachepath = get_bucket_cachedir(options.storage_url, options.cachedir)
    seq_no = get_seq_no(bucket)
    param_remote = bucket.lookup('s3ql_metadata')
    db = None
    
    if os.path.exists(cachepath + '.params'):
        assert os.path.exists(cachepath + '.db')
        param = pickle.load(open(cachepath + '.params', 'rb'))
        if param['seq_no'] < seq_no:
            log.info('Ignoring locally cached metadata (outdated).')
            param = bucket.lookup('s3ql_metadata')
        else:
            log.info('Using cached metadata.')
            db = Connection(cachepath + '.db')
            assert not os.path.exists(cachepath + '-cache') or param['needs_fsck']
    
        if param_remote['seq_no'] != param['seq_no']:
            log.warn('Remote metadata is outdated.')
            param['needs_fsck'] = True
            
    else:
        param = param_remote
        assert not os.path.exists(cachepath + '-cache')
        # .db might exist if mount.s3ql is killed at exactly the right instant
        # and should just be ignored.
       
    # Check revision
    if param['revision'] < CURRENT_FS_REV:
        raise QuietError('File system revision too old, please run `s3qladm upgrade` first.')
    elif param['revision'] > CURRENT_FS_REV:
        raise QuietError('File system revision too new, please update your '
                         'S3QL installation.')
    
    if param['seq_no'] < seq_no:
        if bucket.is_get_consistent():
            print(textwrap.fill(textwrap.dedent('''\
                  Up to date metadata is not available. Probably the file system has not
                  been properly unmounted and you should try to run fsck on the computer 
                  where the file system has been mounted most recently.
                  ''')))
        else:
            print(textwrap.fill(textwrap.dedent('''\
                  Up to date metadata is not available. Either the file system has not
                  been unmounted cleanly or the data has not yet propagated through the backend.
                  In the latter case, waiting for a while should fix the problem; in
                  the former case you should try to run fsck on the computer where
                  the file system has been mounted most recently.
                  ''')))
    
        print('Enter "continue" to use the outdated data anyway:',
              '> ', sep='\n', end='')
        if options.batch:
            raise QuietError('(in batch mode, exiting)')
        if sys.stdin.readline().strip() != 'continue':
            raise QuietError()
        
        param['seq_no'] = seq_no
        param['needs_fsck'] = True
    
    
    if (not param['needs_fsck']
        and ((time.time() - time.timezone) - param['last_fsck'])
             < 60 * 60 * 24 * 31): # i.e. last check was less than a month ago
        if options.force:
            log.info('File system seems clean, checking anyway.')
        else:
            log.info('File system is marked as clean. Use --force to force checking.')
            return
    
    # If using local metadata, check consistency
    if db:
        log.info('Checking DB integrity...')
        try:
            # get_list may raise CorruptError itself
            res = db.get_list('PRAGMA integrity_check(20)')
            if res[0][0] != u'ok':
                log.error('\n'.join(x[0] for x in res ))
                raise apsw.CorruptError()
        except apsw.CorruptError:
            raise QuietError('Local metadata is corrupted. Remove or repair the following '
                             'files manually and re-run fsck:\n'
                             + cachepath + '.db (corrupted)\n'
                             + cachepath + '.params (intact)')
    else:
        log.info("Downloading & uncompressing metadata...")
        os.close(os.open(cachepath + '.db.tmp', os.O_RDWR | os.O_CREAT | os.O_TRUNC,
                         stat.S_IRUSR | stat.S_IWUSR)) 
        db = Connection(cachepath + '.db.tmp', fast_mode=True)
        with bucket.open_read("s3ql_metadata") as fh:
            restore_metadata(fh, db)
        db.close()
        os.rename(cachepath + '.db.tmp', cachepath + '.db')
        db = Connection(cachepath + '.db')
    
    # Increase metadata sequence no 
    param['seq_no'] += 1
    param['needs_fsck'] = True
    bucket['s3ql_seq_no_%d' % param['seq_no']] = 'Empty'
    pickle.dump(param, open(cachepath + '.params', 'wb'), 2)
    
    fsck = Fsck(cachepath + '-cache', bucket, param, db)
    fsck.check()
    
    if fsck.uncorrectable_errors:
        raise QuietError("Uncorrectable errors found, aborting.")
        
    if os.path.exists(cachepath + '-cache'):
        os.rmdir(cachepath + '-cache')
        
    log.info('Saving metadata...')
    fh = tempfile.TemporaryFile()
    dump_metadata(fh, db)  
            
    log.info("Compressing & uploading metadata..")
    cycle_metadata(bucket)
    fh.seek(0)
    param['needs_fsck'] = False
    param['last_fsck'] = time.time() - time.timezone
    param['last-modified'] = time.time() - time.timezone
    with bucket.open_write("s3ql_metadata", param) as dst:
        fh.seek(0)
        shutil.copyfileobj(fh, dst)
    fh.close()
    pickle.dump(param, open(cachepath + '.params', 'wb'), 2)
        
    db.execute('ANALYZE')
    db.execute('VACUUM')
    db.close() 
Example #15
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    # args = ['local:///home/nikratio/tmp/bucket', '/home/nikratio/tmp/testfile0']

    options = parse_args(args)
    setup_logging(options)

    size = 100*1024*1024 # 100 MB
    log.info('Measuring throughput to cache...')
    bucket_dir = tempfile.mkdtemp()
    mnt_dir = tempfile.mkdtemp()
    atexit.register(shutil.rmtree, bucket_dir)
    atexit.register(shutil.rmtree, mnt_dir)
    subprocess.check_call(['mkfs.s3ql', '--plain', 'local://%s' % bucket_dir,
                           '--quiet', '--cachedir', options.cachedir])
    subprocess.check_call(['mount.s3ql', '--threads', '1', '--quiet', 
                           '--cachesize', '%d' % (2 * size/1024), '--log',
                           '%s/mount.log' % bucket_dir, '--cachedir', options.cachedir,
                           'local://%s' % bucket_dir, mnt_dir])
    with open('/dev/urandom', 'rb', 0) as src:
        with open('%s/bigfile' % mnt_dir, 'wb', 0) as dst:
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = src.read(256*1024)
                dst.write(buf)
                copied += len(buf)
            fuse_speed = copied / (time.time() - stamp)
    os.unlink('%s/bigfile' % mnt_dir)
    subprocess.check_call(['umount.s3ql', mnt_dir])                           
    log.info('Cache throughput: %.2f KB/sec', fuse_speed / 1024)

    # Upload random data to prevent effects of compression
    # on the network layer
    log.info('Measuring raw backend throughput..')
    bucket = get_bucket(options, plain=True)
    with bucket.open_write('s3ql_testdata') as dst:
        with open('/dev/urandom', 'rb', 0) as src:
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = src.read(256*1024)
                dst.write(buf)
                copied += len(buf)
            upload_speed = copied / (time.time() - stamp)
    log.info('Backend throughput: %.2f KB/sec', upload_speed / 1024)
    bucket.delete('s3ql_testdata')
    
    src = options.file
    size = os.fstat(options.file.fileno()).st_size 
    log.info('Test file size: %.2f MB', (size / 1024**2))
        
    times = dict()
    out_sizes = dict()
    for alg in ('lzma', 'bzip2', 'zlib'):
        log.info('compressing with %s...', alg)
        bucket = BetterBucket('pass', alg, Bucket(bucket_dir, None, None))
        with bucket.open_write('s3ql_testdata') as dst:
            src.seek(0)
            stamp = time.time()
            while True:
                buf = src.read(256*1024)
                if not buf:
                    break
                dst.write(buf)
            times[alg] = time.time() - stamp
            out_sizes[alg] = dst.compr_size
        log.info('%s compression speed: %.2f KB/sec (in)', alg, size/times[alg]/1024)
        log.info('%s compression speed: %.2f KB/sec (out)', alg, 
                 out_sizes[alg] / times[alg] / 1024)

    print('')
    
    req = dict()
    for alg in ('lzma', 'bzip2', 'zlib'):
        backend_req = math.ceil(upload_speed * times[alg] / out_sizes[alg])
        fuse_req = math.ceil(fuse_speed * times[alg] / size)
        req[alg] = min(backend_req, fuse_req)
        print('When using %s compression, incoming writes can keep up to %d threads\n'
              'busy. The backend can handle data from up to %d threads. Therefore,\n'
              'the maximum achievable throughput is %.2f KB/sec with %d threads.\n'
              % (alg, fuse_req, backend_req, min(upload_speed, fuse_speed)/1024, req[alg]))
        
    print('All numbers assume that the test file is representative and that',
          'there are enough processor cores to run all threads in parallel.',
          'To compensate for network latency, you should start about twice as',
          'many upload threads as you need for optimal performance.\n', sep='\n')
    
    cores = os.sysconf('SC_NPROCESSORS_ONLN')
    best_size = None
    max_threads = cores
    while best_size is None:
        for alg in out_sizes:
            if req[alg] > max_threads:
                continue
            if best_size is None or out_sizes[alg] < best_size:
                best_size = out_sizes[alg]
                best_alg = alg
                threads = req[alg]
                
        max_threads = min(req.itervalues())
    
    print('This system appears to have %d cores, so best performance with maximum\n'
          'compression ratio would be achieved by using %s compression with %d\n'
          'upload threads.' % (cores, best_alg,
                                2 * threads if cores >= threads else 2 * cores))    
Example #16
File: mkfs.py Project: drewlu/ossql
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)
    
    plain_bucket = get_bucket(options, plain=True)
    
    if 's3ql_metadata' in plain_bucket:
        if not options.force:
            raise QuietError("Found existing file system! Use --force to overwrite")
            
        log.info('Purging existing file system data..')
        plain_bucket.clear()
        if not plain_bucket.is_get_consistent():
            log.info('Please note that the new file system may appear inconsistent\n'
                     'for a while until the removals have propagated through the backend.')
            
    if not options.plain:
        if sys.stdin.isatty():
            wrap_pw = getpass("Enter encryption password: ")
            if wrap_pw != getpass("Confirm encryption password: "):
                raise QuietError("Passwords don't match.")
        else:
            wrap_pw = sys.stdin.readline().rstrip()

        # Generate data encryption passphrase
        log.info('Generating random encryption key...')
        fh = open('/dev/urandom', "rb", 0) # No buffering
        data_pw = fh.read(32)
        fh.close()
        
        bucket = BetterBucket(wrap_pw, 'bzip2', plain_bucket)
        bucket['s3ql_passphrase'] = data_pw
    else:    
        data_pw = None
        
    bucket = BetterBucket(data_pw, 'bzip2', plain_bucket)
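    # When encryption was requested above, a two-level key scheme is now in
    # place: wrap_pw only protects the randomly generated data_pw (stored as
    # the 's3ql_passphrase' object), while data_pw is what encrypts all
    # further objects written through this bucket.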

    # Setup database
    cachepath = get_bucket_cachedir(options.storage_url, options.cachedir)

    # There can't be a corresponding bucket, so we can safely delete
    # these files.
    if os.path.exists(cachepath + '.db'):
        os.unlink(cachepath + '.db')
    if os.path.exists(cachepath + '-cache'):
        shutil.rmtree(cachepath + '-cache')

    log.info('Creating metadata tables...')
    db = Connection(cachepath + '.db')
    create_tables(db)
    init_tables(db)

    param = dict()
    param['revision'] = CURRENT_FS_REV
    param['seq_no'] = 1
    param['label'] = options.label
    param['blocksize'] = options.blocksize * 1024
    param['needs_fsck'] = False
    param['last_fsck'] = time.time() - time.timezone
    param['last-modified'] = time.time() - time.timezone
    
    # This indicates that the convert_legacy_metadata() stuff
    # in BetterBucket is not required for this file system.
    param['bucket_revision'] = 1
    
    bucket.store('s3ql_seq_no_%d' % param['seq_no'], 'Empty')

    log.info('Uploading metadata...')
    with bucket.open_write('s3ql_metadata', param) as fh:
        dump_metadata(fh, db)  
    pickle.dump(param, open(cachepath + '.params', 'wb'), 2)
Example #17
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # /dev/urandom may be slow, so we cache the data first
    log.info('Preparing test data...')
    rnd_fh = tempfile.TemporaryFile()
    with open('/dev/urandom', 'rb', 0) as src:
        copied = 0
        while copied < 50 * 1024 * 1024:
            buf = src.read(BUFSIZE)
            rnd_fh.write(buf)
            copied += len(buf)
            
    log.info('Measuring throughput to cache...')
    backend_dir = tempfile.mkdtemp()
    mnt_dir = tempfile.mkdtemp()
    atexit.register(shutil.rmtree, backend_dir)
    atexit.register(shutil.rmtree, mnt_dir)
    
    write_time = 0
    size = 50 * 1024 * 1024
    while write_time < 3:        
        log.debug('Write took %.3g seconds, retrying', write_time) 
        subprocess.check_call(['mkfs.s3ql', '--plain', 'local://%s' % backend_dir,
                               '--quiet', '--force', '--cachedir', options.cachedir])
        subprocess.check_call(['mount.s3ql', '--threads', '1', '--quiet',
                               '--cachesize', '%d' % (2 * size / 1024), '--log',
                               '%s/mount.log' % backend_dir, '--cachedir', options.cachedir,
                               'local://%s' % backend_dir, mnt_dir])
        try:    
            size *= 2    
            with open('%s/bigfile' % mnt_dir, 'wb', 0) as dst:
                rnd_fh.seek(0)
                write_time = time.time()
                copied = 0
                while copied < size:
                    buf = rnd_fh.read(BUFSIZE)
                    if not buf:
                        rnd_fh.seek(0)
                        continue
                    dst.write(buf)
                    copied += len(buf)
        
            write_time = time.time() - write_time
            os.unlink('%s/bigfile' % mnt_dir)
        finally:
            subprocess.check_call(['umount.s3ql', mnt_dir])
            
    fuse_speed = copied / write_time
    log.info('Cache throughput: %d KiB/sec', fuse_speed / 1024)

    # Upload random data to prevent effects of compression
    # on the network layer
    log.info('Measuring raw backend throughput..')
    try:
        backend = get_backend(options, plain=True)
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc))
    
    upload_time = 0
    size = 512 * 1024
    while upload_time < 10:
        size *= 2
        def do_write(dst):
            rnd_fh.seek(0)
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = rnd_fh.read(BUFSIZE)
                if not buf:
                    rnd_fh.seek(0)
                    continue
                dst.write(buf)
                copied += len(buf)
            return (copied, stamp)
        (upload_size, upload_time) = backend.perform_write(do_write, 's3ql_testdata')
        upload_time = time.time() - upload_time
    backend_speed = upload_size / upload_time
    log.info('Backend throughput: %d KiB/sec', backend_speed / 1024)
    backend.delete('s3ql_testdata')

    src = options.file
    size = os.fstat(options.file.fileno()).st_size
    log.info('Test file size: %.2f MiB', (size / 1024 ** 2))

    in_speed = dict()
    out_speed = dict()
    for alg in ALGS:
        log.info('compressing with %s...', alg)
        backend = BetterBackend('pass', alg, Backend('local://' + backend_dir, None, None))
        def do_write(dst): #pylint: disable=E0102
            src.seek(0)
            stamp = time.time()
            while True:
                buf = src.read(BUFSIZE)
                if not buf:
                    break
                dst.write(buf)
            return (dst, stamp)            
        (dst_fh, stamp) = backend.perform_write(do_write, 's3ql_testdata')
        dt = time.time() - stamp
        in_speed[alg] = size / dt
        out_speed[alg] = dst_fh.get_obj_size() / dt
        log.info('%s compression speed: %d KiB/sec per thread (in)', alg, in_speed[alg] / 1024)
        log.info('%s compression speed: %d KiB/sec per thread (out)', alg, out_speed[alg] / 1024)

    print('')

    threads = set([1,2,4,8])
    cores = os.sysconf('SC_NPROCESSORS_ONLN')
    if cores != -1:
        threads.add(cores)
    if options.threads:
        threads.add(options.threads)
        
    print('%-26s' % 'Threads:',
          ('%12d' * len(threads)) % tuple(sorted(threads)))
    
    for alg in ALGS:
        speeds = []
        limits = []
        for t in sorted(threads):
            if fuse_speed > t * in_speed[alg]:
                limit = 'CPU'
                speed = t * in_speed[alg]
            else:
                limit = 'S3QL/FUSE'
                speed = fuse_speed

            if speed / in_speed[alg] * out_speed[alg] > backend_speed:
                limit = 'uplink'
                speed = backend_speed * in_speed[alg] / out_speed[alg]
                
            limits.append(limit)
            speeds.append(speed / 1024)    

        print('%-26s' % ('Max FS throughput (%s):' % alg),
              ('%7d KiB/s' * len(threads)) % tuple(speeds))
        print('%-26s' % '..limited by:',
              ('%12s' * len(threads)) % tuple(limits))
                
    print('')                
    print('All numbers assume that the test file is representative and that',
          'there are enough processor cores to run all active threads in parallel.',
          'To compensate for network latency, you should use about twice as',
          'many upload threads as indicated by the above table.\n', sep='\n')
Example #18
def main(args=None):
    '''Mount S3QL file system'''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    
    # Save handler so that we can remove it when daemonizing
    stdout_log_handler = setup_logging(options)
    
    if options.threads is None:
        options.threads = determine_threads(options)

    if not os.path.exists(options.mountpoint):
        raise QuietError('Mountpoint does not exist.')
        
    if options.profile:
        import cProfile
        import pstats
        prof = cProfile.Profile()

    bucket_factory = get_bucket_factory(options)
    bucket_pool = BucketPool(bucket_factory)
    
    # Get paths
    cachepath = get_bucket_cachedir(options.storage_url, options.cachedir)

    # Retrieve metadata
    with bucket_pool() as bucket:
        (param, db) = get_metadata(bucket, cachepath, options.readonly)
            
    if options.nfs:
        log.info('Creating NFS indices...')
        # NFS may try to look up '..', so we have to speed up this kind of query
        db.execute('CREATE INDEX IF NOT EXISTS ix_contents_inode ON contents(inode)')
        
        # Since we do not support generation numbers, we have to keep the
        # likelihood of reusing a just-deleted inode low
        inode_cache.RANDOMIZE_INODES = True
    else:
        db.execute('DROP INDEX IF EXISTS ix_contents_inode')
                       
    metadata_upload_thread = MetadataUploadThread(bucket_pool, param, db,
                                                  options.metadata_upload_interval)
    metadata_download_thread = MetadataDownloadThread(bucket_pool, param, cachepath,
                                                      options.metadata_download_interval)
    block_cache = BlockCache(bucket_pool, db, cachepath + '-cache',
                             options.cachesize * 1024, options.max_cache_entries)
    commit_thread = CommitThread(block_cache)
    operations = fs.Operations(block_cache, db, blocksize=param['blocksize'],
                               upload_event=metadata_upload_thread.event)
    
    log.info('Mounting filesystem...')
    llfuse.init(operations, options.mountpoint, get_fuse_opts(options))

    if not options.fg:
        if stdout_log_handler:
            logging.getLogger().removeHandler(stdout_log_handler)
        daemonize(options.cachedir)

    exc_info = setup_exchook()

    # After we start threads, we must be sure to terminate them
    # or the process will hang 
    try:
        block_cache.init(options.threads)
        metadata_upload_thread.start()
        metadata_download_thread.start()
        commit_thread.start()
        
        if options.upstart:
            os.kill(os.getpid(), signal.SIGSTOP)
        if options.profile:
            prof.runcall(llfuse.main, options.single)
        else:
            llfuse.main(options.single)
        
        log.info("FUSE main loop terminated.")
        
    except:
        log.info("Caught exception in main loop, unmounting file system...")  
          
        # Tell finally handler that there already is an exception
        if not exc_info:
            exc_info = sys.exc_info()
        
        # We do *not* unmount on exception. Why? E.g. if someone is mirroring the
        # mountpoint, and it suddenly becomes empty, all the mirrored data will be
        # deleted. However, it's crucial to still call llfuse.close, so that
        # Operations.destroy() can flush the inode cache.
        with llfuse.lock:
            llfuse.close(unmount=False)

        raise
            
    # Terminate threads
    finally:
        log.debug("Waiting for background threads...")
        for (op, with_lock) in ((metadata_upload_thread.stop, False),
                                (commit_thread.stop, False),
                                (block_cache.destroy, True),
                                (metadata_upload_thread.join, False),
                                (metadata_download_thread.join, False),
                                (commit_thread.join, False)):
            try:
                if with_lock:
                    with llfuse.lock:
                        op()
                else:
                    op()
            except:
                # We just live with the race cond here
                if not exc_info: 
                    exc_info = sys.exc_info()
                else:
                    log.exception("Exception during cleanup:")

        log.debug("All background threads terminated.")
 
    # Re-raise if main loop terminated due to exception in other thread
    # or during cleanup
    if exc_info:
        raise exc_info[0], exc_info[1], exc_info[2]
        
    # At this point, there should be no other threads left

    # Unmount
    log.info("Unmounting file system.")
    with llfuse.lock:
        llfuse.close()
    
    # Do not update .params yet, dump_metadata() may fail if the database is
    # corrupted, in which case we want to force an fsck.
       
    if not options.readonly:
        with bucket_pool() as bucket:   
            seq_no = get_seq_no(bucket)
            if metadata_upload_thread.db_mtime == os.stat(cachepath + '.db').st_mtime:
                log.info('File system unchanged, not uploading metadata.')
                del bucket['s3ql_seq_no_%d' % param['seq_no']]         
                param['seq_no'] -= 1
                pickle.dump(param, open(cachepath + '.params', 'wb'), 2)         
            elif seq_no == param['seq_no']:
                log.info('Uploading metadata...')     
                cycle_metadata(bucket)
                param['last-modified'] = time.time() - time.timezone
                with tempfile.TemporaryFile() as tmp:
                    dump_metadata(tmp, db)
                    tmp.seek(0)
                    with bucket.open_write('s3ql_metadata', param) as fh:
                        shutil.copyfileobj(tmp, fh)
                pickle.dump(param, open(cachepath + '.params', 'wb'), 2)
            else:
                log.error('Remote metadata is newer than local (%d vs %d), '
                          'refusing to overwrite!', seq_no, param['seq_no'])
                log.error('The locally cached metadata will be *lost* the next time the file system '
                          'is mounted or checked and has therefore been backed up.')
                for name in (cachepath + '.params', cachepath + '.db'):
                    for i in reversed(range(4)):
                        if os.path.exists(name + '.%d' % i):
                            os.rename(name + '.%d' % i, name + '.%d' % (i+1))     
                    os.rename(name, name + '.0')
   
    db.execute('ANALYZE')
    db.execute('VACUUM')
    db.close() 

    if options.profile:
        tmp = tempfile.NamedTemporaryFile()
        prof.dump_stats(tmp.name)
        fh = open('s3ql_profile.txt', 'w')
        p = pstats.Stats(tmp.name, stream=fh)
        tmp.close()
        p.strip_dirs()
        p.sort_stats('cumulative')
        p.print_stats(50)
        p.sort_stats('time')
        p.print_stats(50)
        fh.close()