Esempio n. 1
0
File: adm.py Progetto: drewlu/ossql
def main(args=None):
    '''Run the administrative action selected on the command line.

    `args` is the list of command line arguments; when it is None,
    ``sys.argv[1:]`` is used instead. Raises `QuietError` if the storage
    URL shows up as a mounted device in /proc/mounts. Returns whatever
    the selected action handler returns (None if no action matched).
    '''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Refuse to operate on a file system that is mounted on this machine.
    # This only looks at /proc/mounts, so it is a sanity check against
    # common mistakes rather than a guarantee.
    mount_prefix = options.storage_url + ' /'
    with open('/proc/mounts', 'r') as mounts:
        mounted = any(entry.startswith(mount_prefix) for entry in mounts)
    if mounted:
        raise QuietError('Can not work on mounted file system.')

    action = options.action

    # 'clear' and 'upgrade' need specially constructed buckets, so they
    # are handled before the regular bucket is opened.
    if action == 'clear':
        plain_bucket = get_bucket(options, plain=True)
        cachepath = get_bucket_cachedir(options.storage_url, options.cachedir)
        return clear(plain_bucket, cachepath)

    if action == 'upgrade':
        old_bucket = get_possibly_old_bucket(options)
        return upgrade(old_bucket)

    # Every remaining action works on the regular bucket.
    bucket = get_bucket(options)

    if action == 'passphrase':
        return change_passphrase(bucket)
    elif action == 'download-metadata':
        return download_metadata(bucket, options.storage_url)
Esempio n. 2
0
File: fsck.py Progetto: drewlu/ossql
def main(args=None):
    '''Check the file system given on the command line and upload its metadata.

    `args` is the list of command line arguments; when it is None,
    ``sys.argv[1:]`` is used instead.

    The function picks the metadata to check (locally cached or
    downloaded from the bucket), verifies the file system revision, runs
    `Fsck.check()` and finally uploads the resulting metadata and stores
    the parameters next to the local cache.

    Raises `QuietError` if the file system appears to be mounted, has an
    unsupported revision, the user declines to continue with outdated
    metadata, the local database is corrupted, or uncorrectable errors
    were found. Returns None.
    '''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Check if fs is mounted on this computer
    # This is not foolproof but should prevent common mistakes
    match = options.storage_url + ' /'
    with open('/proc/mounts', 'r') as fh:
        for line in fh:
            if line.startswith(match):
                raise QuietError('Can not check mounted file system.')

    bucket = get_bucket(options)

    cachepath = get_bucket_cachedir(options.storage_url, options.cachedir)
    seq_no = get_seq_no(bucket)
    param_remote = bucket.lookup('s3ql_metadata')
    db = None

    if os.path.exists(cachepath + '.params'):
        assert os.path.exists(cachepath + '.db')
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # the cache directory must only be writable by trusted users.
        with open(cachepath + '.params', 'rb') as param_fh:
            param = pickle.load(param_fh)
        if param['seq_no'] < seq_no:
            log.info('Ignoring locally cached metadata (outdated).')
            param = bucket.lookup('s3ql_metadata')
        else:
            log.info('Using cached metadata.')
            db = Connection(cachepath + '.db')
            assert not os.path.exists(cachepath + '-cache') or param['needs_fsck']

        if param_remote['seq_no'] != param['seq_no']:
            log.warn('Remote metadata is outdated.')
            param['needs_fsck'] = True

    else:
        param = param_remote
        assert not os.path.exists(cachepath + '-cache')
        # .db might exist if mount.s3ql is killed at exactly the right instant
        # and should just be ignored.

    # Check revision
    if param['revision'] < CURRENT_FS_REV:
        raise QuietError('File system revision too old, please run `s3qladm upgrade` first.')
    elif param['revision'] > CURRENT_FS_REV:
        raise QuietError('File system revision too new, please update your '
                         'S3QL installation.')

    # The selected metadata is older than the newest sequence number
    # found in the bucket: explain why and let the user decide.
    if param['seq_no'] < seq_no:
        if bucket.is_get_consistent():
            print(textwrap.fill(textwrap.dedent('''\
                  Up to date metadata is not available. Probably the file system has not
                  been properly unmounted and you should try to run fsck on the computer 
                  where the file system has been mounted most recently.
                  ''')))
        else:
            print(textwrap.fill(textwrap.dedent('''\
                  Up to date metadata is not available. Either the file system has not
                  been unmounted cleanly or the data has not yet propagated through the backend.
                  In the later case, waiting for a while should fix the problem, in
                  the former case you should try to run fsck on the computer where
                  the file system has been mounted most recently
                  ''')))

        print('Enter "continue" to use the outdated data anyway:',
              '> ', sep='\n', end='')
        if options.batch:
            raise QuietError('(in batch mode, exiting)')
        if sys.stdin.readline().strip() != 'continue':
            raise QuietError()

        param['seq_no'] = seq_no
        param['needs_fsck'] = True

    # Skip the check if the file system is marked clean and the last
    # check happened less than 31 days ago (unless --force is given).
    if (not param['needs_fsck']
        and ((time.time() - time.timezone) - param['last_fsck'])
             < 60 * 60 * 24 * 31):
        if options.force:
            log.info('File system seems clean, checking anyway.')
        else:
            log.info('File system is marked as clean. Use --force to force checking.')
            return

    # If using local metadata, check consistency
    if db:
        log.info('Checking DB integrity...')
        try:
            # get_list may raise CorruptError itself
            res = db.get_list('PRAGMA integrity_check(20)')
            if res[0][0] != u'ok':
                log.error('\n'.join(x[0] for x in res))
                raise apsw.CorruptError()
        except apsw.CorruptError:
            # The parameter file is stored as '<cachepath>.params'; name
            # it exactly so the user can find it.
            raise QuietError('Local metadata is corrupted. Remove or repair the following '
                             'files manually and re-run fsck:\n'
                             + cachepath + '.db (corrupted)\n'
                             + cachepath + '.params (intact)')
    else:
        log.info("Downloading & uncompressing metadata...")
        # Create the temporary database file readable/writable by the
        # owner only before handing it to the database layer.
        os.close(os.open(cachepath + '.db.tmp', os.O_RDWR | os.O_CREAT | os.O_TRUNC,
                         stat.S_IRUSR | stat.S_IWUSR))
        db = Connection(cachepath + '.db.tmp', fast_mode=True)
        with bucket.open_read("s3ql_metadata") as fh:
            restore_metadata(fh, db)
        db.close()
        # Only move the database into place once it has been restored
        # completely.
        os.rename(cachepath + '.db.tmp', cachepath + '.db')
        db = Connection(cachepath + '.db')

    # Increase metadata sequence no
    param['seq_no'] += 1
    param['needs_fsck'] = True
    bucket['s3ql_seq_no_%d' % param['seq_no']] = 'Empty'
    with open(cachepath + '.params', 'wb') as param_fh:
        pickle.dump(param, param_fh, 2)

    fsck = Fsck(cachepath + '-cache', bucket, param, db)
    fsck.check()

    if fsck.uncorrectable_errors:
        raise QuietError("Uncorrectable errors found, aborting.")

    if os.path.exists(cachepath + '-cache'):
        os.rmdir(cachepath + '-cache')

    log.info('Saving metadata...')
    fh = tempfile.TemporaryFile()
    try:
        dump_metadata(fh, db)

        log.info("Compressing & uploading metadata..")
        cycle_metadata(bucket)
        param['needs_fsck'] = False
        param['last_fsck'] = time.time() - time.timezone
        param['last-modified'] = time.time() - time.timezone
        with bucket.open_write("s3ql_metadata", param) as dst:
            fh.seek(0)
            shutil.copyfileobj(fh, dst)
    finally:
        # Release the temporary file even if dumping or uploading fails.
        fh.close()

    with open(cachepath + '.params', 'wb') as param_fh:
        pickle.dump(param, param_fh, 2)

    db.execute('ANALYZE')
    db.execute('VACUUM')
    db.close()
Esempio n. 3
0
File: mkfs.py Progetto: drewlu/ossql
def main(args=None):
    '''Create a new file system in the bucket given on the command line.

    `args` is the list of command line arguments; when it is None,
    ``sys.argv[1:]`` is used instead.

    Unless `--plain` was requested, a wrapping password is read (with
    confirmation when stdin is a terminal) and used to store a randomly
    generated data encryption key in the bucket. Afterwards the metadata
    tables are created locally, uploaded, and the file system parameters
    are stored next to the local cache.

    Raises `QuietError` if a file system already exists and `--force`
    was not given, or if the two entered passwords differ.
    '''

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    plain_bucket = get_bucket(options, plain=True)

    if 's3ql_metadata' in plain_bucket:
        if not options.force:
            raise QuietError("Found existing file system! Use --force to overwrite")

        log.info('Purging existing file system data..')
        plain_bucket.clear()
        if not plain_bucket.is_get_consistent():
            log.info('Please note that the new file system may appear inconsistent\n'
                     'for a while until the removals have propagated through the backend.')

    if not options.plain:
        if sys.stdin.isatty():
            # Ask twice so that a typo does not lock the user out of the
            # freshly created file system.
            wrap_pw = getpass("Enter encryption password: ")
            if wrap_pw != getpass("Confirm encryption password: "):
                raise QuietError("Passwords don't match.")
        else:
            wrap_pw = sys.stdin.readline().rstrip()

        # Generate data encryption passphrase
        log.info('Generating random encryption key...')
        with open('/dev/urandom', "rb", 0) as fh:  # No buffering
            data_pw = fh.read(32)

        # The data key itself is stored in the bucket, protected by the
        # user supplied wrapping password.
        bucket = BetterBucket(wrap_pw, 'bzip2', plain_bucket)
        bucket['s3ql_passphrase'] = data_pw
    else:
        data_pw = None

    bucket = BetterBucket(data_pw, 'bzip2', plain_bucket)

    # Setup database
    cachepath = get_bucket_cachedir(options.storage_url, options.cachedir)

    # There can't be a corresponding bucket, so we can safely delete
    # these files.
    if os.path.exists(cachepath + '.db'):
        os.unlink(cachepath + '.db')
    if os.path.exists(cachepath + '-cache'):
        shutil.rmtree(cachepath + '-cache')

    log.info('Creating metadata tables...')
    db = Connection(cachepath + '.db')
    create_tables(db)
    init_tables(db)

    param = dict()
    param['revision'] = CURRENT_FS_REV
    param['seq_no'] = 1
    param['label'] = options.label
    param['blocksize'] = options.blocksize * 1024
    param['needs_fsck'] = False
    param['last_fsck'] = time.time() - time.timezone
    param['last-modified'] = time.time() - time.timezone

    # This indicates that the convert_legacy_metadata() stuff
    # in BetterBucket is not required for this file system.
    param['bucket_revision'] = 1

    bucket.store('s3ql_seq_no_%d' % param['seq_no'], 'Empty')

    log.info('Uploading metadata...')
    with bucket.open_write('s3ql_metadata', param) as fh:
        dump_metadata(fh, db)
    with open(cachepath + '.params', 'wb') as param_fh:
        pickle.dump(param, param_fh, 2)
Esempio n. 4
0
def main(args=None):
    '''Benchmark cache, backend and compression throughput and print advice.

    `args` is the list of command line arguments; when it is None,
    ``sys.argv[1:]`` is used instead.

    The function measures, in this order:

    1. the write throughput into a temporarily created and mounted
       local file system,
    2. the raw upload throughput to the bucket selected by the options,
    3. the compression speed of lzma, bzip2 and zlib on the test file
       given in the options,

    and then prints how many threads each algorithm can keep busy and
    which algorithm / thread count it recommends for this machine.
    '''
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    size = 100*1024*1024  # amount of random data to write, in bytes
    log.info('Measuring throughput to cache...')
    bucket_dir = tempfile.mkdtemp()
    mnt_dir = tempfile.mkdtemp()
    atexit.register(shutil.rmtree, bucket_dir)
    atexit.register(shutil.rmtree, mnt_dir)
    subprocess.check_call(['mkfs.s3ql', '--plain', 'local://%s' % bucket_dir,
                           '--quiet', '--cachedir', options.cachedir])
    # The cache size passed to mount.s3ql is twice the amount of data
    # written below (converted from bytes by dividing by 1024).
    subprocess.check_call(['mount.s3ql', '--threads', '1', '--quiet',
                           '--cachesize', '%d' % (2 * size/1024), '--log',
                           '%s/mount.log' % bucket_dir, '--cachedir', options.cachedir,
                           'local://%s' % bucket_dir, mnt_dir])
    with open('/dev/urandom', 'rb', 0) as src:
        with open('%s/bigfile' % mnt_dir, 'wb', 0) as dst:
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = src.read(256*1024)
                dst.write(buf)
                copied += len(buf)
            fuse_speed = copied / (time.time() - stamp)
    os.unlink('%s/bigfile' % mnt_dir)
    subprocess.check_call(['umount.s3ql', mnt_dir])
    log.info('Cache throughput: %.2f KB/sec', fuse_speed / 1024)

    # Upload random data to prevent effects of compression
    # on the network layer
    log.info('Measuring raw backend throughput..')
    bucket = get_bucket(options, plain=True)
    with bucket.open_write('s3ql_testdata') as dst:
        with open('/dev/urandom', 'rb', 0) as src:
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = src.read(256*1024)
                dst.write(buf)
                copied += len(buf)
            upload_speed = copied / (time.time() - stamp)
    log.info('Backend throughput: %.2f KB/sec', upload_speed / 1024)
    bucket.delete('s3ql_testdata')

    # From here on `size` is the size of the user supplied test file.
    src = options.file
    size = os.fstat(options.file.fileno()).st_size
    log.info('Test file size: %.2f MB', (size / 1024**2))

    times = dict()      # algorithm -> seconds needed to compress the file
    out_sizes = dict()  # algorithm -> compressed size reported by the bucket
    for alg in ('lzma', 'bzip2', 'zlib'):
        log.info('compressing with %s...', alg)
        bucket = BetterBucket('pass', alg, Bucket(bucket_dir, None, None))
        with bucket.open_write('s3ql_testdata') as dst:
            src.seek(0)
            stamp = time.time()
            while True:
                buf = src.read(256*1024)
                if not buf:
                    break
                dst.write(buf)
            times[alg] = time.time() - stamp
            out_sizes[alg] = dst.compr_size
        log.info('%s compression speed: %.2f KB/sec (in)', alg, size/times[alg]/1024)
        log.info('%s compression speed: %.2f KB/sec (out)', alg,
                 out_sizes[alg] / times[alg] / 1024)

    print('')

    # For each algorithm, the number of compression threads that either
    # the backend or the incoming writes can keep busy, whichever is
    # smaller.
    req = dict()
    for alg in ('lzma', 'bzip2', 'zlib'):
        backend_req = math.ceil(upload_speed * times[alg] / out_sizes[alg])
        fuse_req = math.ceil(fuse_speed * times[alg] / size)
        req[alg] = min(backend_req, fuse_req)
        print('When using %s compression, incoming writes can keep up to %d threads\n'
              'busy. The backend can handle data from up to %d threads. Therefore,\n'
              'the maximum achievable throughput is %.2f KB/sec with %d threads.\n'
              % (alg, fuse_req, backend_req, min(upload_speed, fuse_speed)/1024, req[alg]))

    print('All numbers assume that the test file is representative and that',
          'there are enough processor cores to run all threads in parallel.',
          'To compensate for network latency, you should start about twice as',
          'many upload threads as you need for optimal performance.\n', sep='\n')

    # Pick the algorithm with the smallest output among those whose
    # thread requirement does not exceed the number of cores. If none
    # qualifies, relax the limit to the smallest requirement and retry.
    cores = os.sysconf('SC_NPROCESSORS_ONLN')
    best_size = None
    max_threads = cores
    while best_size is None:
        for alg in out_sizes:
            if req[alg] > max_threads:
                continue
            if best_size is None or out_sizes[alg] < best_size:
                best_size = out_sizes[alg]
                best_alg = alg
                threads = req[alg]

        max_threads = min(req.values())

    print('This system appears to have %d cores, so best performance with maximum\n'
          'compression ratio would be achieved by using %s compression with %d\n'
          'upload threads.' % (cores, best_alg,
                                2 * threads if cores >= threads else 2 * cores))