def upload_objects(bucket, files, seed):
    """Upload every file handle in `files` to `bucket` under a generated name.

    IN:
        bucket: boto S3 bucket object
        files: list of file handles to upload
        seed: seed for the PRNG that drives key-name generation
    OUT:
        list of boto S3 key objects, in the order they were uploaded
    """
    uploaded = []
    # A single name stream is shared by the whole batch.
    key_names = realistic.names(15, 4, seed=seed)

    for handle in files:
        print >> sys.stderr, 'sending file with size %dB' % handle.size
        s3_key = Key(bucket)
        s3_key.key = next(key_names)
        s3_key.set_contents_from_file(handle)
        uploaded.append(s3_key)

    return uploaded
def main():
    """Run the mixed reader/writer workload described by the config on stdin.

    Steps, in order:
      * parse command-line options and read the configuration from stdin
        (a TTY on stdin is rejected);
      * validate the ``readwrite`` section and derive the random seeds;
      * create the test bucket and build the list of object names, either
        random or deterministic (``deterministic_file_names``);
      * if any writers are configured, upload an initial set of files;
      * spawn writer and reader greenlets, kill them once the timer set
        from ``readwrite.duration`` fires, then dump the queued result
        records as YAML on the original stdout.

    Raises:
        RuntimeError: stdin is a TTY or a required config item is missing.
        Exception: a queued result record carries an ``error`` entry.

    The bucket is nuked on the way out when ``options.cleanup`` is set.
    """
    options, _args = parse_options()

    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None

    try:
        # Plain print output goes to stderr from here on; the original
        # stdout is kept aside for the YAML result records.
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'readwrite' not in config:
            raise RuntimeError('readwrite section not found in config')
        rw = config.readwrite
        for required in ('readers', 'writers', 'duration', 'files', 'bucket'):
            if required not in rw:
                raise RuntimeError("Missing readwrite config item: {item}".format(item=required))
        for required in ('num', 'size', 'stddev'):
            if required not in rw.files:
                raise RuntimeError("Missing readwrite config item: files.{item}".format(item=required))

        # setdefault() evaluates its default eagerly, so one random value is
        # drawn per seed name even when the config already supplies that seed.
        seeds = dict(rw.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))
        rand = random.Random(seeds['main'])
        for seed_name in ('names', 'contents', 'writer', 'reader'):
            seeds.setdefault(seed_name, rand.randrange(2**32))
        print('Using random seeds: {seeds}'.format(seeds=seeds))

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(rw.bucket, max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print("Created bucket: {name}".format(name=bucket.name))

        # check flag for deterministic file name creation
        deterministic = rw.get('deterministic_file_names')
        if deterministic:
            print('Creating file names that are deterministic')
            file_names = [
                'test_file_{num}'.format(num=index)
                for index in xrange(rw.files.num)
            ]
        else:
            print('Creating random file names')
            name_stream = realistic.names(
                mean=15,
                stddev=4,
                seed=seeds['names'],
            )
            file_names = list(itertools.islice(name_stream, rw.files.num))

        files = realistic.files2(
            mean=1024 * rw.files.size,
            stddev=1024 * rw.files.stddev,
            seed=seeds['contents'],
        )
        q = gevent.queue.Queue()

        # warmup - upload the initial set of files, but only if there are
        # any writers specified
        if rw.writers > 0:
            print("Uploading initial set of {num} files".format(num=rw.files.num))
            warmup_pool = gevent.pool.Pool(size=100)
            for file_name in file_names:
                warmup_pool.spawn_link_exception(
                    write_file,
                    bucket=bucket,
                    file_name=file_name,
                    fp=next(files),
                )
            warmup_pool.join()

        # main work
        print("Starting main worker loop.")
        print("Using file size: {size} +- {stddev}".format(size=rw.files.size, stddev=rw.files.stddev))
        print("Spawning {w} writers and {r} readers...".format(w=rw.writers, r=rw.readers))
        group = gevent.pool.Group()
        rand_writer = random.Random(seeds['writer'])

        # Don't create random files if deterministic_file_names is set and true
        if not deterministic:
            for worker_id in xrange(rw.writers):
                worker_rand = random.Random(rand_writer.randrange(2**32))
                group.spawn_link_exception(
                    writer,
                    bucket=bucket,
                    worker_id=worker_id,
                    file_names=file_names,
                    files=files,
                    queue=q,
                    rand=worker_rand,
                )

        # The reader loop is already bounded by readwrite.readers and the
        # file names exist either way (random or deterministic), so it needs
        # no extra condition. With zero readers nothing is read.
        rand_reader = random.Random(seeds['reader'])
        for worker_id in xrange(rw.readers):
            worker_rand = random.Random(rand_reader.randrange(2**32))
            group.spawn_link_exception(
                reader,
                bucket=bucket,
                worker_id=worker_id,
                file_names=file_names,
                queue=q,
                rand=worker_rand,
            )

        def stop_workers():
            # Kill every worker, then enqueue StopIteration -- presumably the
            # end-of-stream marker for the queue iteration below.
            group.kill(block=True)
            q.put(StopIteration)
        gevent.spawn_later(rw.duration, stop_workers)

        # wait for all the tests to finish
        group.join()
        print('post-join, queue size {size}'.format(size=q.qsize()))

        if q.qsize() > 0:
            for record in q:
                if 'error' in record:
                    failure = record['error']
                    raise Exception('exception:\n\t{msg}\n\t{trace}'.format(
                        msg=failure['msg'],
                        trace=failure['traceback'],
                    ))
                yaml.safe_dump(record, stream=real_stdout)

    finally:
        # cleanup
        if options.cleanup and bucket is not None:
            common.nuke_bucket(bucket)
def main():
    """Write a set of objects, read them all back, and report both timings.

    Steps, in order:
      * parse command-line options and read the configuration from stdin
        (a TTY on stdin is rejected);
      * validate the ``roundtrip`` section and derive the random seeds;
      * create the test bucket and generate ``files.num`` object names;
      * write every object using a pool of ``writers`` greenlets and queue
        a ``write_done`` record holding the elapsed time;
      * read the objects back in shuffled order using a pool of ``readers``
        greenlets and queue a ``read_done`` record;
      * a logger greenlet dumps everything placed on the queue as YAML on
        the original stdout.

    Raises:
        RuntimeError: stdin is a TTY or a required config item is missing.

    The bucket is nuked on the way out when ``options.cleanup`` is set.
    """
    options, _args = parse_options()

    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None

    try:
        # Plain print output goes to stderr from here on; the original
        # stdout is kept aside for the YAML result records.
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'roundtrip' not in config:
            raise RuntimeError('roundtrip section not found in config')
        rt = config.roundtrip
        # NOTE(review): 'duration' is required here but is not read anywhere
        # else in this function -- confirm whether it is still needed.
        for required in ('readers', 'writers', 'duration', 'files', 'bucket'):
            if required not in rt:
                raise RuntimeError("Missing roundtrip config item: {item}".format(item=required))
        for required in ('num', 'size', 'stddev'):
            if required not in rt.files:
                raise RuntimeError("Missing roundtrip config item: files.{item}".format(item=required))

        # setdefault() evaluates its default eagerly, so one random value is
        # drawn per seed name even when the config already supplies that seed.
        seeds = dict(rt.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))
        rand = random.Random(seeds['main'])
        for seed_name in ('names', 'contents', 'writer', 'reader'):
            seeds.setdefault(seed_name, rand.randrange(2**32))
        print('Using random seeds: {seeds}'.format(seeds=seeds))

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(rt.bucket, max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print("Created bucket: {name}".format(name=bucket.name))

        name_stream = realistic.names(
            mean=15,
            stddev=4,
            seed=seeds['names'],
        )
        objnames = list(itertools.islice(name_stream, rt.files.num))
        files = realistic.files(
            mean=1024 * rt.files.size,
            stddev=1024 * rt.files.stddev,
            seed=seeds['contents'],
        )
        q = gevent.queue.Queue()

        # Background greenlet that serialises queued records to real stdout.
        logger_g = gevent.spawn(yaml.safe_dump_all, q, stream=real_stdout)

        # --- write phase ---
        print("Writing {num} objects with {w} workers...".format(
            num=rt.files.num,
            w=rt.writers,
        ))
        pool = gevent.pool.Pool(size=rt.writers)
        began = time.time()
        for objname in objnames:
            pool.spawn(
                writer,
                bucket=bucket,
                objname=objname,
                fp=next(files),
                queue=q,
            )
        pool.join()
        elapsed = time.time() - began
        q.put({
            'type': 'write_done',
            'duration': int(round(elapsed * NANOSECOND)),
        })

        # --- read phase ---
        print("Reading {num} objects with {w} workers...".format(
            num=rt.files.num,
            w=rt.readers,
        ))
        # avoid accessing them in the same order as the writing
        rand.shuffle(objnames)
        pool = gevent.pool.Pool(size=rt.readers)
        began = time.time()
        for objname in objnames:
            pool.spawn(
                reader,
                bucket=bucket,
                objname=objname,
                queue=q,
            )
        pool.join()
        elapsed = time.time() - began
        q.put({
            'type': 'read_done',
            'duration': int(round(elapsed * NANOSECOND)),
        })

        # Signal the end of the record stream, then wait for the logger.
        q.put(StopIteration)
        logger_g.get()

    finally:
        # cleanup
        if options.cleanup and bucket is not None:
            common.nuke_bucket(bucket)
def main():
    """Run the mixed reader/writer workload described by the config on stdin.

    Steps, in order:
      * parse command-line options and read the configuration from stdin
        (a TTY on stdin is rejected);
      * validate the ``readwrite`` section and derive the random seeds;
      * create the test bucket and generate ``files.num`` random names;
      * upload an initial set of files (warmup);
      * spawn writer and reader greenlets and arm a timer, set from
        ``readwrite.duration``, that kills them;
      * dump everything the workers put on the queue as YAML on the
        original stdout.

    Raises:
        RuntimeError: stdin is a TTY or a required config item is missing.

    The bucket is nuked on the way out when ``options.cleanup`` is set.
    """
    options, _args = parse_options()

    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None

    try:
        # Plain print output goes to stderr from here on; the original
        # stdout is kept aside for the YAML result records.
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'readwrite' not in config:
            raise RuntimeError('readwrite section not found in config')
        rw = config.readwrite
        for required in ('readers', 'writers', 'duration', 'files', 'bucket'):
            if required not in rw:
                raise RuntimeError("Missing readwrite config item: {item}".format(item=required))
        for required in ('num', 'size', 'stddev'):
            if required not in rw.files:
                raise RuntimeError("Missing readwrite config item: files.{item}".format(item=required))

        # setdefault() evaluates its default eagerly, so one random value is
        # drawn per seed name even when the config already supplies that seed.
        seeds = dict(rw.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))
        rand = random.Random(seeds['main'])
        for seed_name in ('names', 'contents', 'writer', 'reader'):
            seeds.setdefault(seed_name, rand.randrange(2**32))
        print('Using random seeds: {seeds}'.format(seeds=seeds))

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(rw.bucket, max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print("Created bucket: {name}".format(name=bucket.name))

        name_stream = realistic.names(
            mean=15,
            stddev=4,
            seed=seeds['names'],
        )
        file_names = list(itertools.islice(name_stream, rw.files.num))
        files = realistic.files2(
            mean=1024 * rw.files.size,
            stddev=1024 * rw.files.stddev,
            seed=seeds['contents'],
        )
        q = gevent.queue.Queue()

        # warmup - get initial set of files uploaded
        print("Uploading initial set of {num} files".format(num=rw.files.num))
        warmup_pool = gevent.pool.Pool(size=100)
        for file_name in file_names:
            warmup_pool.spawn_link_exception(
                write_file,
                bucket=bucket,
                file_name=file_name,
                fp=next(files),
            )
        warmup_pool.join()

        # main work
        print("Starting main worker loop.")
        print("Using file size: {size} +- {stddev}".format(size=rw.files.size, stddev=rw.files.stddev))
        print("Spawning {w} writers and {r} readers...".format(w=rw.writers, r=rw.readers))
        group = gevent.pool.Group()

        # Each worker gets its own Random, seeded from a per-role generator.
        rand_writer = random.Random(seeds['writer'])
        for worker_id in xrange(rw.writers):
            worker_rand = random.Random(rand_writer.randrange(2**32))
            group.spawn_link_exception(
                writer,
                bucket=bucket,
                worker_id=worker_id,
                file_names=file_names,
                files=files,
                queue=q,
                rand=worker_rand,
            )

        rand_reader = random.Random(seeds['reader'])
        for worker_id in xrange(rw.readers):
            worker_rand = random.Random(rand_reader.randrange(2**32))
            group.spawn_link_exception(
                reader,
                bucket=bucket,
                worker_id=worker_id,
                file_names=file_names,
                queue=q,
                rand=worker_rand,
            )

        def stop_workers():
            # Kill every worker, then enqueue StopIteration -- presumably the
            # end-of-stream marker for the YAML dump below.
            group.kill(block=True)
            q.put(StopIteration)
        gevent.spawn_later(rw.duration, stop_workers)

        # Stream queued records to the real stdout as they arrive.
        yaml.safe_dump_all(q, stream=real_stdout)

    finally:
        # cleanup
        if options.cleanup and bucket is not None:
            common.nuke_bucket(bucket)
def main():
    """Write a set of objects, read them all back, and report both timings.

    Steps, in order:
      * parse command-line options and read the configuration from stdin
        (a TTY on stdin is rejected);
      * validate the ``roundtrip`` section and derive the random seeds;
      * create the test bucket and generate ``files.num`` object names;
      * write every object using a pool of ``writers`` greenlets and queue
        a ``write_done`` record holding the elapsed time;
      * read the objects back in shuffled order using a pool of ``readers``
        greenlets and queue a ``read_done`` record;
      * a logger greenlet dumps everything placed on the queue as YAML on
        the original stdout.

    Raises:
        RuntimeError: stdin is a TTY or a required config item is missing.

    The bucket is nuked on the way out when ``options.cleanup`` is set.
    """
    options, _args = parse_options()

    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None

    try:
        # Plain print output goes to stderr from here on; the original
        # stdout is kept aside for the YAML result records.
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'roundtrip' not in config:
            raise RuntimeError('roundtrip section not found in config')
        rt = config.roundtrip
        # NOTE(review): 'duration' is required here but is not read anywhere
        # else in this function -- confirm whether it is still needed.
        for required in ('readers', 'writers', 'duration', 'files', 'bucket'):
            if required not in rt:
                raise RuntimeError(
                    "Missing roundtrip config item: {item}".format(item=required))
        for required in ('num', 'size', 'stddev'):
            if required not in rt.files:
                raise RuntimeError(
                    "Missing roundtrip config item: files.{item}".format(
                        item=required))

        # setdefault() evaluates its default eagerly, so one random value is
        # drawn per seed name even when the config already supplies that seed.
        seeds = dict(rt.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))
        rand = random.Random(seeds['main'])
        for seed_name in ('names', 'contents', 'writer', 'reader'):
            seeds.setdefault(seed_name, rand.randrange(2**32))
        print('Using random seeds: {seeds}'.format(seeds=seeds))

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(rt.bucket, max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print("Created bucket: {name}".format(name=bucket.name))

        name_stream = realistic.names(
            mean=15,
            stddev=4,
            seed=seeds['names'],
        )
        objnames = list(itertools.islice(name_stream, rt.files.num))
        files = realistic.files(
            mean=1024 * rt.files.size,
            stddev=1024 * rt.files.stddev,
            seed=seeds['contents'],
        )
        q = gevent.queue.Queue()

        # Background greenlet that serialises queued records to real stdout.
        logger_g = gevent.spawn(yaml.safe_dump_all, q, stream=real_stdout)

        # --- write phase ---
        print("Writing {num} objects with {w} workers...".format(
            num=rt.files.num,
            w=rt.writers,
        ))
        pool = gevent.pool.Pool(size=rt.writers)
        began = time.time()
        for objname in objnames:
            pool.spawn(
                writer,
                bucket=bucket,
                objname=objname,
                fp=next(files),
                queue=q,
            )
        pool.join()
        elapsed = time.time() - began
        q.put({
            'type': 'write_done',
            'duration': int(round(elapsed * NANOSECOND)),
        })

        # --- read phase ---
        print("Reading {num} objects with {w} workers...".format(
            num=rt.files.num,
            w=rt.readers,
        ))
        # avoid accessing them in the same order as the writing
        rand.shuffle(objnames)
        pool = gevent.pool.Pool(size=rt.readers)
        began = time.time()
        for objname in objnames:
            pool.spawn(
                reader,
                bucket=bucket,
                objname=objname,
                queue=q,
            )
        pool.join()
        elapsed = time.time() - began
        q.put({
            'type': 'read_done',
            'duration': int(round(elapsed * NANOSECOND)),
        })

        # Signal the end of the record stream, then wait for the logger.
        q.put(StopIteration)
        logger_g.get()

    finally:
        # cleanup
        if options.cleanup and bucket is not None:
            common.nuke_bucket(bucket)
def main():
    """Run the mixed reader/writer workload described by the config on stdin.

    Steps, in order:
      * parse command-line options and read the configuration from stdin
        (a TTY on stdin is rejected);
      * validate the ``readwrite`` section and derive the random seeds;
      * create the test bucket and build the list of object names, either
        random or deterministic (``deterministic_file_names``);
      * if any writers are configured, upload an initial set of files;
      * spawn writer and reader greenlets, kill them once the timer set
        from ``readwrite.duration`` fires, then dump the queued result
        records as YAML on the original stdout.

    Raises:
        RuntimeError: stdin is a TTY or a required config item is missing.
        Exception: a queued result record carries an ``error`` entry.

    The bucket is nuked on the way out when ``options.cleanup`` is set.
    """
    options, _args = parse_options()

    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None

    try:
        # Plain print output goes to stderr from here on; the original
        # stdout is kept aside for the YAML result records.
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'readwrite' not in config:
            raise RuntimeError('readwrite section not found in config')
        rw = config.readwrite
        for required in ('readers', 'writers', 'duration', 'files', 'bucket'):
            if required not in rw:
                raise RuntimeError(
                    "Missing readwrite config item: {item}".format(item=required))
        for required in ('num', 'size', 'stddev'):
            if required not in rw.files:
                raise RuntimeError(
                    "Missing readwrite config item: files.{item}".format(
                        item=required))

        # setdefault() evaluates its default eagerly, so one random value is
        # drawn per seed name even when the config already supplies that seed.
        seeds = dict(rw.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))
        rand = random.Random(seeds['main'])
        for seed_name in ('names', 'contents', 'writer', 'reader'):
            seeds.setdefault(seed_name, rand.randrange(2**32))
        print('Using random seeds: {seeds}'.format(seeds=seeds))

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(rw.bucket, max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print("Created bucket: {name}".format(name=bucket.name))

        # check flag for deterministic file name creation
        deterministic = rw.get('deterministic_file_names')
        if deterministic:
            print('Creating file names that are deterministic')
            file_names = [
                'test_file_{num}'.format(num=index)
                for index in xrange(rw.files.num)
            ]
        else:
            print('Creating random file names')
            name_stream = realistic.names(
                mean=15,
                stddev=4,
                seed=seeds['names'],
            )
            file_names = list(itertools.islice(name_stream, rw.files.num))

        files = realistic.files2(
            mean=1024 * rw.files.size,
            stddev=1024 * rw.files.stddev,
            seed=seeds['contents'],
        )
        q = gevent.queue.Queue()

        # warmup - upload the initial set of files, but only if there are
        # any writers specified
        if rw.writers > 0:
            print("Uploading initial set of {num} files".format(
                num=rw.files.num))
            warmup_pool = gevent.pool.Pool(size=100)
            for file_name in file_names:
                warmup_pool.spawn_link_exception(
                    write_file,
                    bucket=bucket,
                    file_name=file_name,
                    fp=next(files),
                )
            warmup_pool.join()

        # main work
        print("Starting main worker loop.")
        print("Using file size: {size} +- {stddev}".format(
            size=rw.files.size, stddev=rw.files.stddev))
        print("Spawning {w} writers and {r} readers...".format(
            w=rw.writers, r=rw.readers))
        group = gevent.pool.Group()
        rand_writer = random.Random(seeds['writer'])

        # Don't create random files if deterministic_file_names is set and true
        if not deterministic:
            for worker_id in xrange(rw.writers):
                worker_rand = random.Random(rand_writer.randrange(2**32))
                group.spawn_link_exception(
                    writer,
                    bucket=bucket,
                    worker_id=worker_id,
                    file_names=file_names,
                    files=files,
                    queue=q,
                    rand=worker_rand,
                )

        # The reader loop is already bounded by readwrite.readers and the
        # file names exist either way (random or deterministic), so it needs
        # no extra condition. With zero readers nothing is read.
        rand_reader = random.Random(seeds['reader'])
        for worker_id in xrange(rw.readers):
            worker_rand = random.Random(rand_reader.randrange(2**32))
            group.spawn_link_exception(
                reader,
                bucket=bucket,
                worker_id=worker_id,
                file_names=file_names,
                queue=q,
                rand=worker_rand,
            )

        def stop_workers():
            # Kill every worker, then enqueue StopIteration -- presumably the
            # end-of-stream marker for the queue iteration below.
            group.kill(block=True)
            q.put(StopIteration)
        gevent.spawn_later(rw.duration, stop_workers)

        # wait for all the tests to finish
        group.join()
        print('post-join, queue size {size}'.format(size=q.qsize()))

        if q.qsize() > 0:
            for record in q:
                if 'error' in record:
                    failure = record['error']
                    raise Exception('exception:\n\t{msg}\n\t{trace}'.format(
                        msg=failure['msg'],
                        trace=failure['traceback']))
                yaml.safe_dump(record, stream=real_stdout)

    finally:
        # cleanup
        if options.cleanup and bucket is not None:
            common.nuke_bucket(bucket)