Example #1
0
def upload_objects(bucket, files, seed):
    """Upload a bunch of files to an S3 bucket

    IN:
      boto S3 bucket object
      list of file handles to upload
      seed for PRNG
    OUT:
      list of boto S3 key objects
    """
    keys = []
    name_generator = realistic.names(15, 4, seed=seed)

    for fp in files:
        print >> sys.stderr, 'sending file with size %dB' % fp.size
        key = Key(bucket)
        # use the next() builtin rather than the generator's .next()
        # method -- consistent with the rest of this module (which calls
        # next(files)) and forward-compatible with Python 3
        key.key = next(name_generator)
        key.set_contents_from_file(fp)
        keys.append(key)

    return keys
Example #2
0
def upload_objects(bucket, files, seed):
    """Upload a bunch of files to an S3 bucket

    IN:
      boto S3 bucket object
      list of file handles to upload
      seed for PRNG
    OUT:
      list of boto S3 key objects
    """
    name_generator = realistic.names(15, 4, seed=seed)
    uploaded_keys = []

    for file_handle in files:
        print >> sys.stderr, 'sending file with size %dB' % file_handle.size
        # one boto Key per file, with a generated realistic name
        s3_key = Key(bucket)
        s3_key.key = name_generator.next()
        s3_key.set_contents_from_file(file_handle)
        uploaded_keys.append(s3_key)

    return uploaded_keys
Example #3
0
def main():
    """Drive the readwrite S3 stress test.

    Reads configuration from stdin (refusing to run on a tty), creates
    a uniquely-named bucket, uploads an initial file set, then spawns
    writer/reader greenlets against the bucket for
    config.readwrite.duration seconds.  Worker results are drained from
    a queue: errors are re-raised, everything else is dumped as YAML to
    the original stdout.  The bucket is nuked on exit when
    options.cleanup is set.
    """
    # parse options
    (options, args) = parse_options()

    # config must be piped in; refuse to hang waiting on a terminal
    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None  # set after creation so the finally block can clean up

    try:
        # setup
        # route informational prints to stderr; keep the real stdout
        # for the YAML result stream
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'readwrite' not in config:
            raise RuntimeError('readwrite section not found in config')
        for item in ['readers', 'writers', 'duration', 'files', 'bucket']:
            if item not in config.readwrite:
                raise RuntimeError("Missing readwrite config item: {item}".format(item=item))
        for item in ['num', 'size', 'stddev']:
            if item not in config.readwrite.files:
                raise RuntimeError("Missing readwrite config item: files.{item}".format(item=item))

        # derive per-purpose PRNG seeds from a single 'main' seed so a
        # run can be reproduced by supplying random_seed in the config
        seeds = dict(config.readwrite.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))

        rand = random.Random(seeds['main'])

        for name in ['names', 'contents', 'writer', 'reader']:
            seeds.setdefault(name, rand.randrange(2**32))

        print 'Using random seeds: {seeds}'.format(seeds=seeds)

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(config.readwrite.bucket, max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print "Created bucket: {name}".format(name=bucket.name)

        # check flag for deterministic file name creation
        if not config.readwrite.get('deterministic_file_names'):
            print 'Creating random file names'
            file_names = realistic.names(
                mean=15,
                stddev=4,
                seed=seeds['names'],
                )
            file_names = itertools.islice(file_names, config.readwrite.files.num)
            file_names = list(file_names)
        else:
            print 'Creating file names that are deterministic'
            file_names = []
            for x in xrange(config.readwrite.files.num):
                file_names.append('test_file_{num}'.format(num=x))

        files = realistic.files2(
            mean=1024 * config.readwrite.files.size,
            stddev=1024 * config.readwrite.files.stddev,
            seed=seeds['contents'],
            )
        q = gevent.queue.Queue()

        # warmup - get initial set of files uploaded if there are any writers specified
        if config.readwrite.writers > 0:
            print "Uploading initial set of {num} files".format(num=config.readwrite.files.num)
            warmup_pool = gevent.pool.Pool(size=100)
            for file_name in file_names:
                fp = next(files)
                warmup_pool.spawn_link_exception(
                    write_file,
                    bucket=bucket,
                    file_name=file_name,
                    fp=fp,
                    )
            warmup_pool.join()

        # main work
        print "Starting main worker loop."
        print "Using file size: {size} +- {stddev}".format(size=config.readwrite.files.size, stddev=config.readwrite.files.stddev)
        print "Spawning {w} writers and {r} readers...".format(w=config.readwrite.writers, r=config.readwrite.readers)
        group = gevent.pool.Group()
        rand_writer = random.Random(seeds['writer'])

        # Don't create random files if deterministic_files_names is set and true
        if not config.readwrite.get('deterministic_file_names'):
            for x in xrange(config.readwrite.writers):
                # each writer gets its own independent PRNG stream
                this_rand = random.Random(rand_writer.randrange(2**32))
                group.spawn_link_exception(
                    writer,
                    bucket=bucket,
                    worker_id=x,
                    file_names=file_names,
                    files=files,
                    queue=q,
                    rand=this_rand,
                    )

        # Since the loop generating readers already uses config.readwrite.readers
        # and the file names are already generated (randomly or deterministically),
        # this loop needs no additional qualifiers. If zero readers are specified,
        # it will behave as expected (no data is read)
        rand_reader = random.Random(seeds['reader'])
        for x in xrange(config.readwrite.readers):
            this_rand = random.Random(rand_reader.randrange(2**32))
            group.spawn_link_exception(
                reader,
                bucket=bucket,
                worker_id=x,
                file_names=file_names,
                queue=q,
                rand=this_rand,
                )
        # after the configured duration, kill all workers and mark the
        # queue finished so the drain loop below terminates
        def stop():
            group.kill(block=True)
            q.put(StopIteration)
        gevent.spawn_later(config.readwrite.duration, stop)

        # wait for all the tests to finish
        group.join()
        print 'post-join, queue size {size}'.format(size=q.qsize())

        # drain results: re-raise the first worker-reported error,
        # otherwise dump each result dict as YAML to the real stdout
        if q.qsize() > 0:
            for temp_dict in q:
                if 'error' in temp_dict:
                    raise Exception('exception:\n\t{msg}\n\t{trace}'.format(
                                    msg=temp_dict['error']['msg'],
                                    trace=temp_dict['error']['traceback'])
                                   )
                else:
                    yaml.safe_dump(temp_dict, stream=real_stdout)

    finally:
        # cleanup
        if options.cleanup:
            if bucket is not None:
                common.nuke_bucket(bucket)
Example #4
0
def main():
    """Drive the roundtrip benchmark.

    Reads configuration from stdin, creates a bucket, writes
    config.roundtrip.files.num objects with a pool of writers, then
    reads them back (in shuffled order) with a pool of readers, timing
    each phase.  Per-operation results plus write_done/read_done
    duration records (scaled by NANOSECOND -- presumably to
    nanoseconds; constant defined elsewhere) are streamed as YAML to
    the original stdout by a logger greenlet.
    """
    # parse options
    (options, args) = parse_options()

    # config must be piped in; refuse to hang waiting on a terminal
    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None  # set after creation so the finally block can clean up

    try:
        # setup
        # route informational prints to stderr; keep the real stdout
        # for the YAML result stream
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'roundtrip' not in config:
            raise RuntimeError('roundtrip section not found in config')
        for item in ['readers', 'writers', 'duration', 'files', 'bucket']:
            if item not in config.roundtrip:
                raise RuntimeError("Missing roundtrip config item: {item}".format(item=item))
        for item in ['num', 'size', 'stddev']:
            if item not in config.roundtrip.files:
                raise RuntimeError("Missing roundtrip config item: files.{item}".format(item=item))

        # derive per-purpose PRNG seeds from a single 'main' seed so a
        # run can be reproduced by supplying random_seed in the config
        seeds = dict(config.roundtrip.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))

        rand = random.Random(seeds['main'])

        for name in ['names', 'contents', 'writer', 'reader']:
            seeds.setdefault(name, rand.randrange(2**32))

        print 'Using random seeds: {seeds}'.format(seeds=seeds)

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(config.roundtrip.bucket, max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print "Created bucket: {name}".format(name=bucket.name)
        objnames = realistic.names(
            mean=15,
            stddev=4,
            seed=seeds['names'],
            )
        objnames = itertools.islice(objnames, config.roundtrip.files.num)
        objnames = list(objnames)
        files = realistic.files(
            mean=1024 * config.roundtrip.files.size,
            stddev=1024 * config.roundtrip.files.stddev,
            seed=seeds['contents'],
            )
        q = gevent.queue.Queue()

        # logger greenlet: consumes the queue and streams each result as
        # YAML to the real stdout until StopIteration is queued
        logger_g = gevent.spawn(yaml.safe_dump_all, q, stream=real_stdout)

        print "Writing {num} objects with {w} workers...".format(
            num=config.roundtrip.files.num,
            w=config.roundtrip.writers,
            )
        pool = gevent.pool.Pool(size=config.roundtrip.writers)
        start = time.time()
        for objname in objnames:
            fp = next(files)
            pool.spawn(
                writer,
                bucket=bucket,
                objname=objname,
                fp=fp,
                queue=q,
                )
        pool.join()
        stop = time.time()
        elapsed = stop - start
        q.put(dict(
                type='write_done',
                duration=int(round(elapsed * NANOSECOND)),
                ))

        print "Reading {num} objects with {w} workers...".format(
            num=config.roundtrip.files.num,
            w=config.roundtrip.readers,
            )
        # avoid accessing them in the same order as the writing
        rand.shuffle(objnames)
        pool = gevent.pool.Pool(size=config.roundtrip.readers)
        start = time.time()
        for objname in objnames:
            pool.spawn(
                reader,
                bucket=bucket,
                objname=objname,
                queue=q,
                )
        pool.join()
        stop = time.time()
        elapsed = stop - start
        q.put(dict(
                type='read_done',
                duration=int(round(elapsed * NANOSECOND)),
                ))

        # end the queue iteration, then wait for (and propagate any
        # exception from) the logger greenlet
        q.put(StopIteration)
        logger_g.get()

    finally:
        # cleanup
        if options.cleanup:
            if bucket is not None:
                common.nuke_bucket(bucket)
Example #5
0
def main():
    """Drive the readwrite S3 stress test (earlier variant).

    Reads configuration from stdin, creates a bucket, uploads an
    initial file set, then spawns writer/reader greenlets for
    config.readwrite.duration seconds while yaml.safe_dump_all streams
    queued results to the original stdout.  The bucket is nuked on
    exit when options.cleanup is set.
    """
    # parse options
    (options, args) = parse_options()

    # config must be piped in; refuse to hang waiting on a terminal
    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None  # set after creation so the finally block can clean up

    try:
        # setup
        # route informational prints to stderr; keep the real stdout
        # for the YAML result stream
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'readwrite' not in config:
            raise RuntimeError('readwrite section not found in config')
        for item in ['readers', 'writers', 'duration', 'files', 'bucket']:
            if item not in config.readwrite:
                raise RuntimeError("Missing readwrite config item: {item}".format(item=item))
        for item in ['num', 'size', 'stddev']:
            if item not in config.readwrite.files:
                raise RuntimeError("Missing readwrite config item: files.{item}".format(item=item))

        # derive per-purpose PRNG seeds from a single 'main' seed so a
        # run can be reproduced by supplying random_seed in the config
        seeds = dict(config.readwrite.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))

        rand = random.Random(seeds['main'])

        for name in ['names', 'contents', 'writer', 'reader']:
            seeds.setdefault(name, rand.randrange(2**32))

        print 'Using random seeds: {seeds}'.format(seeds=seeds)

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(config.readwrite.bucket, max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print "Created bucket: {name}".format(name=bucket.name)
        file_names = realistic.names(
            mean=15,
            stddev=4,
            seed=seeds['names'],
            )
        file_names = itertools.islice(file_names, config.readwrite.files.num)
        file_names = list(file_names)
        files = realistic.files2(
            mean=1024 * config.readwrite.files.size,
            stddev=1024 * config.readwrite.files.stddev,
            seed=seeds['contents'],
            )
        q = gevent.queue.Queue()

        # warmup - get initial set of files uploaded
        # NOTE(review): warmup runs even when config.readwrite.writers
        # is 0 -- later variants of this driver guard it with a
        # writers > 0 check
        print "Uploading initial set of {num} files".format(num=config.readwrite.files.num)
        warmup_pool = gevent.pool.Pool(size=100)
        for file_name in file_names:
            fp = next(files)
            warmup_pool.spawn_link_exception(
                write_file,
                bucket=bucket,
                file_name=file_name,
                fp=fp,
                )
        warmup_pool.join()

        # main work
        print "Starting main worker loop."
        print "Using file size: {size} +- {stddev}".format(size=config.readwrite.files.size, stddev=config.readwrite.files.stddev)
        print "Spawning {w} writers and {r} readers...".format(w=config.readwrite.writers, r=config.readwrite.readers)
        group = gevent.pool.Group()
        rand_writer = random.Random(seeds['writer'])
        for x in xrange(config.readwrite.writers):
            # each worker gets its own independent PRNG stream
            this_rand = random.Random(rand_writer.randrange(2**32))
            group.spawn_link_exception(
                writer,
                bucket=bucket,
                worker_id=x,
                file_names=file_names,
                files=files,
                queue=q,
                rand=this_rand,
                )
        rand_reader = random.Random(seeds['reader'])
        for x in xrange(config.readwrite.readers):
            this_rand = random.Random(rand_reader.randrange(2**32))
            group.spawn_link_exception(
                reader,
                bucket=bucket,
                worker_id=x,
                file_names=file_names,
                queue=q,
                rand=this_rand,
                )
        # after the configured duration, kill all workers and mark the
        # queue finished so safe_dump_all's iteration terminates
        def stop():
            group.kill(block=True)
            q.put(StopIteration)
        gevent.spawn_later(config.readwrite.duration, stop)

        # blocks consuming the queue until stop() queues StopIteration
        yaml.safe_dump_all(q, stream=real_stdout)

    finally:
        # cleanup
        if options.cleanup:
            if bucket is not None:
                common.nuke_bucket(bucket)
Example #6
0
def main():
    """Drive the roundtrip benchmark (reformatted variant).

    Reads configuration from stdin, creates a bucket, writes
    config.roundtrip.files.num objects with a pool of writers, then
    reads them back (in shuffled order) with a pool of readers, timing
    each phase.  Per-operation results plus write_done/read_done
    duration records (scaled by NANOSECOND -- presumably to
    nanoseconds; constant defined elsewhere) are streamed as YAML to
    the original stdout by a logger greenlet.
    """
    # parse options
    (options, args) = parse_options()

    # config must be piped in; refuse to hang waiting on a terminal
    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None  # set after creation so the finally block can clean up

    try:
        # setup
        # route informational prints to stderr; keep the real stdout
        # for the YAML result stream
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'roundtrip' not in config:
            raise RuntimeError('roundtrip section not found in config')
        for item in ['readers', 'writers', 'duration', 'files', 'bucket']:
            if item not in config.roundtrip:
                raise RuntimeError(
                    "Missing roundtrip config item: {item}".format(item=item))
        for item in ['num', 'size', 'stddev']:
            if item not in config.roundtrip.files:
                raise RuntimeError(
                    "Missing roundtrip config item: files.{item}".format(
                        item=item))

        # derive per-purpose PRNG seeds from a single 'main' seed so a
        # run can be reproduced by supplying random_seed in the config
        seeds = dict(config.roundtrip.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))

        rand = random.Random(seeds['main'])

        for name in ['names', 'contents', 'writer', 'reader']:
            seeds.setdefault(name, rand.randrange(2**32))

        print 'Using random seeds: {seeds}'.format(seeds=seeds)

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(config.roundtrip.bucket,
                                                  max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print "Created bucket: {name}".format(name=bucket.name)
        objnames = realistic.names(
            mean=15,
            stddev=4,
            seed=seeds['names'],
        )
        objnames = itertools.islice(objnames, config.roundtrip.files.num)
        objnames = list(objnames)
        files = realistic.files(
            mean=1024 * config.roundtrip.files.size,
            stddev=1024 * config.roundtrip.files.stddev,
            seed=seeds['contents'],
        )
        q = gevent.queue.Queue()

        # logger greenlet: consumes the queue and streams each result as
        # YAML to the real stdout until StopIteration is queued
        logger_g = gevent.spawn(yaml.safe_dump_all, q, stream=real_stdout)

        print "Writing {num} objects with {w} workers...".format(
            num=config.roundtrip.files.num,
            w=config.roundtrip.writers,
        )
        pool = gevent.pool.Pool(size=config.roundtrip.writers)
        start = time.time()
        for objname in objnames:
            fp = next(files)
            pool.spawn(
                writer,
                bucket=bucket,
                objname=objname,
                fp=fp,
                queue=q,
            )
        pool.join()
        stop = time.time()
        elapsed = stop - start
        q.put(
            dict(
                type='write_done',
                duration=int(round(elapsed * NANOSECOND)),
            ))

        print "Reading {num} objects with {w} workers...".format(
            num=config.roundtrip.files.num,
            w=config.roundtrip.readers,
        )
        # avoid accessing them in the same order as the writing
        rand.shuffle(objnames)
        pool = gevent.pool.Pool(size=config.roundtrip.readers)
        start = time.time()
        for objname in objnames:
            pool.spawn(
                reader,
                bucket=bucket,
                objname=objname,
                queue=q,
            )
        pool.join()
        stop = time.time()
        elapsed = stop - start
        q.put(
            dict(
                type='read_done',
                duration=int(round(elapsed * NANOSECOND)),
            ))

        # end the queue iteration, then wait for (and propagate any
        # exception from) the logger greenlet
        q.put(StopIteration)
        logger_g.get()

    finally:
        # cleanup
        if options.cleanup:
            if bucket is not None:
                common.nuke_bucket(bucket)
Example #7
0
def main():
    """Drive the readwrite S3 stress test (reformatted variant).

    Reads configuration from stdin (refusing to run on a tty), creates
    a uniquely-named bucket, uploads an initial file set, then spawns
    writer/reader greenlets against the bucket for
    config.readwrite.duration seconds.  Worker results are drained from
    a queue: errors are re-raised, everything else is dumped as YAML to
    the original stdout.  The bucket is nuked on exit when
    options.cleanup is set.
    """
    # parse options
    (options, args) = parse_options()

    # config must be piped in; refuse to hang waiting on a terminal
    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None  # set after creation so the finally block can clean up

    try:
        # setup
        # route informational prints to stderr; keep the real stdout
        # for the YAML result stream
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'readwrite' not in config:
            raise RuntimeError('readwrite section not found in config')
        for item in ['readers', 'writers', 'duration', 'files', 'bucket']:
            if item not in config.readwrite:
                raise RuntimeError(
                    "Missing readwrite config item: {item}".format(item=item))
        for item in ['num', 'size', 'stddev']:
            if item not in config.readwrite.files:
                raise RuntimeError(
                    "Missing readwrite config item: files.{item}".format(
                        item=item))

        # derive per-purpose PRNG seeds from a single 'main' seed so a
        # run can be reproduced by supplying random_seed in the config
        seeds = dict(config.readwrite.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))

        rand = random.Random(seeds['main'])

        for name in ['names', 'contents', 'writer', 'reader']:
            seeds.setdefault(name, rand.randrange(2**32))

        print 'Using random seeds: {seeds}'.format(seeds=seeds)

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(config.readwrite.bucket,
                                                  max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print "Created bucket: {name}".format(name=bucket.name)

        # check flag for deterministic file name creation
        if not config.readwrite.get('deterministic_file_names'):
            print 'Creating random file names'
            file_names = realistic.names(
                mean=15,
                stddev=4,
                seed=seeds['names'],
            )
            file_names = itertools.islice(file_names,
                                          config.readwrite.files.num)
            file_names = list(file_names)
        else:
            print 'Creating file names that are deterministic'
            file_names = []
            for x in xrange(config.readwrite.files.num):
                file_names.append('test_file_{num}'.format(num=x))

        files = realistic.files2(
            mean=1024 * config.readwrite.files.size,
            stddev=1024 * config.readwrite.files.stddev,
            seed=seeds['contents'],
        )
        q = gevent.queue.Queue()

        # warmup - get initial set of files uploaded if there are any writers specified
        if config.readwrite.writers > 0:
            print "Uploading initial set of {num} files".format(
                num=config.readwrite.files.num)
            warmup_pool = gevent.pool.Pool(size=100)
            for file_name in file_names:
                fp = next(files)
                warmup_pool.spawn_link_exception(
                    write_file,
                    bucket=bucket,
                    file_name=file_name,
                    fp=fp,
                )
            warmup_pool.join()

        # main work
        print "Starting main worker loop."
        print "Using file size: {size} +- {stddev}".format(
            size=config.readwrite.files.size,
            stddev=config.readwrite.files.stddev)
        print "Spawning {w} writers and {r} readers...".format(
            w=config.readwrite.writers, r=config.readwrite.readers)
        group = gevent.pool.Group()
        rand_writer = random.Random(seeds['writer'])

        # Don't create random files if deterministic_files_names is set and true
        if not config.readwrite.get('deterministic_file_names'):
            for x in xrange(config.readwrite.writers):
                # each writer gets its own independent PRNG stream
                this_rand = random.Random(rand_writer.randrange(2**32))
                group.spawn_link_exception(
                    writer,
                    bucket=bucket,
                    worker_id=x,
                    file_names=file_names,
                    files=files,
                    queue=q,
                    rand=this_rand,
                )

        # Since the loop generating readers already uses config.readwrite.readers
        # and the file names are already generated (randomly or deterministically),
        # this loop needs no additional qualifiers. If zero readers are specified,
        # it will behave as expected (no data is read)
        rand_reader = random.Random(seeds['reader'])
        for x in xrange(config.readwrite.readers):
            this_rand = random.Random(rand_reader.randrange(2**32))
            group.spawn_link_exception(
                reader,
                bucket=bucket,
                worker_id=x,
                file_names=file_names,
                queue=q,
                rand=this_rand,
            )

        # after the configured duration, kill all workers and mark the
        # queue finished so the drain loop below terminates
        def stop():
            group.kill(block=True)
            q.put(StopIteration)

        gevent.spawn_later(config.readwrite.duration, stop)

        # wait for all the tests to finish
        group.join()
        print 'post-join, queue size {size}'.format(size=q.qsize())

        # drain results: re-raise the first worker-reported error,
        # otherwise dump each result dict as YAML to the real stdout
        if q.qsize() > 0:
            for temp_dict in q:
                if 'error' in temp_dict:
                    raise Exception('exception:\n\t{msg}\n\t{trace}'.format(
                        msg=temp_dict['error']['msg'],
                        trace=temp_dict['error']['traceback']))
                else:
                    yaml.safe_dump(temp_dict, stream=real_stdout)

    finally:
        # cleanup
        if options.cleanup:
            if bucket is not None:
                common.nuke_bucket(bucket)