import array
import gc
import os
import shutil
from tempfile import mkdtemp

import nose.tools

import joblib
from joblib.disk import disk_used


def test_disk_used(tmpdir):
    cachedir = tmpdir.strpath
    # Now write a file that is 1M big in this directory, and check the
    # size. The reason we use such a big file is that it makes us robust
    # to errors due to block allocation.
    a = array.array('i')
    sizeof_i = a.itemsize
    target_size = 1024
    n = int(target_size * 1024 / sizeof_i)
    a = array.array('i', n * (1,))
    with open(os.path.join(cachedir, 'test'), 'wb') as output:
        a.tofile(output)
    assert disk_used(cachedir) >= target_size
    assert disk_used(cachedir) < target_size + 12
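# For reference, a minimal sketch of the kind of helper this test exercises
# (the real implementation lives in joblib.disk and may differ; the name
# disk_used_sketch is hypothetical). It sums allocated 512-byte blocks
# rather than apparent file sizes, which is why the test tolerates a small
# amount of slack above target_size.
def disk_used_sketch(path):
    """Return the disk usage of a directory, in kilobytes."""
    size = 0
    # Include '.' so the directory entry itself is counted.
    for entry in os.listdir(path) + ['.']:
        stat = os.stat(os.path.join(path, entry))
        if hasattr(stat, 'st_blocks'):
            # st_blocks counts 512-byte blocks actually allocated.
            size += stat.st_blocks * 512
        else:
            # st_blocks is unavailable on some platforms (e.g. Windows);
            # approximate by rounding the apparent size up to a full block.
            size += (stat.st_size // 512 + 1) * 512
    return int(size / 1024.)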
# Note: timeit, memory_used, clear_out, delete_obj and print_line are
# helpers defined elsewhere in the benchmark script these functions come
# from.
def bench_compress(dataset, name='', compress=('zlib', 0), cache_size=0,
                   tries=5):
    """Bench joblib dump and load functions, compress modes."""
    # Generate the output compression strategy string before the joblib
    # compatibility check, as that check may override the compress variable
    # with a non-tuple type.
    compress_str = "Raw" if compress[1] == 0 else "{0} {1}".format(*compress)

    # joblib versions prior to 0.10 don't support a tuple in the compress
    # argument, so only the second element of the tuple is used for those
    # versions and the compression strategy is ignored.
    if (isinstance(compress, tuple) and
            tuple(map(int, joblib.__version__.split('.')[:2])) < (0, 10)):
        compress = compress[1]

    clear_out()
    time_write, obj = timeit(joblib.dump, dataset, 'out/test.pkl',
                             tries=tries, compress=compress,
                             cache_size=cache_size)
    del obj
    gc.collect()
    mem_write = memory_used(joblib.dump, dataset, 'out/test.pkl',
                            compress=compress, cache_size=cache_size)

    delete_obj(dataset)

    du = disk_used('out') / 1024.
    time_read, obj = timeit(joblib.load, 'out/test.pkl', tries=tries)
    delete_obj(obj)
    mem_read = memory_used(joblib.load, 'out/test.pkl')
    print_line(name, compress_str, time_write, time_read, mem_write,
               mem_read, du)
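# A minimal usage sketch (a hypothetical driver, not part of the original
# script): compare raw pickling against a few zlib levels. The dataset is
# regenerated on each iteration so the run does not depend on how
# delete_obj reclaims the object passed to the previous bench.
def run_compress_bench():
    import numpy as np
    for strategy in (('zlib', 0), ('zlib', 3), ('zlib', 9)):
        dataset = np.random.normal(size=(1000, 1000))
        bench_compress(dataset, name='random normal', compress=strategy)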
def bench_mmap(dataset, name='', cache_size=0, mmap_mode='r', tries=5):
    """Bench joblib dump and load functions, memmap modes."""
    clear_out()
    time_write, _ = timeit(joblib.dump, dataset, 'out/test.pkl',
                           tries=tries, cache_size=cache_size)
    mem_write = memory_used(joblib.dump, dataset, 'out/test.pkl',
                            cache_size=cache_size)

    delete_obj(dataset)

    time_read, obj = timeit(joblib.load, 'out/test.pkl', tries=tries,
                            mmap_mode=mmap_mode)
    delete_obj(obj)
    mem_read = memory_used(joblib.load, 'out/test.pkl',
                           mmap_mode=mmap_mode)
    du = disk_used('out') / 1024.
    print_line(name, 'mmap %s' % mmap_mode, time_write, time_read,
               mem_write, mem_read, du)
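# A matching hypothetical driver for the memmap bench: with mmap_mode set,
# joblib.load maps the pickled array from disk instead of reading it all
# into memory, so mem_read is expected to stay low even for large arrays.
def run_mmap_bench():
    import numpy as np
    for mode in ('r', 'r+'):
        dataset = np.random.normal(size=(1000, 1000))
        bench_mmap(dataset, name='random normal', mmap_mode=mode)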
# nose-based variant of the same test, managing its own temporary directory
# instead of relying on the pytest tmpdir fixture.
def test_disk_used():
    cachedir = mkdtemp()
    try:
        if os.path.exists(cachedir):
            shutil.rmtree(cachedir)
        os.mkdir(cachedir)
        # Now write a file that is 1M big in this directory, and check the
        # size. The reason we use such a big file is that it makes us robust
        # to errors due to block allocation.
        a = array.array('i')
        sizeof_i = a.itemsize
        target_size = 1024
        n = int(target_size * 1024 / sizeof_i)
        a = array.array('i', n * (1,))
        with open(os.path.join(cachedir, 'test'), 'wb') as output:
            a.tofile(output)
        nose.tools.assert_true(disk_used(cachedir) >= target_size)
        nose.tools.assert_true(disk_used(cachedir) < target_size + 12)
    finally:
        shutil.rmtree(cachedir)