# Standard-library and third-party imports inferred from the usage below.
# The project-local names (S3PrefetchFileSystem, helpers, read_trk,
# create_fig, seg_setup, sgmt, save_fg, read_chunks, write_benchmark,
# s3_path) are assumed to come from the surrounding project; their module
# paths are not shown in this excerpt.
import sys
import os.path as op
from time import perf_counter, perf_counter_ns

import nibabel as nib
from nibabel.streamlines.trk import TrkFile
from dipy.io.stateful_tractogram import Origin, Space, StatefulTractogram


def histogram_prefetch(
    path,
    lazy,
    block_size,
    prefetch_storage,
    nbins=20,
    output_dir="../outputs",
    bfile="real.out",
):
    print("In prefetch", path)
    fs = S3PrefetchFileSystem()
    fs.invalidate_cache()

    nnodes = []
    start = perf_counter_ns()  # timing start (not currently written out)

    with fs.open(
        path,
        block_size=block_size,
        prefetch_storage=prefetch_storage,
        header_bytes=1000,
    ) as f:
        tfile = nib.streamlines.load(f, lazy_load=lazy)
        # Collect the number of points in each streamline for the histogram.
        for stream in tfile.streamlines:
            nnodes.append(len(stream))

    output_file = op.join(output_dir, f"histogram_prefetch_{len(path)}.pdf")
    create_fig(output_file, nnodes, nbins)
def segmentation_prefetch(
    path,
    lazy,
    block_size,
    prefetch_storage,
    alg="waypoint",
    output_dir="../outputs",
    njobs="-1",
    bfile="real.out",
):
    print("In prefetch", path)
    data = seg_setup(alg)

    fs = S3PrefetchFileSystem()
    fs.invalidate_cache()

    start = perf_counter_ns()
    with fs.open(
        path,
        block_size=block_size,
        prefetch_storage=prefetch_storage,
        header_bytes=1000,
    ) as f:
        tfile = nib.streamlines.load(f, lazy_load=lazy)
        # tfile.save(op.join(output_dir, "prefetch.trk"))
        # return "prefetch done"
        tractogram_obj = tfile.tractogram
        streamlines = tractogram_obj.streamlines
        end = perf_counter_ns()

        # Record the load time in seconds (perf_counter_ns is in nanoseconds).
        with open(bfile, "a+") as fo:
            fo.write(
                f'read_prefetch,"{path}",{start},{(end - start) * 10**-9},'
                f'{lazy},{block_size},"{prefetch_storage}"\n'
            )

        data_per_point = tractogram_obj.data_per_point
        data_per_streamline = tractogram_obj.data_per_streamline

        # With lazy loading the streamlines are generators, so the tractogram
        # must be materialized into a StatefulTractogram while the file is
        # still open.
        sft = StatefulTractogram(
            streamlines,
            data["img"],
            Space.RASMM,
            origin=Origin.NIFTI,
            data_per_point=data_per_point,
            data_per_streamline=data_per_streamline,
        )
        fiber_groups = sgmt(
            data["bundles"],
            sft,
            data["mapping"],
            data["MNI_T2_img"],
            alg=alg,
            njobs=njobs,
        )
        save_fg(fiber_groups, data["img"], output_dir, f"prefetch{len(path) - 1}")
        return fiber_groups
def read_prefetched(
    path,
    lazy,
    block_size,
    prefetch_storage,
    bfile="read_file.bench",
    header_bytes=1000,
):
    helpers.drop_caches()

    fs = S3PrefetchFileSystem()
    fs.invalidate_cache()

    with fs.open(
        path,
        block_size=block_size,
        prefetch_storage=prefetch_storage,
        header_bytes=header_bytes,
    ) as f:
        data = read_trk(f, lazy, bfile=bfile)
    print(data)
def read_prefetch_nib(paths, lazy, block_size, prefetch_storage, bfile="read_file.bench"):
    fs = S3PrefetchFileSystem()
    fs.invalidate_cache()

    # Forward prefetch_storage to open(); the original accepted the parameter
    # but never passed it along, which looks like an oversight.
    with fs.open(
        paths,
        "rb",
        block_size=block_size,
        prefetch_storage=prefetch_storage,
        header_bytes=1000,
    ) as f:
        streamlines = TrkFile.load(f, lazy_load=lazy).streamlines
def read_prefetch_bytes(paths, lazy, block_size, prefetch_storage, bfile="read_file.bench"):
    fs = S3PrefetchFileSystem()
    fs.invalidate_cache()

    with fs.open(
        paths,
        "rb",
        block_size=block_size,
        prefetch_storage=prefetch_storage,
        header_bytes=1000,
    ) as f:
        out = f.read()
def main():
    # Command-line arguments: shard range [start, end), repetition index,
    # and the number of parallel threads (used only in the output filename).
    start = int(sys.argv[1])
    end = int(sys.argv[2])
    rep = int(sys.argv[3])
    nthreads = int(sys.argv[4])

    prefetch_storage = [(f"/dev/shm/{end // (end - start)}", 1 * 1024)]
    bs = 64 * 2**20  # 64 MiB block size
    lazy = True

    header = ["vhs-bucket/hydi-header.trk"]

    fs = S3PrefetchFileSystem()
    files = fs.glob("hydi-tractography/hydi_tracks.*.trk")[start:end]

    bfile = (
        f"../results/us-west-2-xlarge/"
        f"read_prefetch_{nthreads}parallel_{start}-{end}_{rep}.csv"
    )
    helpers.setup_bench(bfile)

    read_prefetched(header + files, lazy, bs, prefetch_storage, bfile=bfile)
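# Assumed entry point: main() parses sys.argv directly, which suggests this
# module is run as a script. This guard is an addition, not part of the
# original excerpt.
if __name__ == "__main__":
    main()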
def bench_prefetch(
    size,
    rep,
    output,
    block_size=None,
    prefetch_storage=[("/dev/shm", 5 * 1024**2)],
    read_size=-1,
    read_len=None,
):
    fs = "pf"  # filesystem label written to the benchmark output

    # Default the read/block parameters to the full file size.
    if read_len is None:
        read_len = size
    if read_size == -1:
        read_size = size
    if block_size is None:
        block_size = size

    # Clear OS caches so reads are not served from the page cache.
    helpers.drop_caches()

    s3 = S3PrefetchFileSystem()

    start_open = perf_counter()
    with s3.open(
        f"{s3_path}{size}.out",
        "rb",
        block_size=block_size,
        prefetch_storage=prefetch_storage,
    ) as f:
        end_open = perf_counter()
        end = read_chunks(f, read_size, read_len, fs, rep, size, block_size, output)

    # Record both the total time (open + read) and the open time alone.
    write_benchmark(
        output, fs, rep, "total", size, end - start_open, block_size, read_size, read_len
    )
    write_benchmark(
        output, fs, rep, "open", size, end_open - start_open, block_size, read_size, read_len
    )
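# Hypothetical convenience wrapper, not in the original code: a minimal sweep
# of bench_prefetch over a few object sizes and repetitions, assuming objects
# named "<size>.out" already exist under the module-level `s3_path` prefix.
# The default sizes and repetition count are illustrative placeholders.
def sweep_prefetch_sizes(output="prefetch.bench", sizes=(2**20, 2**24, 2**27), reps=3):
    for size in sizes:
        for rep in range(reps):
            bench_prefetch(size, rep, output)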