def main(prefetch_storage, block_size, n_files, lazy, reps, types, bfile):
    """Benchmark reading tractography files via the mem, prefetch and s3fs backends.

    Each repetition shuffles the backend order, drops OS caches, and appends
    timings to the bench file.
    """
    types = list(types)
    header = ["vhs-bucket/hydi-header.trk"]

    fs = S3FileSystem()
    files = fs.glob("hydi-tractography/hydi_tracks.*.trk")[:n_files]

    results_path = "../results/us-west-2-xlarge/"
    if bfile == "":
        # Default output name encodes the benchmark configuration.
        bfile = os.path.join(
            results_path,
            f"readnib_{n_files}f_{'lazy' if lazy else 'nolazy'}_{reps}r_{block_size}b.out",
        )
    else:
        bfile = os.path.join(results_path, bfile)

    helpers.setup_bench(bfile)

    for _ in range(reps):
        random.shuffle(types)
        for backend in types:
            print(backend)
            helpers.drop_caches()

            if backend == "mem":
                # Stage the files into tmpfs first, then time reads from RAM.
                mem_files = [
                    os.path.join("/dev/shm", os.path.basename(path))
                    for path in files
                ]
                fs.get(files, mem_files)
                helpers.drop_caches()
                read_mem(mem_files, lazy, bfile=bfile)
                for path in mem_files:
                    os.unlink(path)
            elif backend == "prefetch":
                read_prefetched(
                    header + files, lazy, block_size, prefetch_storage, bfile=bfile
                )
            else:
                read_s3fs(files, lazy, block_size, bfile=bfile)
def main():
    """Read a slice of .trk files through s3fs in one process of a parallel run.

    CLI arguments (positional):
        start, end -- slice of the globbed file list to read
        rep        -- repetition index (only used in the output file name)
        nthreads   -- total parallel processes (only used in the output file name)
    """
    start = int(sys.argv[1])
    end = int(sys.argv[2])
    rep = int(sys.argv[3])
    nthreads = int(sys.argv[4])

    bs = 64 * 2 ** 20  # 64 MiB read block size
    lazy = True

    helpers.drop_caches()

    fs = S3FileSystem()
    files = fs.glob("hydi-tractography/hydi_tracks.*.trk")[start:end]

    bfile = (
        "../results/us-west-2-xlarge/"
        f"read_s3fs_{nthreads}parallel_{start}-{end}_{rep}.csv"
    )
    helpers.setup_bench(bfile)
    # Fix: removed the unused local `header` the original assigned and never read.
    read_s3fs(files, lazy, bs, bfile=bfile)
def main():
    """Read a slice of .trk files through the prefetch filesystem in one
    process of a parallel run.

    CLI arguments (positional): start, end, rep, nthreads — see read_s3fs twin.
    """
    start = int(sys.argv[1])
    end = int(sys.argv[2])
    rep = int(sys.argv[3])
    nthreads = int(sys.argv[4])

    # Per-process prefetch directory under tmpfs, 1 GiB cache budget.
    # NOTE(review): `end // (end - start)` raises ZeroDivisionError when
    # end == start — confirm callers always pass a non-empty range.
    prefetch_storage = [(f'/dev/shm/{end//(end-start)}', 1*1024)]

    bs = 64 * 2 ** 20  # 64 MiB read block size
    lazy = True
    header = ["vhs-bucket/hydi-header.trk"]

    fs = S3PrefetchFileSystem()
    files = fs.glob("hydi-tractography/hydi_tracks.*.trk")[start:end]

    bfile = (
        "../results/us-west-2-xlarge/"
        f"read_prefetch_{nthreads}parallel_{start}-{end}_{rep}.csv"
    )
    helpers.setup_bench(bfile)
    read_prefetched(header + files, lazy, bs, prefetch_storage, bfile=bfile)
def main():
    """Compare nibabel vs raw-byte reads over mem, s3fs and prefetch backends
    for growing file counts (1..n_files), appending timings to one bench file.
    """
    bfile = "../results/us-west-2-xlarge/readcmp-1-5f.out"
    reps = 5
    n_files = 5
    block_size = 64 * 2**20
    prefetch_storage = [("/dev/shm", 1024)]
    lazy = False
    types = ["mem", "mem_nib", "s3fs", "s3fs_nib", "prefetch", "prefetch_nib"]

    fs = S3FileSystem()
    header = ["vhs-bucket/hydi-header.trk"]
    all_paths = fs.glob("hydi-tractography/hydi*")
    all_mem_paths = [
        os.path.join("/dev/shm", os.path.basename(path)) for path in all_paths
    ]

    helpers.setup_bench(bfile)
    for _ in range(reps):
        for count in range(1, n_files + 1):
            paths = all_paths[:count]
            mem_paths = all_mem_paths[:count]
            random.shuffle(types)
            for bench in types:
                helpers.drop_caches()
                if "mem" in bench:
                    # Stage into tmpfs, time the read, then clean up.
                    fs.get(paths, mem_paths)
                    helpers.drop_caches()
                    if bench == "mem":
                        read_mem_bytes(mem_paths, lazy, bfile=bfile)
                    else:
                        read_mem_nib(mem_paths, lazy, bfile=bfile)
                    for staged in mem_paths:
                        os.unlink(staged)
                elif "s3fs_nib" in bench:
                    read_s3fs_nib(paths, lazy, block_size, bfile=bfile)
                elif "s3fs" in bench:
                    read_s3fs_bytes(paths, lazy, block_size, bfile=bfile)
                elif "prefetch_nib" in bench:
                    read_prefetch_nib(
                        header + paths,
                        lazy,
                        block_size,
                        prefetch_storage,
                        bfile=bfile,
                    )
                else:
                    read_prefetch_bytes(
                        header + paths,
                        lazy,
                        block_size,
                        prefetch_storage,
                        bfile=bfile,
                    )
def main(
    file_type,
    prefetch_storage,
    block_size,
    n_files,
    reps,
    types,
    output_dir,
    nbins,
    dask,
    nworkers,
    instance,
    lazy,
):
    """Benchmark histogram computation over .trk files, sequentially or on a
    local Dask cluster.

    Parameters mirror the CLI: `file_type` selects original vs sharded files,
    `dask`/`nworkers` control parallel fan-out, and results are appended to a
    bench file named after the full configuration under ../results/<instance>.
    """
    types = list(types)

    fs = S3FileSystem()
    if file_type == "orig":
        header = ["vhs-bucket/hydi-header.trk"]
        files = fs.glob("hydi-tractography/hydi_tracks.*.trk")[:n_files]
    else:
        header = ["vhs-bucket/shards/hydi_shard_header.trk"]
        files = fs.glob("vhs-bucket/shards/hydi_tracks.*.trk")[:n_files]

    results_path = op.join("../results/", instance)
    makedirs(results_path, exist_ok=True)
    bfile = op.join(
        results_path,
        f"histogram_{file_type}_{n_files}f_{reps}r_{block_size}b_{nbins}bins_{str(nworkers) + 'dask' if dask else 'seq'}_{'lazy' if lazy else 'nolazy'}.out",
    )
    helpers.setup_bench(bfile)

    if dask:
        cluster = LocalCluster(n_workers=nworkers)
        client = Client(cluster)

    for _ in range(reps):
        # random.shuffle(types)
        for t in types:
            print(t)
            helpers.drop_caches()
            if dask:
                results = []
                f_per_w = n_files // nworkers
                for i in range(nworkers):
                    # Fix: the last worker absorbs the remainder so no file is
                    # silently skipped when n_files % nworkers != 0 (the
                    # original dropped the trailing files).
                    lo = i * f_per_w
                    hi = (i + 1) * f_per_w if i < nworkers - 1 else n_files
                    chunk = files[lo:hi]
                    print(chunk)
                    if t == "s3fs":
                        seg = client.submit(
                            histogram_s3fs,
                            chunk,
                            lazy,
                            block_size,
                            nbins=nbins,
                            output_dir=output_dir,
                            bfile=bfile,
                        )
                    else:
                        seg = client.submit(
                            histogram_prefetch,
                            header + chunk,
                            lazy,
                            block_size,
                            prefetch_storage,
                            nbins=nbins,
                            output_dir=output_dir,
                            bfile=bfile,
                        )
                    results.append(seg)
                print(client.gather(results))
            elif t == "s3fs":
                histogram_s3fs(
                    files,
                    lazy,
                    block_size,
                    nbins=nbins,
                    output_dir=output_dir,
                    bfile=bfile,
                )
            else:
                histogram_prefetch(
                    header + files,
                    lazy,
                    block_size,
                    prefetch_storage,
                    nbins=nbins,
                    output_dir=output_dir,
                    bfile=bfile,
                )
def main(prefetch_storage, block_size, n_files, reps, types, nworkers):
    """Run RecoBundles segmentation benchmarks over .trk files on a local
    Dask cluster, comparing the s3fs and prefetch backends.

    Work is fanned out across `nworkers` Dask workers; timings are appended to
    a bench file named after the configuration.
    """
    types = list(types)
    header = ["vhs-bucket/hydi-header.trk"]

    fs = S3FileSystem()
    files = fs.glob("hydi-tractography/hydi_tracks.*.trk")[:n_files]
    print(files)

    results_path = "../results/"
    bfile = op.join(
        results_path,
        f"real_{n_files}f_{reps}r_{block_size}b_{nworkers}w-recobundles.out",
    )
    helpers.setup_bench(bfile)

    cluster = LocalCluster(n_workers=nworkers, resources={"CPU": 3})
    client = Client(cluster)

    for _ in range(reps):
        # random.shuffle(types)
        for t in types:
            print("***", t, "***")
            helpers.drop_caches()
            print(client)
            data = {}
            results = []
            print(t)
            f_per_w = n_files // nworkers
            for i in range(nworkers):
                # Fix: the last worker absorbs the remainder so no file is
                # silently skipped when n_files % nworkers != 0 (the original
                # dropped the trailing files).
                lo = i * f_per_w
                hi = (i + 1) * f_per_w if i < nworkers - 1 else n_files
                chunk = files[lo:hi]
                print(chunk)
                if t == "s3fs":
                    seg = client.submit(
                        segmentation_s3fs,
                        chunk,
                        False,
                        block_size,
                        **data,
                        bfile=bfile,
                    )
                else:
                    seg = client.submit(
                        segmentation_prefetch,
                        header + chunk,
                        False,
                        block_size,
                        prefetch_storage,
                        **data,
                        bfile=bfile,
                    )
                results.append(seg)
            print(client.gather(results))
            # Clean up any joblib workers left behind by the segmentation.
            # NOTE(review): placement relative to the loops is ambiguous in the
            # collapsed source — confirm it should run once per backend type.
            system("pkill -f joblib")