}) resource_tasks = {} for r in all_resources: j = b.new_job(f'create_resource_{r.name()}').cpu(4) j.command( f'hail-bench create-resources --data-dir benchmark-resources --group {r.name()}' ) j.command( f"time tar -cf {r.name()}.tar benchmark-resources/{r.name()} --exclude='*.crc'" ) j.command(f'ls -lh {r.name()}.tar') j.command(f'mv {r.name()}.tar {j.ofile}') resource_tasks[r] = j all_benchmarks = list_benchmarks() assert len(all_benchmarks) > 0 all_output = [] task_filter_regex_include = os.environ.get('BENCHMARK_REGEX_INCLUDE') task_filter_regex_exclude = os.environ.get('BENCHMARK_REGEX_EXCLUDE') if task_filter_regex_include: include = lambda t: re.match(task_filter_regex_include, t) is not None else: include = lambda t: True if task_filter_regex_exclude: exclude = lambda t: re.match(task_filter_regex_exclude, t) is not None else:
def submit(hail_code: Commit, benchmark_code: Commit, test_names: Set[str], n_replicates: int, n_iters: int):
    """Build Hail and benchmark wheels at the given commits and submit a Batch
    pipeline that runs every benchmark in ``test_names`` ``n_replicates`` times
    (``n_iters`` iterations each), tree-combines the per-job results, and writes
    the combined JSON to ``gs://hail-benchmarks-2/benchmark/``.

    Parameters:
        hail_code: commit of Hail to build and benchmark.
        benchmark_code: commit providing the benchmark harness.
        test_names: names of benchmarks to run; others are skipped.
        n_replicates: number of independent replicate jobs per benchmark.
        n_iters: iterations per replicate, passed to ``hail-bench run -n``.
    """
    sync_check_shell(benchmark_code.checkout_script())

    sys.path.insert(0, f'{benchmark_code.repo_dir()}/benchmark/python/benchmark_hail')
    # FIX: was `importlib.invalidate_caches` (a pointless attribute access) —
    # the call is required so the import system sees the fresh checkout.
    importlib.invalidate_caches()
    from benchmark_hail.run.resources import all_resources  # pylint: disable=import-error, import-outside-toplevel
    from benchmark_hail.run.utils import list_benchmarks  # pylint: disable=import-error, import-outside-toplevel

    output_file = f'gs://hail-benchmarks-2/benchmark/{hail_code.sha}-{benchmark_code.sha}.json'
    b = hb.Batch(name=f'benchmark-{hail_code.sha}',
                 backend=hb.ServiceBackend(billing_project='hail'),
                 default_image=BENCHMARK_IMAGE,
                 default_cpu='2',
                 attributes={'output_file': output_file,
                             'n_replicates': str(n_replicates),
                             'n_iters': str(n_iters),
                             'image': str(BENCHMARK_IMAGE),
                             'hail_code': str(hail_code),
                             'benchmark_code': str(benchmark_code)})

    build_hail = b.new_job('build_hail_wheel')
    build_hail.command(f'''
    set -ex
    { hail_code.checkout_script() }
    cd hail
    time ./gradlew --version
    time make wheel
    time (cd python && zip -r hail.zip hail hailtop)
    (cd build/deploy/dist/ && tar -cvf wheel-container.tar hail-*-py3-none-any.whl)
    cp build/deploy/dist/hail-*-py3-none-any.whl {build_hail.wheel}
    ''')

    build_benchmark = b.new_job('build_benchmark_wheel')
    build_benchmark.command(f'''
    set -ex
    {benchmark_code.checkout_script()}
    make -C hail python/hail/hail_pip_version
    export HAIL_VERSION=$(cat hail/python/hail/hail_pip_version)
    export HAIL_BENCHMARK_VERSION=$HAIL_VERSION
    cd benchmark/python/ && python3 setup.py -q bdist_wheel
    python3 -m pip -q install dist/benchmark_hail-$HAIL_VERSION-py3-none-any.whl
    cp dist/benchmark_hail-$HAIL_VERSION-py3-none-any.whl {build_benchmark.wheel}
    ''')

    def install_wheels(j):
        # Every downstream job needs both freshly built wheels installed.
        # (Previously copy-pasted in four places; the resource jobs also used
        # `benchmark_hail-$HAIL_VERSION-...` with $HAIL_VERSION unset, which
        # expanded to the same `benchmark_hail--py3-none-any.whl` used here.)
        j.command(f'mv {build_hail.wheel} hail--py3-none-any.whl')
        j.command('pip install hail--py3-none-any.whl')
        j.command(f'mv {build_benchmark.wheel} benchmark_hail--py3-none-any.whl')
        j.command('pip install benchmark_hail--py3-none-any.whl')

    # One job per resource group: generate the data once, tar it, and hand the
    # tarball to the benchmark jobs that need it.
    resource_jobs = {}
    for r in all_resources:
        j = b.new_job(f'create_resource_{r.name()}').cpu(4)
        install_wheels(j)
        j.command(f'hail-bench create-resources --data-dir benchmark-resources --group {r.name()}')
        j.command(f"time tar -cf {r.name()}.tar benchmark-resources/{r.name()} --exclude='*.crc'")
        j.command(f'ls -lh {r.name()}.tar')
        j.command(f'mv {r.name()}.tar {j.ofile}')
        resource_jobs[r] = j

    all_benchmarks = list_benchmarks()
    assert len(all_benchmarks) > 0

    all_output = []

    n_passed_filter = 0
    job_fs = []
    for benchmark in all_benchmarks:
        if benchmark.name in test_names:
            n_passed_filter += 1
            for replicate in range(n_replicates):
                job_fs.append((benchmark.name, replicate, benchmark.groups))

    log.info(f'generating {n_passed_filter} * {n_replicates} = {n_passed_filter * n_replicates} individual benchmark jobs')

    # Shuffle so replicates of the same benchmark don't all land on the same
    # machines at the same time.
    random.shuffle(job_fs)
    for name, replicate, groups in job_fs:
        j = b.new_job(name=f'{name}_{replicate}')
        install_wheels(j)
        j.command('mkdir -p benchmark-resources')
        for resource_group in groups:
            resource_job = resource_jobs[resource_group]
            j.command(f'mv {resource_job.ofile} benchmark-resources/{resource_group.name()}.tar')
            j.command(f'time tar -xf benchmark-resources/{resource_group.name()}.tar')
        # FIX: each assignment needs a trailing space — the adjacent f-strings
        # previously concatenated into one garbled variable
        # (MKL_NUM_THREADS=1OPENBLAS_NUM_THREADS=1...), so the thread limits
        # were never actually applied.
        j.command(f'MKL_NUM_THREADS=1 '
                  f'OPENBLAS_NUM_THREADS=1 '
                  f'OMP_NUM_THREADS=1 '
                  f'VECLIB_MAXIMUM_THREADS=1 '
                  f'PYSPARK_SUBMIT_ARGS="--driver-memory 6G pyspark-shell" '
                  f'hail-bench run -o {j.ofile} -n {n_iters} --data-dir benchmark-resources -t {name}')
        all_output.append(j.ofile)

    # Tree-combine the per-replicate result files, at most
    # `combine_branch_factor` inputs per combine job, until one final combine
    # can take them all.
    combine_branch_factor = int(os.environ.get('BENCHMARK_BRANCH_FACTOR', 32))
    phase_i = 1
    while len(all_output) > combine_branch_factor:
        new_output = []

        job_i = 1
        i = 0
        while i < len(all_output):
            combine = b.new_job(f'combine_output_phase{phase_i}_job{job_i}')
            install_wheels(combine)
            combine.command(
                f'hail-bench combine -o {combine.ofile} ' + ' '.join(all_output[i:i + combine_branch_factor]))
            new_output.append(combine.ofile)
            i += combine_branch_factor
            job_i += 1

        phase_i += 1
        all_output = new_output

    combine = b.new_job('final_combine_output')
    install_wheels(combine)
    combine.command(f'hail-bench combine -o {combine.ofile} ' + ' '.join(all_output))
    combine.command(f'cat {combine.ofile}')

    log.info(f'writing output to {output_file}')
    b.write_output(combine.ofile, output_file)
    b.run()