def copy_input(task, r):
    if isinstance(r, InputResourceFile):
        if r not in copied_input_resource_files:
            copied_input_resource_files.add(r)
            if r._input_path.startswith('gs://'):
                return [f'gsutil cp {r._input_path} {r._get_path(tmpdir)}']
            else:
                absolute_input_path = shq(os.path.realpath(r._input_path))
                if task._image is not None:  # pylint: disable-msg=W0640
                    return [f'cp {absolute_input_path} {r._get_path(tmpdir)}']
                else:
                    return [f'ln -sf {absolute_input_path} {r._get_path(tmpdir)}']
        else:
            return []
    else:
        assert isinstance(r, TaskResourceFile)
        return []
async def docker_run(*args: str):
    script = ' '.join([shq(a) for a in args])
    outerr = await check_shell_output(script)
    cid = outerr[0].decode('ascii').strip()

    outerr = await check_shell_output(f'docker wait {cid}')
    exit_code = int(outerr[0].decode('ascii').strip())
    return cid, exit_code == 0
async def docker_run(*args: str):
    script = ' '.join([shq(a) for a in args])
    outerr = await check_shell_output(script, echo=True)

    print(f'Container output: {outerr[0]}\n'
          f'Container error: {outerr[1]}')

    cid = outerr[0].decode('ascii').strip()

    outerr = await check_shell_output(f'docker wait {cid}')
    exit_code = int(outerr[0].decode('ascii').strip())
    return cid, exit_code == 0
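# A usage sketch, not from the source (the image and command below are hypothetical):
# docker_run shell-quotes and joins its arguments into one command line, so the caller
# is expected to pass a detached `docker run -d ...` invocation whose stdout is the
# container id, which is then handed to `docker wait` to recover the exit code.
async def example_docker_run_usage():
    cid, succeeded = await docker_run(
        'docker', 'run', '-d', 'ubuntu:20.04', '/bin/bash', '-c', 'echo hello')
    print(f'container {cid} {"succeeded" if succeeded else "failed"}')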
def handler(match_obj):
    groups = match_obj.groupdict()
    if groups['JOB']:
        raise BatchException(f"found a reference to a Job object in command '{command}'.")
    if groups['BATCH']:
        raise BatchException(f"found a reference to a Batch object in command '{command}'.")
    if groups['PYTHON_RESULT'] and not allow_python_results:
        raise BatchException(
            f"found a reference to a PythonResult object. hint: Use one of the methods `as_str`, `as_json` or `as_repr` on a PythonResult. command: '{command}'")

    assert groups['RESOURCE_FILE'] or groups['RESOURCE_GROUP'] or groups['PYTHON_RESULT']

    r_uid = match_obj.group()
    r = self._batch._resource_map.get(r_uid)
    if r is None:
        raise BatchException(f"undefined resource '{r_uid}' in command '{command}'.\n"
                             f"Hint: resources must be from the same batch as the current job.")

    if r._source != self:
        self._add_inputs(r)
        if r._source is not None:
            if r not in r._source._valid:
                name = r._source._resources_inverse[r]
                raise BatchException(f"undefined resource '{name}'\n"
                                     f"Hint: resources must be defined within "
                                     f"the job methods 'command' or 'declare_resource_group'")
            self._dependencies.add(r._source)
            r._source._add_internal_outputs(r)
    else:
        _add_resource_to_set(self._valid, r)

    self._mentioned.add(r)
    return '${BATCH_TMPDIR}' + shq(r._get_path(''))
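# For context, a hedged sketch of the user-facing pattern this handler backs (the calls
# below assume the public hailtop.batch interface; the output destination is a
# placeholder): interpolating a resource such as j.ofile into a command string is what
# produces the resource uids the handler resolves into '${BATCH_TMPDIR}/...' paths.
import hailtop.batch as hb

b = hb.Batch(name='interpolation-example')
j = b.new_job(name='write')
# j.ofile becomes a resource uid in the stored command, later rewritten by handler()
# to a ${BATCH_TMPDIR} path when the command is rendered.
j.command(f'echo hello > {j.ofile}')
b.write_output(j.ofile, 'hello.txt')  # hypothetical destination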
def build(self, batch, code, deploy):
    if self.inputs:
        input_files = []
        for i in self.inputs:
            input_files.append((f'{BUCKET}/build/{batch.attributes["token"]}{i["from"]}',
                                f'/io/{os.path.basename(i["to"])}'))
    else:
        input_files = None

    config = self.input_config(code, deploy)

    if self.context_path:
        context = f'repo/{self.context_path}'
        init_context = ''
    else:
        context = 'context'
        init_context = 'mkdir context'

    dockerfile = 'Dockerfile'
    render_dockerfile = f'python3 jinja2_render.py {shq(json.dumps(config))} {shq(f"repo/{self.dockerfile}")} Dockerfile'

    if self.publish_as:
        published_latest = shq(f'gcr.io/{GCP_PROJECT}/{self.publish_as}:latest')
        pull_published_latest = f'docker pull {shq(published_latest)} || true'
        cache_from_published_latest = f'--cache-from {shq(published_latest)}'
    else:
        pull_published_latest = ''
        cache_from_published_latest = ''

    push_image = f'''
time docker push {self.image}
'''
    if deploy and self.publish_as:
        push_image = f'''
docker tag {shq(self.image)} {self.base_image}:latest
docker push {self.base_image}:latest
''' + push_image

    copy_inputs = ''
    if self.inputs:
        for i in self.inputs:
            # to is relative to docker context
            copy_inputs = copy_inputs + f'''
mkdir -p {shq(os.path.dirname(f'{context}{i["to"]}'))}
cp {shq(f'/io/{os.path.basename(i["to"])}')} {shq(f'{context}{i["to"]}')}
'''

    script = f'''
set -ex
date

rm -rf repo
mkdir repo
(cd repo; {code.checkout_script()})
{render_dockerfile}
{init_context}
{copy_inputs}

FROM_IMAGE=$(awk '$1 == "FROM" {{ print $2; exit }}' {shq(dockerfile)})

gcloud -q auth activate-service-account \
  --key-file=/secrets/gcr-push-service-account-key/gcr-push-service-account-key.json
gcloud -q auth configure-docker

docker pull $FROM_IMAGE
{pull_published_latest}
docker build --memory="1.5g" --cpu-period=100000 --cpu-quota=100000 -t {shq(self.image)} \
  -f {dockerfile} \
  --cache-from $FROM_IMAGE {cache_from_published_latest} \
  {context}
{push_image}
date
'''

    log.info(f'step {self.name}, script:\n{script}')

    volumes = [{
        'volume': {
            'name': 'docker-sock-volume',
            'hostPath': {
                'path': '/var/run/docker.sock',
                'type': 'File'
            }
        },
        'volume_mount': {
            'mountPath': '/var/run/docker.sock',
            'name': 'docker-sock-volume'
        }
    }, {
        'volume': {
            'name': 'gcr-push-service-account-key',
            'secret': {
                'optional': False,
                'secretName': 'gcr-push-service-account-key'
            }
        },
        'volume_mount': {
            'mountPath': '/secrets/gcr-push-service-account-key',
            'name': 'gcr-push-service-account-key',
            'readOnly': True
        }
    }]

    self.job = batch.create_job(CI_UTILS_IMAGE,
                                command=['bash', '-c', script],
                                resources={
                                    'requests': {
                                        'memory': '2G',
                                        'cpu': '1'
                                    },
                                    'limits': {
                                        'memory': '2G',
                                        'cpu': '1'
                                    }
                                },
                                attributes={'name': self.name},
                                volumes=volumes,
                                input_files=input_files,
                                parents=self.deps_parents())
def build(self, batch, code, scope):
    if self.inputs:
        input_files = []
        for i in self.inputs:
            input_files.append((f'{BUCKET}/build/{batch.attributes["token"]}{i["from"]}',
                                f'/io/{os.path.basename(i["to"])}'))
    else:
        input_files = None

    config = self.input_config(code, scope)

    if self.context_path:
        context = f'repo/{self.context_path}'
        init_context = ''
    else:
        context = 'context'
        init_context = 'mkdir context'

    rendered_dockerfile = 'Dockerfile'
    if isinstance(self.dockerfile, dict):
        assert ['inline'] == list(self.dockerfile.keys())
        render_dockerfile = f'echo {shq(self.dockerfile["inline"])} > Dockerfile.{self.token};\n'
        unrendered_dockerfile = f'Dockerfile.{self.token}'
    else:
        assert isinstance(self.dockerfile, str)
        render_dockerfile = ''
        unrendered_dockerfile = f'repo/{self.dockerfile}'
    render_dockerfile += (f'python3 jinja2_render.py {shq(json.dumps(config))} '
                          f'{shq(unrendered_dockerfile)} {shq(rendered_dockerfile)}')

    if self.publish_as:
        published_latest = shq(f'gcr.io/{GCP_PROJECT}/{self.publish_as}:latest')
        pull_published_latest = f'retry docker pull {shq(published_latest)} || true'
        cache_from_published_latest = f'--cache-from {shq(published_latest)}'
    else:
        pull_published_latest = ''
        cache_from_published_latest = ''

    push_image = f'''
time retry docker push {self.image}
'''
    if scope == 'deploy' and self.publish_as and not is_test_deployment:
        push_image = f'''
docker tag {shq(self.image)} {self.base_image}:latest
retry docker push {self.base_image}:latest
''' + push_image

    copy_inputs = ''
    if self.inputs:
        for i in self.inputs:
            # to is relative to docker context
            copy_inputs = copy_inputs + f'''
mkdir -p {shq(os.path.dirname(f'{context}{i["to"]}'))}
cp {shq(f'/io/{os.path.basename(i["to"])}')} {shq(f'{context}{i["to"]}')}
'''

    script = f'''
set -ex
date

{ RETRY_FUNCTION_SCRIPT }

rm -rf repo
mkdir repo
(cd repo; {code.checkout_script()})
{render_dockerfile}
{init_context}
{copy_inputs}

FROM_IMAGE=$(awk '$1 == "FROM" {{ print $2; exit }}' {shq(rendered_dockerfile)})

gcloud -q auth activate-service-account \
  --key-file=/secrets/gcr-push-service-account-key/gcr-push-service-account-key.json
gcloud -q auth configure-docker

retry docker pull $FROM_IMAGE
{pull_published_latest}
docker build --memory="1.5g" --cpu-period=100000 --cpu-quota=100000 -t {shq(self.image)} \
  -f {rendered_dockerfile} \
  --cache-from $FROM_IMAGE {cache_from_published_latest} \
  {context}
{push_image}
date
'''

    log.info(f'step {self.name}, script:\n{script}')

    self.job = batch.create_job(CI_UTILS_IMAGE,
                                command=['bash', '-c', script],
                                mount_docker_socket=True,
                                secrets=[{
                                    'namespace': BATCH_PODS_NAMESPACE,
                                    'name': 'gcr-push-service-account-key',
                                    'mount_path': '/secrets/gcr-push-service-account-key'
                                }],
                                resources={
                                    'memory': '2G',
                                    'cpu': '1'
                                },
                                attributes={'name': self.name},
                                input_files=input_files,
                                parents=self.deps_parents())
def _run(self, batch: 'batch.Batch', dry_run: bool, verbose: bool,
         delete_scratch_on_exit: bool, **backend_kwargs):  # pylint: disable=R0915
    """
    Execute a batch.

    Warning
    -------
    This method should not be called directly. Instead, use :meth:`.batch.Batch.run`.

    Parameters
    ----------
    batch:
        Batch to execute.
    dry_run:
        If `True`, don't execute code.
    verbose:
        If `True`, print debugging output.
    delete_scratch_on_exit:
        If `True`, delete temporary directories with intermediate files.
    """
    if backend_kwargs:
        raise ValueError(f'LocalBackend does not support any of these keywords: {backend_kwargs}')

    tmpdir = self._get_scratch_dir()

    lines = ['set -e' + ('x' if verbose else ''),
             '\n',
             '# change cd to tmp directory',
             f"cd {tmpdir}",
             '\n']

    copied_input_resource_files = set()
    os.makedirs(tmpdir + '/inputs/', exist_ok=True)

    if batch.requester_pays_project:
        requester_pays_project = f'-u {batch.requester_pays_project}'
    else:
        requester_pays_project = ''

    def copy_input(job, r):
        if isinstance(r, resource.InputResourceFile):
            if r not in copied_input_resource_files:
                copied_input_resource_files.add(r)

                if r._input_path.startswith('gs://'):
                    return [f'gsutil {requester_pays_project} cp {shq(r._input_path)} {shq(r._get_path(tmpdir))}']

                absolute_input_path = os.path.realpath(r._input_path)

                dest = r._get_path(tmpdir)
                dir = os.path.dirname(dest)
                os.makedirs(dir, exist_ok=True)

                if job._image is not None:  # pylint: disable-msg=W0640
                    return [f'cp {shq(absolute_input_path)} {shq(dest)}']

                return [f'ln -sf {shq(absolute_input_path)} {shq(dest)}']

            return []

        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return []

    def copy_external_output(r):
        def _cp(dest):
            if not dest.startswith('gs://'):
                dest = os.path.abspath(dest)
                directory = os.path.dirname(dest)
                os.makedirs(directory, exist_ok=True)
                return 'cp'
            return f'gsutil {requester_pays_project} cp'

        if isinstance(r, resource.InputResourceFile):
            return [f'{_cp(dest)} {shq(r._input_path)} {shq(dest)}'
                    for dest in r._output_paths]

        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return [f'{_cp(dest)} {r._get_path(tmpdir)} {shq(dest)}'
                for dest in r._output_paths]

    def symlink_input_resource_group(r):
        symlinks = []
        if isinstance(r, resource.ResourceGroup) and r._source is None:
            for name, irf in r._resources.items():
                src = irf._get_path(tmpdir)
                dest = f'{r._get_path(tmpdir)}.{name}'
                symlinks.append(f'ln -sf {shq(src)} {shq(dest)}')
        return symlinks

    write_inputs = [x for r in batch._input_resources for x in copy_external_output(r)]
    if write_inputs:
        lines += ["# Write input resources to output destinations"]
        lines += write_inputs
        lines += ['\n']

    for job in batch._jobs:
        if isinstance(job, _job.PythonJob):
            job._compile(tmpdir, tmpdir)

        os.makedirs(f'{tmpdir}/{job._job_id}/', exist_ok=True)

        lines.append(f"# {job._job_id}: {job.name if job.name else ''}")

        lines += [x for r in job._inputs for x in copy_input(job, r)]
        lines += [x for r in job._mentioned for x in symlink_input_resource_group(r)]

        resource_defs = [r._declare(tmpdir) for r in job._mentioned]
        env = [f'export {k}={v}' for k, v in job._env.items()]

        job_shell = job._shell if job._shell else self._DEFAULT_SHELL

        defs = '; '.join(resource_defs) + '; ' if resource_defs else ''
        joined_env = '; '.join(env) + '; ' if env else ''

        cmd = " && ".join(f'{{\n{x}\n}}' for x in job._command)

        quoted_job_script = shq(joined_env + defs + cmd)

        if job._image:
            cpu = f'--cpus={job._cpu}' if job._cpu else ''

            memory = job._memory
            if memory is not None:
                memory_ratios = {'lowmem': 1024**3, 'standard': 4 * 1024**3, 'highmem': 7 * 1024**3}
                if memory in memory_ratios:
                    if job._cpu is not None:
                        mcpu = parse_cpu_in_mcpu(job._cpu)
                        if mcpu is not None:
                            memory = str(int(memory_ratios[memory] * (mcpu / 1000)))
                        else:
                            raise BatchException(f'invalid value for cpu: {job._cpu}')
                    else:
                        raise BatchException(f'must specify cpu when using {memory} to specify the memory')
            memory = f'-m {memory}' if memory else ''

            lines.append(f"docker run "
                         "--entrypoint=''"
                         f"{self._extra_docker_run_flags} "
                         f"-v {tmpdir}:{tmpdir} "
                         f"-w {tmpdir} "
                         f"{memory} "
                         f"{cpu} "
                         f"{job._image} "
                         f"{job_shell} -c {quoted_job_script}")
        else:
            lines.append(f"{job_shell} -c {quoted_job_script}")

        lines += [x for r in job._external_outputs for x in copy_external_output(r)]
        lines += ['\n']

    script = "\n".join(lines)

    if dry_run:
        print(lines)
    else:
        try:
            sp.check_call(script, shell=True)
        except sp.CalledProcessError as e:
            print(e)
            print(e.output)
            raise
        finally:
            if delete_scratch_on_exit:
                sp.run(f'rm -rf {tmpdir}', shell=True, check=False)

    print('Batch completed successfully!')
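# A worked example of the memory conversion above (illustration only): memory='standard'
# maps to 4 GiB per core, so with cpu='250m' (parse_cpu_in_mcpu -> 250) the docker flag
# becomes -m 1073741824, i.e. int(4 * 1024**3 * 250 / 1000) bytes.
memory_ratios = {'lowmem': 1024**3, 'standard': 4 * 1024**3, 'highmem': 7 * 1024**3}
mcpu = 250  # value parse_cpu_in_mcpu would return for '250m'
assert int(memory_ratios['standard'] * (mcpu / 1000)) == 1073741824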
def _get_path(self, directory):
    assert self._source is not None
    assert self._value is not None
    return shq(directory + '/' + self._source._uid + '/' + self._value)
def _get_path(self, directory):
    assert self._value is not None
    return shq(directory + '/inputs/' + self._value)
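# Illustration only (the uid and file names below are hypothetical): the first variant
# places a job-produced resource under a per-source-job subdirectory, while the second
# places an input resource under the shared 'inputs' directory; both shell-quote the
# resulting path with shq.
directory = '/tmp/batch-scratch'
print(directory + '/' + '__JOB__1' + '/' + 'ofile')  # /tmp/batch-scratch/__JOB__1/ofile
print(directory + '/inputs/' + 'data.csv')           # /tmp/batch-scratch/inputs/data.csv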
async def _async_run(self,
                     batch: 'batch.Batch',
                     dry_run: bool,
                     verbose: bool,
                     delete_scratch_on_exit: bool,
                     wait: bool = True,
                     open: bool = False,
                     disable_progress_bar: bool = False,
                     callback: Optional[str] = None,
                     token: Optional[str] = None,
                     **backend_kwargs):  # pylint: disable-msg=too-many-statements
    if backend_kwargs:
        raise ValueError(f'ServiceBackend does not support any of these keywords: {backend_kwargs}')

    build_dag_start = time.time()

    uid = uuid.uuid4().hex[:6]
    batch_remote_tmpdir = f'{self.remote_tmpdir}{uid}'
    local_tmpdir = f'/io/batch/{uid}'

    default_image = 'ubuntu:20.04'

    attributes = copy.deepcopy(batch.attributes)
    if batch.name is not None:
        attributes['name'] = batch.name

    bc_batch = self._batch_client.create_batch(attributes=attributes,
                                               callback=callback,
                                               token=token,
                                               cancel_after_n_failures=batch._cancel_after_n_failures)

    n_jobs_submitted = 0
    used_remote_tmpdir = False

    job_to_client_job_mapping: Dict[_job.Job, bc.Job] = {}
    jobs_to_command = {}
    commands = []

    bash_flags = 'set -e' + ('x' if verbose else '')

    def copy_input(r):
        if isinstance(r, resource.InputResourceFile):
            return [(r._input_path, r._get_path(local_tmpdir))]
        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return [(r._get_path(batch_remote_tmpdir), r._get_path(local_tmpdir))]

    def copy_internal_output(r):
        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return [(r._get_path(local_tmpdir), r._get_path(batch_remote_tmpdir))]

    def copy_external_output(r):
        if isinstance(r, resource.InputResourceFile):
            return [(r._input_path, dest) for dest in r._output_paths]
        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return [(r._get_path(local_tmpdir), dest) for dest in r._output_paths]

    def symlink_input_resource_group(r):
        symlinks = []
        if isinstance(r, resource.ResourceGroup) and r._source is None:
            for name, irf in r._resources.items():
                src = irf._get_path(local_tmpdir)
                dest = f'{r._get_path(local_tmpdir)}.{name}'
                symlinks.append(f'ln -sf {shq(src)} {shq(dest)}')
        return symlinks

    write_external_inputs = [x for r in batch._input_resources for x in copy_external_output(r)]
    if write_external_inputs:
        transfers_bytes = orjson.dumps([{"from": src, "to": dest} for src, dest in write_external_inputs])
        transfers = transfers_bytes.decode('utf-8')
        write_cmd = ['python3', '-m', 'hailtop.aiotools.copy', 'null', transfers]

        if dry_run:
            commands.append(' '.join(shq(x) for x in write_cmd))
        else:
            j = bc_batch.create_job(image=HAIL_GENETICS_HAIL_IMAGE,
                                    command=write_cmd,
                                    attributes={'name': 'write_external_inputs'})
            jobs_to_command[j] = ' '.join(shq(x) for x in write_cmd)
            n_jobs_submitted += 1

    pyjobs = [j for j in batch._jobs if isinstance(j, _job.PythonJob)]
    for job in pyjobs:
        if job._image is None:
            version = sys.version_info
            if version.major != 3 or version.minor not in (6, 7, 8):
                raise BatchException(
                    f"You must specify 'image' for Python jobs if you are using a Python version other than 3.6, 3.7, or 3.8 (you are using {version})")
            job._image = f'hailgenetics/python-dill:{version.major}.{version.minor}-slim'

    with tqdm(total=len(batch._jobs), desc='upload code', disable=disable_progress_bar) as pbar:
        async def compile_job(job):
            used_remote_tmpdir = await job._compile(local_tmpdir, batch_remote_tmpdir, dry_run=dry_run)
            pbar.update(1)
            return used_remote_tmpdir

        used_remote_tmpdir_results = await bounded_gather(
            *[functools.partial(compile_job, j) for j in batch._jobs], parallelism=150)
        used_remote_tmpdir |= any(used_remote_tmpdir_results)

    for job in tqdm(batch._jobs, desc='create job objects', disable=disable_progress_bar):
        inputs = [x for r in job._inputs for x in copy_input(r)]

        outputs = [x for r in job._internal_outputs for x in copy_internal_output(r)]
        if outputs:
            used_remote_tmpdir = True
        outputs += [x for r in job._external_outputs for x in copy_external_output(r)]

        symlinks = [x for r in job._mentioned for x in symlink_input_resource_group(r)]

        if job._image is None:
            if verbose:
                print(f"Using image '{default_image}' since no image was specified.")

        make_local_tmpdir = f'mkdir -p {local_tmpdir}/{job._dirname}'

        job_command = [cmd.strip() for cmd in job._wrapper_code]
        prepared_job_command = (f'{{\n{x}\n}}' for x in job_command)
        cmd = f'''
{bash_flags}
{make_local_tmpdir}
{"; ".join(symlinks)}
{" && ".join(prepared_job_command)}
'''

        user_code = '\n\n'.join(job._user_code) if job._user_code else None

        if dry_run:
            formatted_command = f'''
================================================================================
# Job {job._job_id} {f": {job.name}" if job.name else ''}

--------------------------------------------------------------------------------
## USER CODE
--------------------------------------------------------------------------------
{user_code}

--------------------------------------------------------------------------------
## COMMAND
--------------------------------------------------------------------------------
{cmd}
================================================================================
'''
            commands.append(formatted_command)
            continue

        parents = [job_to_client_job_mapping[j] for j in job._dependencies]

        attributes = copy.deepcopy(job.attributes) if job.attributes else {}
        if job.name:
            attributes['name'] = job.name

        resources: Dict[str, Any] = {}
        if job._cpu:
            resources['cpu'] = job._cpu
        if job._memory:
            resources['memory'] = job._memory
        if job._storage:
            resources['storage'] = job._storage
        if job._machine_type:
            resources['machine_type'] = job._machine_type
        if job._preemptible is not None:
            resources['preemptible'] = job._preemptible

        image = job._image if job._image else default_image
        image_ref = parse_docker_image_reference(image)
        if image_ref.hosted_in('dockerhub') and image_ref.name() not in HAIL_GENETICS_IMAGES:
            warnings.warn(f'Using an image {image} from Docker Hub. '
                          f'Jobs may fail due to Docker Hub rate limits.')

        env = {**job._env, 'BATCH_TMPDIR': local_tmpdir}

        j = bc_batch.create_job(image=image,
                                command=[job._shell if job._shell else DEFAULT_SHELL, '-c', cmd],
                                parents=parents,
                                attributes=attributes,
                                resources=resources,
                                input_files=inputs if len(inputs) > 0 else None,
                                output_files=outputs if len(outputs) > 0 else None,
                                always_run=job._always_run,
                                timeout=job._timeout,
                                cloudfuse=job._cloudfuse if len(job._cloudfuse) > 0 else None,
                                env=env,
                                requester_pays_project=batch.requester_pays_project,
                                mount_tokens=True,
                                user_code=user_code)

        n_jobs_submitted += 1

        job_to_client_job_mapping[job] = j
        jobs_to_command[j] = cmd

    if dry_run:
        print("\n\n".join(commands))
        return None

    if delete_scratch_on_exit and used_remote_tmpdir:
        parents = list(jobs_to_command.keys())
        j = bc_batch.create_job(image=HAIL_GENETICS_HAIL_IMAGE,
                                command=['python3', '-m', 'hailtop.aiotools.delete', batch_remote_tmpdir],
                                parents=parents,
                                attributes={'name': 'remove_tmpdir'},
                                always_run=True)
        jobs_to_command[j] = cmd
        n_jobs_submitted += 1

    if verbose:
        print(f'Built DAG with {n_jobs_submitted} jobs in {round(time.time() - build_dag_start, 3)} seconds.')

    submit_batch_start = time.time()
    batch_handle = bc_batch.submit(disable_progress_bar=disable_progress_bar)

    jobs_to_command = {j.id: cmd for j, cmd in jobs_to_command.items()}

    if verbose:
        print(f'Submitted batch {batch_handle.id} with {n_jobs_submitted} jobs in {round(time.time() - submit_batch_start, 3)} seconds:')
        for jid, cmd in jobs_to_command.items():
            print(f'{jid}: {cmd}')
        print('')

    deploy_config = get_deploy_config()
    url = deploy_config.url('batch', f'/batches/{batch_handle.id}')
    print(f'Submitted batch {batch_handle.id}, see {url}')

    if open:
        webbrowser.open(url)
    if wait:
        print(f'Waiting for batch {batch_handle.id}...')
        status = batch_handle.wait()
        print(f'batch {batch_handle.id} complete: {status["state"]}')
    return batch_handle
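# A usage sketch (hedged; the billing project and bucket below are placeholders):
# _async_run is driven by Batch.run() on the public hailtop.batch interface, so a
# typical caller never invokes it directly.
import hailtop.batch as hb

backend = hb.ServiceBackend(billing_project='my-billing-project',
                            remote_tmpdir='gs://my-bucket/batch/tmp/')
b = hb.Batch(backend=backend, name='service-example')
j = b.new_job(name='hello')
j.command('echo hello world')
b.run(wait=True)   # builds and submits the DAG via _async_run, then waits for completion
backend.close()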
def _run(self,
         batch: 'batch.Batch',
         dry_run: bool,
         verbose: bool,
         delete_scratch_on_exit: bool,
         **backend_kwargs) -> None:  # pylint: disable=R0915
    """
    Execute a batch.

    Warning
    -------
    This method should not be called directly. Instead, use :meth:`.batch.Batch.run`.

    Parameters
    ----------
    batch:
        Batch to execute.
    dry_run:
        If `True`, don't execute code.
    verbose:
        If `True`, print debugging output.
    delete_scratch_on_exit:
        If `True`, delete temporary directories with intermediate files.
    """
    if backend_kwargs:
        raise ValueError(f'LocalBackend does not support any of these keywords: {backend_kwargs}')

    tmpdir = self._get_scratch_dir()

    def new_code_block():
        return ['set -e' + ('x' if verbose else ''),
                '\n',
                '# change cd to tmp directory',
                f"cd {tmpdir}",
                '\n']

    def run_code(code):
        code = '\n'.join(code)
        if dry_run:
            print(code)
        else:
            try:
                sp.check_call(code, shell=True)
            except sp.CalledProcessError as e:
                print(e)
                print(e.output)
                raise

    copied_input_resource_files = set()
    os.makedirs(tmpdir + '/inputs/', exist_ok=True)

    requester_pays_project_json = orjson.dumps(batch.requester_pays_project).decode('utf-8')

    def copy_input(job, r):
        if isinstance(r, resource.InputResourceFile):
            if r not in copied_input_resource_files:
                copied_input_resource_files.add(r)

                input_scheme = url_scheme(r._input_path)
                if input_scheme != '':
                    transfers_bytes = orjson.dumps([{"from": r._input_path, "to": r._get_path(tmpdir)}])
                    transfers = transfers_bytes.decode('utf-8')
                    return [f'python3 -m hailtop.aiotools.copy {shq(requester_pays_project_json)} {shq(transfers)}']

                absolute_input_path = os.path.realpath(os.path.expanduser(r._input_path))

                dest = r._get_path(os.path.expanduser(tmpdir))
                dir = os.path.dirname(dest)
                os.makedirs(dir, exist_ok=True)

                if job._image is not None:  # pylint: disable-msg=W0640
                    return [f'cp {shq(absolute_input_path)} {shq(dest)}']

                return [f'ln -sf {shq(absolute_input_path)} {shq(dest)}']

            return []

        assert isinstance(r, (resource.JobResourceFile, resource.PythonResult))
        return []

    def symlink_input_resource_group(r):
        symlinks = []
        if isinstance(r, resource.ResourceGroup) and r._source is None:
            for name, irf in r._resources.items():
                src = irf._get_path(tmpdir)
                dest = f'{r._get_path(tmpdir)}.{name}'
                symlinks.append(f'ln -sf {shq(src)} {shq(dest)}')
        return symlinks

    def transfer_dicts_for_resource_file(
            res_file: Union[resource.ResourceFile, resource.PythonResult]) -> List[dict]:
        if isinstance(res_file, resource.InputResourceFile):
            source = res_file._input_path
        else:
            assert isinstance(res_file, (resource.JobResourceFile, resource.PythonResult))
            source = res_file._get_path(tmpdir)

        return [{"from": source, "to": dest} for dest in res_file._output_paths]

    try:
        input_transfer_dicts = [
            transfer_dict
            for input_resource in batch._input_resources
            for transfer_dict in transfer_dicts_for_resource_file(input_resource)]

        if input_transfer_dicts:
            input_transfers = orjson.dumps(input_transfer_dicts).decode('utf-8')
            code = new_code_block()
            code += ["# Write input resources to output destinations"]
            code += [f'python3 -m hailtop.aiotools.copy {shq(requester_pays_project_json)} {shq(input_transfers)}']
            code += ['\n']
            run_code(code)

        for job in batch._jobs:
            async_to_blocking(job._compile(tmpdir, tmpdir))

            os.makedirs(f'{tmpdir}/{job._dirname}/', exist_ok=True)

            code = new_code_block()

            code.append(f"# {job._job_id}: {job.name if job.name else ''}")

            if job._user_code:
                code.append('# USER CODE')
                user_code = [f'# {line}' for cmd in job._user_code for line in cmd.split('\n')]
                code.append('\n'.join(user_code))

            code += [x for r in job._inputs for x in copy_input(job, r)]
            code += [x for r in job._mentioned for x in symlink_input_resource_group(r)]

            env = {**job._env, 'BATCH_TMPDIR': tmpdir}
            env_declarations = [f'export {k}={v}' for k, v in env.items()]
            joined_env = '; '.join(env_declarations) + '; ' if env else ''

            job_shell = job._shell if job._shell else DEFAULT_SHELL

            cmd = " && ".join(f'{{\n{x}\n}}' for x in job._wrapper_code)

            quoted_job_script = shq(joined_env + cmd)

            if job._image:
                cpu = f'--cpus={job._cpu}' if job._cpu else ''

                memory = job._memory
                if memory is not None:
                    memory_ratios = {'lowmem': 1024**3, 'standard': 4 * 1024**3, 'highmem': 7 * 1024**3}
                    if memory in memory_ratios:
                        if job._cpu is not None:
                            mcpu = parse_cpu_in_mcpu(job._cpu)
                            if mcpu is not None:
                                memory = str(int(memory_ratios[memory] * (mcpu / 1000)))
                            else:
                                raise BatchException(f'invalid value for cpu: {job._cpu}')
                        else:
                            raise BatchException(f'must specify cpu when using {memory} to specify the memory')
                    memory = f'-m {memory}' if memory else ''
                else:
                    memory = ''

                code.append(f"docker run "
                            "--entrypoint=''"
                            f"{self._extra_docker_run_flags} "
                            f"-v {tmpdir}:{tmpdir} "
                            f"-w {tmpdir} "
                            f"{memory} "
                            f"{cpu} "
                            f"{job._image} "
                            f"{job_shell} -c {quoted_job_script}")
            else:
                code.append(f"{job_shell} -c {quoted_job_script}")

            output_transfer_dicts = [
                transfer_dict
                for output_resource in job._external_outputs
                for transfer_dict in transfer_dicts_for_resource_file(output_resource)]

            output_transfers = orjson.dumps(output_transfer_dicts).decode('utf-8')

            code += [f'python3 -m hailtop.aiotools.copy {shq(requester_pays_project_json)} {shq(output_transfers)}']
            code += ['\n']
            run_code(code)
    finally:
        if delete_scratch_on_exit:
            sp.run(f'rm -rf {tmpdir}', shell=True, check=False)

    print('Batch completed successfully!')
async def build(self, batch, pr):
    if self.inputs:
        input_files = []
        for i in self.inputs:
            input_files.append((f'{BUCKET}/build/{batch.attributes["token"]}{i["from"]}',
                                f'/io/{os.path.basename(i["to"])}'))
    else:
        input_files = None

    config = self.input_config(pr)

    if self.context_path:
        context = f'repo/{self.context_path}'
        init_context = ''
    else:
        context = 'context'
        init_context = 'mkdir context'

    dockerfile = 'Dockerfile'
    render_dockerfile = f'python3 jinja2_render.py {shq(json.dumps(config))} {shq(f"repo/{self.dockerfile}")} Dockerfile'

    if self.publish_as:
        published_latest = shq(f'gcr.io/{GCP_PROJECT}/{self.publish_as}:latest')
        pull_published_latest = f'docker pull {shq(published_latest)} || true'
        cache_from_published_latest = f'--cache-from {shq(published_latest)}'
    else:
        pull_published_latest = ''
        cache_from_published_latest = ''

    copy_inputs = ''
    if self.inputs:
        for i in self.inputs:
            # to is relative to docker context
            copy_inputs = copy_inputs + f'''
mkdir -p {shq(os.path.dirname(f'{context}{i["to"]}'))}
mv {shq(f'/io/{os.path.basename(i["to"])}')} {shq(f'{context}{i["to"]}')}
'''

    script = f'''
set -ex

git clone {shq(pr.target_branch.branch.repo.url)} repo

git -C repo config user.email [email protected]
git -C repo config user.name hail-ci-leader

git -C repo remote add {shq(pr.source_repo.short_str())} {shq(pr.source_repo.url)}
git -C repo fetch -q {shq(pr.source_repo.short_str())}
git -C repo checkout {shq(pr.target_branch.sha)}
git -C repo merge {shq(pr.source_sha)} -m 'merge PR'

{render_dockerfile}
{init_context}
{copy_inputs}

FROM_IMAGE=$(awk '$1 == "FROM" {{ print $2; exit }}' {shq(dockerfile)})

gcloud -q auth activate-service-account \
  --key-file=/secrets/gcr-push-service-account-key/gcr-push-service-account-key.json
gcloud -q auth configure-docker

docker pull $FROM_IMAGE
{pull_published_latest}
docker build -t {shq(self.image)} \
  -f {dockerfile} \
  --cache-from $FROM_IMAGE {cache_from_published_latest} \
  {context}
docker push {shq(self.image)}
'''

    log.info(f'step {self.name}, script:\n{script}')

    volumes = [{
        'volume': {
            'name': 'docker-sock-volume',
            'hostPath': {
                'path': '/var/run/docker.sock',
                'type': 'File'
            }
        },
        'volume_mount': {
            'mountPath': '/var/run/docker.sock',
            'name': 'docker-sock-volume'
        }
    }, {
        'volume': {
            'name': 'gcr-push-service-account-key',
            'secret': {
                'optional': False,
                'secretName': 'gcr-push-service-account-key'
            }
        },
        'volume_mount': {
            'mountPath': '/secrets/gcr-push-service-account-key',
            'name': 'gcr-push-service-account-key',
            'readOnly': True
        }
    }]

    sa = None
    if self.inputs is not None:
        sa = 'ci2'

    self.job = await batch.create_job(CI_UTILS_IMAGE,
                                      command=['bash', '-c', script],
                                      attributes={'name': self.name},
                                      volumes=volumes,
                                      input_files=input_files,
                                      copy_service_account_name=sa,
                                      parent_ids=self.deps_parent_ids())