def test_pool_highcpu_instance(client: BatchClient):
    """Jobs with small memory requests should be scheduled on 'highcpu'
    workers; a balanced cpu:memory request should land on 'standard'.

    Refactored: the original repeated the submit/wait/assert sequence three
    times; the shared steps now live in a local helper.
    """

    def _submit_and_wait(resources):
        # One trivial job per batch so the reported worker type reflects
        # only the requested resources.
        builder = client.create_batch()
        j = builder.create_job(DOCKER_ROOT_IMAGE, ['true'], resources=resources)
        b = builder.submit()
        status = j.wait()
        assert status['state'] == 'Success', str((status, b.debug_info()))
        return status, b

    # 'lowmem' memory class with a fractional core -> highcpu pool
    status, b = _submit_and_wait({'cpu': '0.25', 'memory': 'lowmem'})
    assert 'highcpu' in status['status']['worker'], str(
        (status, b.debug_info()))

    # explicit small memory amount -> still highcpu
    status, b = _submit_and_wait({'cpu': '0.25', 'memory': '50Mi'})
    assert 'highcpu' in status['status']['worker'], str(
        (status, b.debug_info()))

    # balanced cpu:memory ratio -> standard pool
    status, b = _submit_and_wait({'cpu': '0.5', 'memory': '1Gi'})
    assert 'standard' in status['status']['worker'], str(
        (status, b.debug_info()))
def main(args):
    """Entry point for `hailctl batch`: dispatch to the requested submodule.

    With no arguments, prints help and exits 0.  The shared BatchClient is
    always closed, even if the submodule raises.
    """
    if not args:
        parser().print_help()
        sys.exit(0)
    # NOTE: 'billing' was removed from this table — it is dead code, since
    # the special-cased branch below returns before the table is consulted.
    jmp = {
        'list': list_batches,
        'delete': delete,
        'get': get,
        'cancel': cancel,
        'log': log,
        'job': job,
        'wait': wait,
    }
    args, pass_through_args = parser().parse_known_args(args=args)
    # hailctl batch doesn't create batches
    client = BatchClient(None)
    try:
        if args.module == 'billing':
            # lazy import keeps startup fast for the common subcommands
            from .billing import cli  # pylint: disable=import-outside-toplevel
            cli.main(args, pass_through_args, client)
            return
        jmp[args.module].main(args, pass_through_args, client)
    finally:
        client.close()
def client():
    """Pytest-style fixture: yield a BatchClient backed by a fresh aiohttp
    session (60s total timeout, errors raised as exceptions), closing the
    client when the consumer is done."""
    http_session = aiohttp.ClientSession(
        raise_for_status=True,
        timeout=aiohttp.ClientTimeout(total=60))
    batch_client = BatchClient(http_session, url=os.environ.get('BATCH_URL'))
    yield batch_client
    # closing the client is assumed to release the underlying session
    batch_client.close()
def test_quota_shared_by_io_and_rootfs(client: BatchClient):
    """The 'storage' quota is shared between the root filesystem and /io:
    7GiB in either location alone fits within 10Gi, but 7GiB in both must
    fail with ENOSPC.

    Refactored: the triplicated batch-building boilerplate moved into a
    local helper.
    """

    def _run_fallocate(command):
        # Submit one job with a 10Gi storage quota running `command`.
        builder = client.create_batch()
        resources = {'cpu': '0.25', 'memory': '10M', 'storage': '10Gi'}
        j = builder.create_job(DOCKER_ROOT_IMAGE,
                               ['/bin/sh', '-c', command],
                               resources=resources)
        b = builder.submit()
        return j, b, j.wait()

    # 7GiB on the root filesystem alone fits
    j, b, status = _run_fallocate('fallocate -l 7GiB /foo')
    assert status['state'] == 'Success', str((status, b.debug_info()))

    # 7GiB on /io alone also fits
    j, b, status = _run_fallocate('fallocate -l 7GiB /io/foo')
    assert status['state'] == 'Success', str((status, b.debug_info()))

    # 7GiB + 7GiB exceeds the shared 10Gi quota
    j, b, status = _run_fallocate('fallocate -l 7GiB /foo; fallocate -l 7GiB /io/foo')
    assert status['state'] == 'Failed', str((status, b.debug_info()))
    job_log = j.log()
    assert "fallocate failed: No space left on device" in job_log['main'], str(
        (job_log, b.debug_info()))
def main(args):
    """Dispatch a batch CLI subcommand to its module, ensuring the client is
    closed afterwards. With no arguments, print help and exit 0."""
    if not args:
        parser().print_help()
        sys.exit(0)
    dispatch = {
        'list': list_batches,
        'delete': delete,
        'get': get,
        'cancel': cancel,
        'log': log,
        'pod_status': pod_status,
        'wait': wait,
    }
    args, pass_through_args = parser().parse_known_args(args=args)
    http_session = aiohttp.ClientSession(
        raise_for_status=True,
        timeout=aiohttp.ClientTimeout(total=60))
    batch_client = BatchClient(http_session, url=args.master_url)
    try:
        dispatch[args.module].main(args, pass_through_args, batch_client)
    finally:
        batch_client.close()
def test_batch_status(client: BatchClient):
    """A batch's summary status reflects its jobs: all-success -> 'success',
    any failure -> 'failure', still running -> 'running', cancelled before
    completion -> 'cancelled'."""
    # all jobs succeed -> complete/success
    b1 = client.create_batch()
    b1.create_job(DOCKER_ROOT_IMAGE, ['true'])
    b1 = b1.submit()
    b1.wait()
    status1 = b1.status()
    assert status1['complete'] and status1['state'] == 'success', str(
        (status1, b1.debug_info()))

    # one failing job is enough -> complete/failure
    b2 = client.create_batch()
    b2.create_job(DOCKER_ROOT_IMAGE, ['false'])
    b2.create_job(DOCKER_ROOT_IMAGE, ['true'])
    b2 = b2.submit()
    b2.wait()
    status2 = b2.status()
    assert status2['complete'] and status2['state'] == 'failure', str(
        (status2, b2.debug_info()))

    # queried immediately -> not complete / running; cancel to clean up
    b3 = client.create_batch()
    b3.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])
    b3 = b3.submit()
    status3 = b3.status()
    assert not status3['complete'] and status3['state'] == 'running', str(
        (status3, b3.debug_info()))
    b3.cancel()

    # cancelled before the job finishes -> complete/cancelled
    b4 = client.create_batch()
    b4.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])
    b4 = b4.submit()
    b4.cancel()
    b4.wait()
    status4 = b4.status()
    assert status4['complete'] and status4['state'] == 'cancelled', str(
        (status4, b4.debug_info()))
def test_get_nonexistent_job(client: BatchClient):
    """Fetching a job that does not exist must fail with HTTP 404.

    Bug fix: the original passed silently when get_job raised nothing at
    all; an `else` branch now makes the missing 404 a test failure.
    """
    try:
        client.get_job(1, 666)
    except aiohttp.ClientResponseError as e:
        # any status other than 404 is an unexpected server error
        if e.status != 404:
            raise
    else:
        assert False, 'expected get_job of a nonexistent job to raise a 404'
def test_get_job(client: BatchClient):
    """A job re-fetched by id via the client reports the same
    (batch_id, job_id) pair as the original handle.

    Bug fix: the assertion message referenced the undefined name `status`,
    which would raise NameError instead of printing debug info when the
    assertion failed; it now uses `status2`.
    """
    b = client.create_batch()
    j = b.create_job(DOCKER_ROOT_IMAGE, ['true'])
    b = b.submit()
    j2 = client.get_job(*j.id)
    status2 = j2.status()
    assert (status2['batch_id'], status2['job_id']) == j.id, str(
        (status2, b.debug_info()))
def test_cant_submit_to_default_with_other_ns_creds(client: BatchClient):
    # Verifies that credentials belonging to a non-default namespace cannot be
    # used to submit batches to the default namespace: in a non-default
    # NAMESPACE both submission attempts must fail with "Please log in"; when
    # NAMESPACE == 'default' the same script legitimately succeeds.
    # NOTE(review): the embedded scripts' line breaks were reconstructed from
    # shell/python semantics — confirm against the pre-minified original.
    remote_tmpdir = get_user_config().get('batch', 'remote_tmpdir')
    # Inline hailtop.batch program run *inside* the job's container.
    script = f'''import hailtop.batch as hb
backend = hb.ServiceBackend("test", remote_tmpdir="{remote_tmpdir}")
b = hb.Batch(backend=backend)
j = b.new_bash_job()
j.command("echo hi")
b.run()
backend.close()
'''
    # Attempt 1: delete the deploy config entirely, leaving only the domain,
    # so submission falls back to the default namespace with this job's creds.
    builder = client.create_batch()
    j = builder.create_job(
        os.environ['HAIL_HAIL_BASE_IMAGE'],
        [
            '/bin/bash',
            '-c',
            f'''
hailctl config set domain {DOMAIN}
rm /deploy-config/deploy-config.json
python3 -c \'{script}\'''',
        ],
        mount_tokens=True,
    )
    b = builder.submit()
    status = j.wait()
    if NAMESPACE == 'default':
        # running in the default namespace: the creds are valid there
        assert status['state'] == 'Success', str((status, b.debug_info()))
    else:
        assert status['state'] == 'Failed', str((status, b.debug_info()))
        assert "Please log in" in j.log()['main'], (str(j.log()['main']), status)
    # Attempt 2: keep the deploy config but rewrite default_namespace to
    # "default", again trying to reach default with foreign credentials.
    builder = client.create_batch()
    j = builder.create_job(
        os.environ['HAIL_HAIL_BASE_IMAGE'],
        [
            '/bin/bash',
            '-c',
            f'''
jq '.default_namespace = "default"' /deploy-config/deploy-config.json > tmp.json
mv tmp.json /deploy-config/deploy-config.json
python3 -c \'{script}\'''',
        ],
        mount_tokens=True,
    )
    b = builder.submit()
    status = j.wait()
    if NAMESPACE == 'default':
        assert status['state'] == 'Success', str((status, b.debug_info()))
    else:
        assert status['state'] == 'Failed', str((status, b.debug_info()))
        job_log = j.log()
        assert "Please log in" in job_log['main'], str(
            (job_log, b.debug_info()))
def __init__(self, billing_project=None):
    """Create a ServiceBackend.

    :param billing_project: billing project name; when None, falls back to
        the user config key ``batch/billing_project``.
    :raises ValueError: if no billing project is given and none is configured.
    """
    if billing_project is None:
        billing_project = get_user_config().get('batch', 'billing_project', fallback=None)
    if billing_project is None:
        # fix: dropped pointless f-prefixes — these strings have no placeholders
        raise ValueError(
            'the billing_project parameter of ServiceBackend must be set '
            'or run `hailctl config set batch/billing_project '
            'YOUR_BILLING_PROJECT`')
    self._batch_client = BatchClient(billing_project)
def test_bad_token(self):
    """Submitting with a random (invalid) token must be rejected with 401."""
    bogus_token = base64.urlsafe_b64encode(secrets.token_bytes(32)).decode('ascii')
    bc = BatchClient(_token=bogus_token, _service='batch2')
    try:
        builder = bc.create_batch()
        j = builder.create_job('ubuntu:18.04', ['false'])
        builder.submit()
        assert False, j
    except aiohttp.ClientResponseError as e:
        assert e.status == 401, e
    finally:
        bc.close()
def test_delete_batch(client: BatchClient):
    """Deleting a batch makes its jobs unreachable (get_job returns 404).

    Bug fix: the original passed silently when get_job raised nothing;
    an `else` branch now makes a still-existing job a test failure.
    """
    b = client.create_batch()
    j = b.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])
    b = b.submit()
    b.delete()
    # verify the job no longer exists
    try:
        client.get_job(*j.id)
    except aiohttp.ClientResponseError as e:
        if e.status != 404:
            raise
    else:
        assert False, 'expected get_job on a deleted batch to raise a 404'
def test_create_idempotence(client: BatchClient):
    """Creating two batches with the same idempotency token yields one batch:
    both creations resolve to the same batch id."""
    shared_token = secrets.token_urlsafe(32)
    first_builder = client.create_batch(token=shared_token)
    second_builder = client.create_batch(token=shared_token)
    first = first_builder._create()
    second = second_builder._create()
    assert first.id == second.id
def test_batch(client: BatchClient):
    """Cancel a batch after two of three jobs finished: exactly one job may
    end up Cancelled, all three must be resolved, and exactly one job (the
    `false` job) must have failed with a nonzero exit code."""
    builder = client.create_batch()
    failing_job = builder.create_job(DOCKER_ROOT_IMAGE, ['false'])
    quick_job = builder.create_job(DOCKER_ROOT_IMAGE, ['sleep', '1'])
    builder.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])  # will be cancelled
    b = builder.submit()
    failing_job.wait()
    quick_job.wait()
    b.cancel()
    b.wait()
    bstatus = legacy_batch_status(b)
    jobs = bstatus['jobs']
    assert len(jobs) == 3, str((bstatus, b.debug_info()))
    state_count = collections.Counter(job['state'] for job in jobs)
    n_cancelled = state_count['Cancelled']
    n_complete = sum(state_count[s] for s in ('Error', 'Failed', 'Success'))
    # only the long sleep can be cancelled; the other two already finished
    assert n_cancelled <= 1, str((bstatus, b.debug_info()))
    assert n_cancelled + n_complete == 3, str((bstatus, b.debug_info()))
    n_failed = sum(1 for job in jobs
                   if job['state'] in ('Failed', 'Error') and job['exit_code'] > 0)
    assert n_failed == 1, str((bstatus, b.debug_info()))
def test_verify_access_to_public_internet(client: BatchClient):
    """A job can reach the public internet (curl example.com succeeds)."""
    builder = client.create_batch()
    curl_job = builder.create_job(os.environ['HAIL_CURL_IMAGE'],
                                  ['curl', '-fsSL', 'example.com'])
    b = builder.submit()
    status = curl_job.wait()
    assert status['state'] == 'Success', str((status, b.debug_info()))
def test_list_jobs(client: BatchClient):
    """Job listing supports state and attribute queries: 'success', 'done',
    '!done', 'tag=...', 'has:tag', and no query at all."""
    b = client.create_batch()
    j_success = b.create_job(DOCKER_ROOT_IMAGE, ['true'])
    j_failure = b.create_job(DOCKER_ROOT_IMAGE, ['false'])
    # 'sleep 5' as a single argv entry is not a valid executable -> Error state
    j_error = b.create_job(DOCKER_ROOT_IMAGE, ['sleep 5'], attributes={'tag': 'bar'})
    j_running = b.create_job(DOCKER_ROOT_IMAGE, ['sleep', '1800'],
                             attributes={'tag': 'foo'})
    b = b.submit()
    for finished in (j_success, j_failure, j_error):
        finished.wait()

    def assert_job_ids(expected, q=None):
        # Compare the set of listed job ids against the expectation.
        jobs = b.jobs(q=q)
        actual = {job['job_id'] for job in jobs}
        assert actual == expected, str((jobs, b.debug_info()))

    assert_job_ids({j_success.job_id}, 'success')
    assert_job_ids({j_success.job_id, j_failure.job_id, j_error.job_id}, 'done')
    assert_job_ids({j_running.job_id}, '!done')
    assert_job_ids({j_running.job_id}, 'tag=foo')
    assert_job_ids({j_error.job_id, j_running.job_id}, 'has:tag')
    assert_job_ids(
        {j_success.job_id, j_failure.job_id, j_error.job_id, j_running.job_id},
        None)
    b.cancel()
def test_include_jobs(client: BatchClient):
    """A batch status fetched via status() must not embed the job list.

    Minor fix: the unused loop variable `i` is now `_`.
    """
    b1 = client.create_batch()
    for _ in range(2):
        b1.create_job(DOCKER_ROOT_IMAGE, ['true'])
    b1 = b1.submit()
    s = b1.status()
    assert 'jobs' not in s, str((s, b1.debug_info()))
def test_unknown_image(client: BatchClient):
    """A job referencing a nonexistent image reports 'image not found' and
    never produces a main-container exit code."""
    builder = client.create_batch()
    j = builder.create_job(f'{DOCKER_PREFIX}/does-not-exist', ['echo', 'test'])
    b = builder.submit()
    status = j.wait()
    assert j._get_exit_code(status, 'main') is None
    short_error = status['status']['container_statuses']['main']['short_error']
    assert short_error == 'image not found', str((status, b.debug_info()))
def test_nonzero_storage(client: BatchClient):
    """A job requesting 20Gi of storage can be scheduled and succeeds."""
    builder = client.create_batch()
    requested = {'cpu': '0.25', 'memory': '10M', 'storage': '20Gi'}
    j = builder.create_job('ubuntu:18.04', ['/bin/sh', '-c', 'true'],
                           resources=requested)
    b = builder.submit()
    status = j.wait()
    assert status['state'] == 'Success', str((status, b.debug_info()))
def test_list_batches(client: BatchClient):
    """Batch listing supports attribute and state queries (tag, name,
    complete/!complete, success/!success) across two tagged batches, one
    long-running (later cancelled) and one that completes quickly."""
    tag = secrets.token_urlsafe(64)
    b1 = client.create_batch(attributes={'tag': tag, 'name': 'b1'})
    b1.create_job(DOCKER_ROOT_IMAGE, ['sleep', '3600'])
    b1 = b1.submit()
    b2 = client.create_batch(attributes={'tag': tag, 'name': 'b2'})
    b2.create_job(DOCKER_ROOT_IMAGE, ['echo', 'test'])
    b2 = b2.submit()
    batch_id_test_universe = {b1.id, b2.id}

    def assert_batch_ids(expected: Set[int], q=None):
        # Restrict listing to the id window covering our two batches so
        # batches left over from earlier tests don't pollute the result.
        assert expected.issubset(batch_id_test_universe)
        max_id = max(batch_id_test_universe)
        min_id = min(batch_id_test_universe)
        span = max_id - min_id + 1
        batches = client.list_batches(q, last_batch_id=max_id + 1, limit=span)
        full_actual = {batch.id for batch in batches}
        actual = full_actual.intersection(batch_id_test_universe)
        assert actual == expected, str(
            (full_actual, max_id, span, b1.debug_info(), b2.debug_info()))

    assert_batch_ids({b1.id, b2.id})
    assert_batch_ids({b1.id, b2.id}, f'tag={tag}')
    b2.wait()
    assert_batch_ids({b1.id}, f'!complete tag={tag}')
    assert_batch_ids({b2.id}, f'complete tag={tag}')
    assert_batch_ids({b1.id}, f'!success tag={tag}')
    assert_batch_ids({b2.id}, f'success tag={tag}')
    b1.cancel()
    b1.wait()
    # after cancellation b1 is complete but not successful
    assert_batch_ids({b1.id}, f'!success tag={tag}')
    assert_batch_ids({b2.id}, f'success tag={tag}')
    assert_batch_ids(set(), f'!complete tag={tag}')
    assert_batch_ids({b1.id, b2.id}, f'complete tag={tag}')
    assert_batch_ids({b2.id}, f'tag={tag} name=b2')
def test_long_log_line(client: BatchClient):
    """A job emitting one very long log line (~70k chars) still succeeds."""
    builder = client.create_batch()
    j = builder.create_job(
        DOCKER_ROOT_IMAGE,
        ['/bin/sh', '-c', 'for _ in {0..70000}; do echo -n a; done'])
    b = builder.submit()
    status = j.wait()
    assert status['state'] == 'Success', str((status, b.debug_info()))
def test_exit_code_duration(client: BatchClient):
    """A job exiting with code 7 reports that exit code (both at the job
    level and for the 'main' container) and an integer duration."""
    builder = client.create_batch()
    j = builder.create_job(DOCKER_ROOT_IMAGE, ['bash', '-c', 'exit 7'])
    b = builder.submit()
    status = j.wait()
    debug = str((status, b.debug_info()))
    assert status['exit_code'] == 7, debug
    assert isinstance(status['duration'], int), debug
    assert j._get_exit_code(status, 'main') == 7, debug
def test_pool_standard_instance_cheapest(client: BatchClient):
    """A 1-cpu / 2.5Gi request should be scheduled on a 'standard' worker."""
    builder = client.create_batch()
    requested = {'cpu': '1', 'memory': '2.5Gi'}
    j = builder.create_job(DOCKER_ROOT_IMAGE, ['true'], resources=requested)
    b = builder.submit()
    status = j.wait()
    assert status['state'] == 'Success', str((status, b.debug_info()))
    assert 'standard' in status['status']['worker'], str(
        (status, b.debug_info()))
def test_attached_disk(client: BatchClient):
    """A job requesting 400Gi of storage gets a disk large enough to
    fallocate 390GiB under /io."""
    builder = client.create_batch()
    requested = {'cpu': '0.25', 'memory': '10M', 'storage': '400Gi'}
    j = builder.create_job(
        UBUNTU_IMAGE,
        ['/bin/sh', '-c', 'df -h; fallocate -l 390GiB /io/foo'],
        resources=requested)
    b = builder.submit()
    status = j.wait()
    assert status['state'] == 'Success', str((status, b.debug_info()))
def __init__(self, billing_project: str = None, bucket: str = None):
    """Create a ServiceBackend.

    :param billing_project: billing project name; when None, falls back to
        the user config key ``batch/billing_project``.
    :param bucket: bucket name; when None, falls back to the user config
        key ``batch/bucket``.
    :raises ValueError: if either value is neither given nor configured.
    """
    if billing_project is None:
        billing_project = get_user_config().get('batch', 'billing_project', fallback=None)
        if billing_project is None:
            raise ValueError(
                'the billing_project parameter of ServiceBackend must be set '
                'or run `hailctl config set batch/billing_project '
                'MY_BILLING_PROJECT`')
    self._batch_client = BatchClient(billing_project)
    if bucket is None:
        bucket = get_user_config().get('batch', 'bucket', fallback=None)
        if bucket is None:
            raise ValueError(
                'the bucket parameter of ServiceBackend must be set '
                'or run `hailctl config set batch/bucket '
                'MY_BUCKET`')
    self._bucket_name = bucket
def test_job_private_instance_nonpreemptible(client: BatchClient):
    """Requesting an explicit machine type with preemptible=False places the
    job on a job-private (dedicated) worker."""
    builder = client.create_batch()
    requested = {'machine_type': 'n1-standard-1', 'preemptible': False}
    j = builder.create_job(DOCKER_ROOT_IMAGE, ['true'], resources=requested)
    b = builder.submit()
    status = j.wait()
    assert status['state'] == 'Success', str((status, b.debug_info()))
    assert 'job-private' in status['status']['worker'], str(
        (status, b.debug_info()))
def test_out_of_memory(client: BatchClient):
    """Allocating ~1GB in a job limited to 10M of memory is detected as an
    out-of-memory condition in the 'main' container."""
    builder = client.create_batch()
    requested = {'cpu': '0.25', 'memory': '10M', 'storage': '10Gi'}
    j = builder.create_job('python:3.6-slim-stretch',
                           ['python', '-c', 'x = "a" * 1000**3'],
                           resources=requested)
    b = builder.submit()
    status = j.wait()
    assert j._get_out_of_memory(status, 'main'), str((status, b.debug_info()))
def test_cwd_from_image_workdir(client: BatchClient):
    """A job starts in the image's WORKDIR: `pwd` prints /work for the
    workdir test image."""
    builder = client.create_batch()
    pwd_job = builder.create_job(os.environ['HAIL_WORKDIR_IMAGE'],
                                 ['/bin/sh', '-c', 'pwd'])
    b = builder.submit()
    status = pwd_job.wait()
    assert status['state'] == 'Success', str((status, b.debug_info()))
    job_log = pwd_job.log()
    assert "/work" in job_log['main'], str((job_log, b.debug_info()))
def test_timeout(client: BatchClient):
    """A job exceeding its timeout ends in the Error state with a
    JobTimeoutError and no exit code."""
    builder = client.create_batch()
    j = builder.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'], timeout=5)
    b = builder.submit()
    status = j.wait()
    assert status['state'] == 'Error', str((status, b.debug_info()))
    error_msg = j._get_error(status, 'main')
    assert error_msg and 'JobTimeoutError' in error_msg, str(
        (error_msg, b.debug_info()))
    # a timed-out job never produced an exit code
    assert j.exit_code(status) is None, str((status, b.debug_info()))
def test_bad_jwt_key(self):
    """A JWT signed with a freshly generated (wrong) key is rejected with
    HTTP 401 on submit."""
    fname = pkg_resources.resource_filename(__name__, 'jwt-test-user.json')
    with open(fname) as f:
        userdata = json.loads(f.read())
    # sign with a key the server has never seen
    forged_token = hj.JWTClient(hj.JWTClient.generate_key()).encode(userdata)
    http_session = aiohttp.ClientSession(
        raise_for_status=True,
        timeout=aiohttp.ClientTimeout(total=60))
    bc = BatchClient(http_session, url=os.environ.get('BATCH_URL'),
                     token=forged_token)
    try:
        builder = bc.create_batch()
        j = builder.create_job('alpine', ['false'])
        builder.submit()
        assert False, j
    except aiohttp.ClientResponseError as e:
        if e.status != 401:
            assert False, e
    finally:
        bc.close()