def test_quota_shared_by_io_and_rootfs(client: BatchClient):
    """Check that /io and the root filesystem count against one storage request.

    With a 10Gi storage request, allocating a 7GiB file on either location
    alone must succeed, while allocating 7GiB on both in the same job must
    fail with an out-of-space error in the job log.
    """

    def run_shell(script):
        # Every scenario runs as a single job in its own freshly created batch.
        bb = client.create_batch()
        limits = {'cpu': '0.25', 'memory': '10M', 'storage': '10Gi'}
        job = bb.create_job(DOCKER_ROOT_IMAGE,
                            ['/bin/sh', '-c', script],
                            resources=limits)
        batch = bb.submit()
        return job, batch, job.wait()

    for script in ('fallocate -l 7GiB /foo', 'fallocate -l 7GiB /io/foo'):
        _, batch, status = run_shell(script)
        assert status['state'] == 'Success', str((status, batch.debug_info()))

    job, batch, status = run_shell(
        'fallocate -l 7GiB /foo; fallocate -l 7GiB /io/foo')
    assert status['state'] == 'Failed', str((status, batch.debug_info()))
    job_log = job.log()
    assert "fallocate failed: No space left on device" in job_log['main'], str(
        (job_log, batch.debug_info()))
def test_pool_highcpu_instance(client: BatchClient):
    """Check the worker name reported for several cpu/memory requests.

    Each case submits a trivial job and asserts that the expected substring
    appears in the worker name from the job status.
    """
    cases = (
        ({'cpu': '0.25', 'memory': 'lowmem'}, 'highcpu'),
        ({'cpu': '0.25', 'memory': '50Mi'}, 'highcpu'),
        ({'cpu': '0.5', 'memory': '1Gi'}, 'standard'),
    )
    for requested, expected_worker in cases:
        bb = client.create_batch()
        job = bb.create_job(DOCKER_ROOT_IMAGE, ['true'], resources=requested)
        batch = bb.submit()
        status = job.wait()
        assert status['state'] == 'Success', str((status, batch.debug_info()))
        assert expected_worker in status['status']['worker'], str(
            (status, batch.debug_info()))
def test_batch_status(client: BatchClient):
    """Check batch status for succeeded, failed, running and cancelled batches."""
    # A batch whose single job succeeds.
    ok_builder = client.create_batch()
    ok_builder.create_job(DOCKER_ROOT_IMAGE, ['true'])
    ok = ok_builder.submit()
    ok.wait()
    ok_status = ok.status()
    assert ok_status['complete'] and ok_status['state'] == 'success', str(
        (ok_status, ok.debug_info()))

    # One failing job marks the whole batch as a failure.
    bad_builder = client.create_batch()
    for command in (['false'], ['true']):
        bad_builder.create_job(DOCKER_ROOT_IMAGE, command)
    bad = bad_builder.submit()
    bad.wait()
    bad_status = bad.status()
    assert bad_status['complete'] and bad_status['state'] == 'failure', str(
        (bad_status, bad.debug_info()))

    # Status is read right after submission, while the sleep job is pending.
    live_builder = client.create_batch()
    live_builder.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])
    live = live_builder.submit()
    live_status = live.status()
    assert not live_status['complete'] and live_status['state'] == 'running', str(
        (live_status, live.debug_info()))
    live.cancel()

    # A batch cancelled before its job finishes.
    gone_builder = client.create_batch()
    gone_builder.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])
    gone = gone_builder.submit()
    gone.cancel()
    gone.wait()
    gone_status = gone.status()
    assert gone_status['complete'] and gone_status['state'] == 'cancelled', str(
        (gone_status, gone.debug_info()))
Exemple #4
0
def test_cant_submit_to_default_with_other_ns_creds(client: BatchClient):
    """Check that a job can only submit to the default namespace from it.

    Two jobs each try to run a small ``hailtop.batch`` script after changing
    the deploy configuration inside the container:

    1. set the hailctl domain and delete the deploy config file;
    2. rewrite ``default_namespace`` to ``"default"`` in the deploy config.

    When ``NAMESPACE`` is ``'default'`` both jobs must succeed; otherwise both
    must fail with "Please log in" in the main container log.
    """
    remote_tmpdir = get_user_config().get('batch', 'remote_tmpdir')
    script = f'''import hailtop.batch as hb
backend = hb.ServiceBackend("test", remote_tmpdir="{remote_tmpdir}")
b = hb.Batch(backend=backend)
j = b.new_bash_job()
j.command("echo hi")
b.run()
backend.close()
'''

    def run_and_check(bash_script):
        # Run one bash script as a job and apply the namespace-dependent checks.
        builder = client.create_batch()
        j = builder.create_job(
            os.environ['HAIL_HAIL_BASE_IMAGE'],
            ['/bin/bash', '-c', bash_script],
            mount_tokens=True,
        )
        b = builder.submit()
        status = j.wait()
        if NAMESPACE == 'default':
            assert status['state'] == 'Success', str((status, b.debug_info()))
        else:
            assert status['state'] == 'Failed', str((status, b.debug_info()))
            # Fetch the log once and reuse it in the failure message.
            job_log = j.log()
            assert "Please log in" in job_log['main'], str(
                (job_log, b.debug_info()))

    run_and_check(f'''
hailctl config set domain {DOMAIN}
rm /deploy-config/deploy-config.json
python3 -c \'{script}\'''')

    run_and_check(f'''
jq '.default_namespace = "default"' /deploy-config/deploy-config.json > tmp.json
mv tmp.json /deploy-config/deploy-config.json
python3 -c \'{script}\'''')
def test_include_jobs(client: BatchClient):
    """Check that a batch status does not contain a 'jobs' entry."""
    bb = client.create_batch()
    for _ in range(2):
        bb.create_job(DOCKER_ROOT_IMAGE, ['true'])
    batch = bb.submit()
    summary = batch.status()
    assert 'jobs' not in summary, str((summary, batch.debug_info()))
def test_verify_access_to_public_internet(client: BatchClient):
    """Run curl against example.com inside a job and expect it to succeed."""
    bb = client.create_batch()
    curl_cmd = ['curl', '-fsSL', 'example.com']
    job = bb.create_job(os.environ['HAIL_CURL_IMAGE'], curl_cmd)
    batch = bb.submit()
    outcome = job.wait()
    assert outcome['state'] == 'Success', str((outcome, batch.debug_info()))
def test_create_idempotence(client: BatchClient):
    """Creating two batches from the same token must yield the same batch id."""
    shared_token = secrets.token_urlsafe(32)
    # Both builders are constructed first, then created in the same order.
    builders = [client.create_batch(token=shared_token) for _ in range(2)]
    first, second = [bb._create() for bb in builders]
    assert first.id == second.id
def test_batch(client: BatchClient):
    """Submit three jobs, cancel the batch, and check the resulting job states.

    The first two jobs are waited on before the batch is cancelled, so at
    most one job (the 30 second sleep) may end up cancelled, and exactly one
    failed/errored job may report a positive exit code.
    """
    bb = client.create_batch()
    failing = bb.create_job(DOCKER_ROOT_IMAGE, ['false'])
    quick = bb.create_job(DOCKER_ROOT_IMAGE, ['sleep', '1'])
    bb.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])
    batch = bb.submit()

    for finished in (failing, quick):
        finished.wait()
    batch.cancel()
    batch.wait()
    bstatus = legacy_batch_status(batch)

    def debug():
        # Built lazily so debug info is only fetched when an assertion fails.
        return str((bstatus, batch.debug_info()))

    assert len(bstatus['jobs']) == 3, debug()
    state_count = collections.Counter(
        entry['state'] for entry in bstatus['jobs'])
    n_cancelled = state_count['Cancelled']
    n_complete = sum(
        state_count[state] for state in ('Error', 'Failed', 'Success'))
    assert n_cancelled <= 1, debug()
    assert n_cancelled + n_complete == 3, debug()

    n_failed = 0
    for entry in bstatus['jobs']:
        if entry['state'] in ('Failed', 'Error') and entry['exit_code'] > 0:
            n_failed += 1
    assert n_failed == 1, debug()
def test_list_jobs(client: BatchClient):
    """Check job-list queries against a batch with jobs in different states.

    The batch holds a succeeding job, a failing job, a job whose command is
    the single argument ``'sleep 5'`` (named ``j_error``), and a long-running
    job. The batch is always cancelled at the end, even when an assertion
    fails, so the 1800 second job is not left running.
    """
    b = client.create_batch()
    j_success = b.create_job(DOCKER_ROOT_IMAGE, ['true'])
    j_failure = b.create_job(DOCKER_ROOT_IMAGE, ['false'])
    j_error = b.create_job(DOCKER_ROOT_IMAGE, ['sleep 5'],
                           attributes={'tag': 'bar'})
    j_running = b.create_job(DOCKER_ROOT_IMAGE, ['sleep', '1800'],
                             attributes={'tag': 'foo'})

    b = b.submit()
    try:
        j_success.wait()
        j_failure.wait()
        j_error.wait()

        def assert_job_ids(expected, q=None):
            # Compare the ids returned for query ``q`` with the expected set.
            jobs = b.jobs(q=q)
            actual = {j['job_id'] for j in jobs}
            assert actual == expected, str((jobs, b.debug_info()))

        assert_job_ids({j_success.job_id}, 'success')
        assert_job_ids({j_success.job_id, j_failure.job_id, j_error.job_id},
                       'done')
        assert_job_ids({j_running.job_id}, '!done')
        assert_job_ids({j_running.job_id}, 'tag=foo')
        assert_job_ids({j_error.job_id, j_running.job_id}, 'has:tag')
        assert_job_ids(
            {j_success.job_id, j_failure.job_id, j_error.job_id,
             j_running.job_id},
            None)
    finally:
        # Cancel on every path so a failed assertion does not leak the job.
        b.cancel()
Exemple #10
0
def test_unknown_image(client: BatchClient):
    """Check the job status reported for an image that does not exist.

    The main container must have no exit code and its ``short_error`` must be
    ``'image not found'``.
    """
    b = client.create_batch()
    j = b.create_job(f'{DOCKER_PREFIX}/does-not-exist', ['echo', 'test'])
    b = b.submit()
    status = j.wait()
    # Both assertions carry the status and debug info for diagnosis.
    assert j._get_exit_code(status, 'main') is None, str(
        (status, b.debug_info()))
    assert status['status']['container_statuses']['main'][
        'short_error'] == 'image not found', str((status, b.debug_info()))
Exemple #11
0
def test_list_batches(client: BatchClient):
    """Check list_batches query filters against two freshly tagged batches.

    ``b1`` runs a long sleep and is cancelled part-way through the test;
    ``b2`` runs a short echo. Both carry the same random ``tag`` attribute
    that the queries filter on.
    """
    tag = secrets.token_urlsafe(64)

    long_builder = client.create_batch(attributes={'tag': tag, 'name': 'b1'})
    long_builder.create_job(DOCKER_ROOT_IMAGE, ['sleep', '3600'])
    b1 = long_builder.submit()

    short_builder = client.create_batch(attributes={'tag': tag, 'name': 'b2'})
    short_builder.create_job(DOCKER_ROOT_IMAGE, ['echo', 'test'])
    b2 = short_builder.submit()

    universe = {b1.id, b2.id}

    def assert_batch_ids(expected: Set[int], q=None):
        assert expected <= universe
        newest = max(universe)
        oldest = min(universe)
        span = newest - oldest + 1
        # list_batches returns all batches for all prev run tests so we set a limit
        listed = client.list_batches(q, last_batch_id=newest + 1, limit=span)
        full_actual = {batch.id for batch in listed}
        actual = full_actual & universe
        assert actual == expected, str(
            (full_actual, newest, span, b1.debug_info(), b2.debug_info()))

    both = {b1.id, b2.id}
    only_b1 = {b1.id}
    only_b2 = {b2.id}

    assert_batch_ids(both)
    assert_batch_ids(both, f'tag={tag}')

    b2.wait()

    # b2 has finished successfully; b1 is still sleeping.
    assert_batch_ids(only_b1, f'!complete tag={tag}')
    assert_batch_ids(only_b2, f'complete tag={tag}')
    assert_batch_ids(only_b1, f'!success tag={tag}')
    assert_batch_ids(only_b2, f'success tag={tag}')

    b1.cancel()
    b1.wait()

    # After cancellation b1 is complete but still not a success.
    assert_batch_ids(only_b1, f'!success tag={tag}')
    assert_batch_ids(only_b2, f'success tag={tag}')
    assert_batch_ids(set(), f'!complete tag={tag}')
    assert_batch_ids(both, f'complete tag={tag}')

    assert_batch_ids(only_b2, f'tag={tag} name=b2')
Exemple #12
0
def test_nonzero_storage(client: BatchClient):
    """Run a trivial job that requests 20Gi of storage and expect success."""
    bb = client.create_batch()
    job = bb.create_job(
        'ubuntu:18.04',
        ['/bin/sh', '-c', 'true'],
        resources={'cpu': '0.25', 'memory': '10M', 'storage': '20Gi'},
    )
    batch = bb.submit()
    outcome = job.wait()
    assert outcome['state'] == 'Success', str((outcome, batch.debug_info()))
Exemple #13
0
def test_exit_code_duration(client: BatchClient):
    """Check that a job exiting with 7 reports that code and an int duration."""
    bb = client.create_batch()
    job = bb.create_job(DOCKER_ROOT_IMAGE, ['bash', '-c', 'exit 7'])
    batch = bb.submit()
    status = job.wait()

    def debug():
        # Built lazily so debug info is only fetched when an assertion fails.
        return str((status, batch.debug_info()))

    assert status['exit_code'] == 7, debug()
    assert isinstance(status['duration'], int), debug()
    assert job._get_exit_code(status, 'main') == 7, debug()
Exemple #14
0
def test_long_log_line(client: BatchClient):
    """Run a job that echoes many characters without a newline; expect success."""
    bb = client.create_batch()
    shell_loop = 'for _ in {0..70000}; do echo -n a; done'
    job = bb.create_job(DOCKER_ROOT_IMAGE, ['/bin/sh', '-c', shell_loop])
    batch = bb.submit()
    outcome = job.wait()
    assert outcome['state'] == 'Success', str((outcome, batch.debug_info()))
Exemple #15
0
def test_out_of_memory(client: BatchClient):
    """Allocate a 1000**3-character string with a 10M memory request.

    The main container is expected to be flagged as out of memory.
    """
    bb = client.create_batch()
    job = bb.create_job(
        'python:3.6-slim-stretch',
        ['python', '-c', 'x = "a" * 1000**3'],
        resources={'cpu': '0.25', 'memory': '10M', 'storage': '10Gi'},
    )
    batch = bb.submit()
    outcome = job.wait()
    assert job._get_out_of_memory(outcome, 'main'), str(
        (outcome, batch.debug_info()))
Exemple #16
0
def test_attached_disk(client: BatchClient):
    """Request 400Gi of storage and allocate a 390GiB file under /io."""
    bb = client.create_batch()
    shell = 'df -h; fallocate -l 390GiB /io/foo'
    job = bb.create_job(
        UBUNTU_IMAGE,
        ['/bin/sh', '-c', shell],
        resources={'cpu': '0.25', 'memory': '10M', 'storage': '400Gi'},
    )
    batch = bb.submit()
    outcome = job.wait()
    assert outcome['state'] == 'Success', str((outcome, batch.debug_info()))
Exemple #17
0
def test_cwd_from_image_workdir(client: BatchClient):
    """Run ``pwd`` in the workdir image and expect "/work" in the job log."""
    bb = client.create_batch()
    pwd_cmd = ['/bin/sh', '-c', 'pwd']
    job = bb.create_job(os.environ['HAIL_WORKDIR_IMAGE'], pwd_cmd)
    batch = bb.submit()
    outcome = job.wait()
    assert outcome['state'] == 'Success', str((outcome, batch.debug_info()))
    output = job.log()
    assert "/work" in output['main'], str((output, batch.debug_info()))
Exemple #18
0
def test_get_job(client: BatchClient):
    """Check that a job fetched by id reports the same batch and job ids."""
    b = client.create_batch()
    j = b.create_job(DOCKER_ROOT_IMAGE, ['true'])
    b = b.submit()

    j2 = client.get_job(*j.id)
    status2 = j2.status()
    # The message must use status2: no variable named `status` exists here,
    # so the original message raised NameError instead of reporting details.
    assert (status2['batch_id'], status2['job_id']) == j.id, str(
        (status2, b.debug_info()))
Exemple #19
0
def test_pool_standard_instance_cheapest(client: BatchClient):
    """Request 1 cpu / 2.5Gi memory and expect a 'standard' worker name."""
    bb = client.create_batch()
    job = bb.create_job(
        DOCKER_ROOT_IMAGE,
        ['true'],
        resources={'cpu': '1', 'memory': '2.5Gi'},
    )
    batch = bb.submit()
    outcome = job.wait()

    def debug():
        # Built lazily so debug info is only fetched when an assertion fails.
        return str((outcome, batch.debug_info()))

    assert outcome['state'] == 'Success', debug()
    assert 'standard' in outcome['status']['worker'], debug()
Exemple #20
0
def test_job_private_instance_nonpreemptible(client: BatchClient):
    """Request a non-preemptible n1-standard-1 machine for a trivial job.

    The job must succeed and the worker name must contain 'job-private'.
    """
    bb = client.create_batch()
    job = bb.create_job(
        DOCKER_ROOT_IMAGE,
        ['true'],
        resources={'machine_type': 'n1-standard-1', 'preemptible': False},
    )
    batch = bb.submit()
    outcome = job.wait()

    def debug():
        # Built lazily so debug info is only fetched when an assertion fails.
        return str((outcome, batch.debug_info()))

    assert outcome['state'] == 'Success', debug()
    assert 'job-private' in outcome['status']['worker'], debug()
Exemple #21
0
def test_timeout(client: BatchClient):
    """Run a 30 second sleep with ``timeout=5`` and check the reported error.

    The job must end in state 'Error', mention JobTimeoutError in the main
    container error, and have no exit code.
    """
    bb = client.create_batch()
    job = bb.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'], timeout=5)
    batch = bb.submit()
    outcome = job.wait()
    assert outcome['state'] == 'Error', str((outcome, batch.debug_info()))

    main_error = job._get_error(outcome, 'main')
    assert main_error and 'JobTimeoutError' in main_error, str(
        (main_error, batch.debug_info()))

    assert job.exit_code(outcome) is None, str((outcome, batch.debug_info()))
Exemple #22
0
def test_user_authentication_within_job(client: BatchClient):
    """Check that ``hailctl auth user`` fails in a job without mounted tokens."""
    batch = client.create_batch()
    cmd = ['bash', '-c', 'hailctl auth user']
    no_token = batch.create_job(os.environ['CI_UTILS_IMAGE'],
                                cmd,
                                mount_tokens=False)
    b = batch.submit()

    no_token_status = no_token.wait()
    # The message previously referenced the misspelled `not_token_status`,
    # which raised NameError instead of reporting the failing status.
    assert no_token_status['state'] == 'Failed', str(
        (no_token_status, b.debug_info()))
Exemple #23
0
def test_out_of_storage(client: BatchClient):
    """Allocate a 100GiB file on the root filesystem with a 5Gi storage request.

    The job must fail and its main log must contain the out-of-space message.
    """
    builder = client.create_batch()
    resources = {'cpu': '0.25', 'memory': '10M', 'storage': '5Gi'}
    j = builder.create_job(DOCKER_ROOT_IMAGE,
                           ['/bin/sh', '-c', 'fallocate -l 100GiB /foo'],
                           resources=resources)
    b = builder.submit()
    status = j.wait()
    assert status['state'] == 'Failed', str((status, b.debug_info()))
    job_log = j.log()
    # Include the log and debug info so a failure here can be diagnosed.
    assert "fallocate failed: No space left on device" in job_log['main'], str(
        (job_log, b.debug_info()))
Exemple #24
0
def test_garbage_image(client: BatchClient):
    """Submit a job with the image name 'dsafaaadsf' and check the error status.

    The main container must have no exit code, must report an error, and the
    job must end in state 'Error'.
    """
    bb = client.create_batch()
    job = bb.create_job('dsafaaadsf', ['echo', 'test'])
    batch = bb.submit()
    outcome = job.wait()

    def debug():
        # Built lazily so debug info is only fetched when an assertion fails.
        return str((outcome, batch.debug_info()))

    expected_codes = {'main': None}
    assert job._get_exit_codes(outcome) == expected_codes, debug()
    assert job._get_error(outcome, 'main') is not None, debug()
    assert outcome['state'] == 'Error', debug()
Exemple #25
0
def test_quota_applies_to_volume(client: BatchClient):
    """Allocate a 100GiB file under /data with a 5Gi storage request.

    The job runs in the image named by ``HAIL_VOLUME_IMAGE``; it must fail and
    its main log must contain the out-of-space message.
    """
    builder = client.create_batch()
    resources = {'cpu': '0.25', 'memory': '10M', 'storage': '5Gi'}
    j = builder.create_job(os.environ['HAIL_VOLUME_IMAGE'],
                           ['/bin/sh', '-c', 'fallocate -l 100GiB /data/foo'],
                           resources=resources)
    b = builder.submit()
    status = j.wait()
    assert status['state'] == 'Failed', str((status, b.debug_info()))
    job_log = j.log()
    # Include the log and debug info so a failure here can be diagnosed.
    assert "fallocate failed: No space left on device" in job_log['main'], str(
        (job_log, b.debug_info()))
Exemple #26
0
def test_verify_no_access_to_google_metadata_server(client: BatchClient):
    """Curl metadata.google.internal from a job and expect a resolution failure."""
    bb = client.create_batch()
    curl_cmd = [
        'curl', '-fsSL', 'metadata.google.internal', '--max-time', '10'
    ]
    job = bb.create_job(os.environ['HAIL_CURL_IMAGE'], curl_cmd)
    batch = bb.submit()
    outcome = job.wait()
    assert outcome['state'] == 'Failed', str((outcome, batch.debug_info()))
    output = job.log()
    assert "Could not resolve host" in output['main'], str(
        (output, batch.debug_info()))
Exemple #27
0
def test_verify_no_access_to_metadata_server(client: BatchClient):
    """Curl 169.254.169.254 from a job and expect the connection to time out."""
    builder = client.create_batch()
    j = builder.create_job(
        os.environ['HAIL_CURL_IMAGE'],
        ['curl', '-fsSL', '169.254.169.254', '--max-time', '10'])
    # Keep the submitted batch: the assertion messages below call
    # b.debug_info(), which raised NameError when the result was discarded.
    b = builder.submit()
    status = j.wait()
    assert status['state'] == 'Failed', str((status, b.debug_info()))
    job_log = j.log()
    assert "Connection timed out" in job_log['main'], str(
        (job_log, b.debug_info()))
Exemple #28
0
def test_job(client: BatchClient):
    """Run ``echo test`` and check the job status, exit code and log output."""
    bb = client.create_batch()
    job = bb.create_job(DOCKER_ROOT_IMAGE, ['echo', 'test'])
    batch = bb.submit()
    outcome = job.wait()

    def debug():
        # Built lazily so debug info is only fetched when an assertion fails.
        return str((outcome, batch.debug_info()))

    assert 'attributes' not in outcome, debug()
    assert outcome['state'] == 'Success', debug()
    assert outcome['exit_code'] == 0, debug()
    assert job._get_exit_code(outcome, 'main') == 0, debug()

    output = job.log()
    assert output['main'] == 'test\n', str((output, batch.debug_info()))
Exemple #29
0
def test_running_job_log_and_status(client: BatchClient):
    """Fetch the log of a job once it is running (or already complete).

    The test only checks that ``j.log()`` can be called without raising. The
    batch is cancelled on every path so the 300 second sleep is not left
    running.
    """
    import time  # local import: this block cannot see the file's import list

    b = client.create_batch()
    j = b.create_job(DOCKER_ROOT_IMAGE, ['sleep', '300'])
    b = b.submit()

    try:
        # Poll until the job is running or has finished, pausing briefly
        # between polls instead of spinning on the service.
        while not (j.status()['state'] == 'Running' or j.is_complete()):
            time.sleep(0.1)

        j.log()
        # FIXME after batch1 goes away, check running status
    finally:
        b.cancel()
Exemple #30
0
def test_duplicate_parents(client: BatchClient):
    """Submitting a job that lists the same parent twice must fail with HTTP 400."""
    builder = client.create_batch()
    head = builder.create_job(DOCKER_ROOT_IMAGE, command=['echo', 'head'])
    builder.create_job(
        DOCKER_ROOT_IMAGE,
        command=['echo', 'tail'],
        parents=[head, head],
    )
    try:
        submitted = builder.submit()
    except aiohttp.ClientResponseError as err:
        assert err.status == 400
        return
    # Reaching this point means the submission was accepted unexpectedly.
    assert False, f'should receive a 400 Bad Request {submitted.id}'