Esempio n. 1
0
def test_pool_highcpu_instance(client: BatchClient):
    """Check the worker label reported for jobs with different resource requests.

    Three single-job batches are submitted one after another. Each job must
    finish in state 'Success', and the worker name recorded in its status
    must contain the expected label.
    """
    # (requested resources, substring expected in status['status']['worker'])
    cases = [
        ({'cpu': '0.25', 'memory': 'lowmem'}, 'highcpu'),
        ({'cpu': '0.25', 'memory': '50Mi'}, 'highcpu'),
        ({'cpu': '0.5', 'memory': '1Gi'}, 'standard'),
    ]
    for requested, expected_label in cases:
        builder = client.create_batch()
        job = builder.create_job(DOCKER_ROOT_IMAGE, ['true'], resources=requested)
        batch = builder.submit()
        status = job.wait()
        assert status['state'] == 'Success', str((status, batch.debug_info()))
        assert expected_label in status['status']['worker'], str(
            (status, batch.debug_info()))
Esempio n. 2
0
def main(args):
    """Dispatch to the sub-command module selected by ``args.module``.

    With an empty ``args`` the parser's help is printed and the process exits
    with status 0. Otherwise the arguments are parsed, a ``BatchClient`` is
    created, the selected module's ``main`` is invoked, and the client is
    closed whether or not the sub-command raised.
    """
    if not args:
        parser().print_help()
        sys.exit(0)

    handlers = {
        'billing': billing,
        'list': list_batches,
        'delete': delete,
        'get': get,
        'cancel': cancel,
        'log': log,
        'job': job,
        'wait': wait
    }

    args, pass_through_args = parser().parse_known_args(args=args)

    # hailctl batch doesn't create batches
    client = BatchClient(None)

    try:
        if args.module != 'billing':
            handlers[args.module].main(args, pass_through_args, client)
        else:
            # billing is served by its own nested cli module, imported lazily
            from .billing import cli  # pylint: disable=import-outside-toplevel
            cli.main(args, pass_through_args, client)
    finally:
        client.close()
Esempio n. 3
0
def client():
    """Yield a ``BatchClient`` built on a fresh aiohttp session, then close it.

    The session raises on HTTP error statuses and uses a 60 second total
    timeout. The service URL is read from the ``BATCH_URL`` environment
    variable (``None`` when unset).
    """
    # NOTE(review): presumably registered as a pytest fixture; the decorator
    # is not visible in this excerpt.
    timeout = aiohttp.ClientTimeout(total=60)
    session = aiohttp.ClientSession(raise_for_status=True, timeout=timeout)
    batch_client = BatchClient(session, url=os.environ.get('BATCH_URL'))
    yield batch_client
    batch_client.close()
Esempio n. 4
0
def test_quota_shared_by_io_and_rootfs(client: BatchClient):
    """Check that a 10Gi storage request is shared between / and /io.

    A 7GiB file fits when written only under / or only under /io, but writing
    7GiB to both in the same job must fail with a "No space left on device"
    message in the main container log.
    """

    def run_shell(script):
        # Submit one job running `script` under /bin/sh and wait for it.
        builder = client.create_batch()
        resources = {'cpu': '0.25', 'memory': '10M', 'storage': '10Gi'}
        job = builder.create_job(DOCKER_ROOT_IMAGE,
                                 ['/bin/sh', '-c', script],
                                 resources=resources)
        batch = builder.submit()
        return job, batch, job.wait()

    _, batch, status = run_shell('fallocate -l 7GiB /foo')
    assert status['state'] == 'Success', str((status, batch.debug_info()))

    _, batch, status = run_shell('fallocate -l 7GiB /io/foo')
    assert status['state'] == 'Success', str((status, batch.debug_info()))

    job, batch, status = run_shell(
        'fallocate -l 7GiB /foo; fallocate -l 7GiB /io/foo')
    assert status['state'] == 'Failed', str((status, batch.debug_info()))
    job_log = job.log()
    assert "fallocate failed: No space left on device" in job_log['main'], str(
        (job_log, batch.debug_info()))
Esempio n. 5
0
File: cli.py Progetto: mpilo24a/hail
def main(args):
    """Dispatch to the sub-command module selected by ``args.module``.

    With an empty ``args`` the parser's help is printed and the process exits
    with status 0. Otherwise the arguments are parsed, a ``BatchClient`` is
    built on an aiohttp session pointed at ``args.master_url``, the selected
    module's ``main`` is invoked, and the client is closed afterwards even if
    the sub-command raised.
    """
    if not args:
        parser().print_help()
        sys.exit(0)

    handlers = {
        'list': list_batches,
        'delete': delete,
        'get': get,
        'cancel': cancel,
        'log': log,
        'pod_status': pod_status,
        'wait': wait
    }

    args, pass_through_args = parser().parse_known_args(args=args)

    # Fail on HTTP error statuses and cap each request at 60 seconds total.
    http_session = aiohttp.ClientSession(
        raise_for_status=True,
        timeout=aiohttp.ClientTimeout(total=60))
    client = BatchClient(http_session, url=args.master_url)

    try:
        handler = handlers[args.module]
        handler.main(args, pass_through_args, client)
    finally:
        client.close()
Esempio n. 6
0
def test_batch_status(client: BatchClient):
    """Check the batch-level status for success, failure, running and cancelled.

    Four batches are exercised in turn:

    1. a single succeeding job -> complete, state 'success';
    2. one failing and one succeeding job -> complete, state 'failure';
    3. a 30 second sleep inspected right after submit -> not complete,
       state 'running';
    4. a 30 second sleep cancelled right after submit -> complete,
       state 'cancelled'.
    """
    b1 = client.create_batch()
    b1.create_job(DOCKER_ROOT_IMAGE, ['true'])
    b1 = b1.submit()
    b1.wait()
    b1s = b1.status()
    assert b1s['complete'] and b1s['state'] == 'success', str(
        (b1s, b1.debug_info()))

    b2 = client.create_batch()
    b2.create_job(DOCKER_ROOT_IMAGE, ['false'])
    b2.create_job(DOCKER_ROOT_IMAGE, ['true'])
    b2 = b2.submit()
    b2.wait()
    b2s = b2.status()
    assert b2s['complete'] and b2s['state'] == 'failure', str(
        (b2s, b2.debug_info()))

    b3 = client.create_batch()
    b3.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])
    b3 = b3.submit()
    try:
        b3s = b3.status()
        assert not b3s['complete'] and b3s['state'] == 'running', str(
            (b3s, b3.debug_info()))
    finally:
        # Cancel even when the status check fails so the sleeping job is not
        # left running after the test.
        b3.cancel()

    b4 = client.create_batch()
    b4.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])
    b4 = b4.submit()
    b4.cancel()
    b4.wait()
    b4s = b4.status()
    assert b4s['complete'] and b4s['state'] == 'cancelled', str(
        (b4s, b4.debug_info()))
Esempio n. 7
0
def test_get_nonexistent_job(client: BatchClient):
    """Looking up job 666 of batch 1 must be rejected with HTTP 404.

    Any other ``aiohttp.ClientResponseError`` status is re-raised. If the
    lookup raises nothing at all the test fails, since a passing lookup means
    the job was found.
    """
    try:
        job = client.get_job(1, 666)
    except aiohttp.ClientResponseError as e:
        if e.status != 404:
            raise
    else:
        # Without this branch the test would pass silently when the lookup
        # unexpectedly succeeds.
        assert False, f'expected a 404 for a nonexistent job, got {job}'
Esempio n. 8
0
def test_get_job(client: BatchClient):
    """A job fetched through ``client.get_job`` must report the original ids.

    A one-job batch is submitted, the job is looked up again by unpacking
    ``j.id`` into ``get_job``, and the (batch_id, job_id) pair from the
    fetched job's status is compared with ``j.id``.
    """
    b = client.create_batch()
    j = b.create_job(DOCKER_ROOT_IMAGE, ['true'])
    b = b.submit()

    j2 = client.get_job(*j.id)
    status2 = j2.status()
    # The failure message must use status2: no variable named `status` exists
    # in this test, so referencing it would raise NameError instead of
    # reporting the mismatch.
    assert (status2['batch_id'], status2['job_id']) == j.id, str(
        (status2, b.debug_info()))
Esempio n. 9
0
def test_cant_submit_to_default_with_other_ns_creds(client: BatchClient):
    """Submitting from inside a job must only succeed in the default namespace.

    A small hailtop.batch script is run inside a job twice, each time after
    tampering with the deploy configuration in a different way (removing the
    deploy-config file, or rewriting its default_namespace to "default").
    When NAMESPACE is 'default' both jobs must succeed; otherwise both must
    fail and the main log must contain "Please log in".
    """
    remote_tmpdir = get_user_config().get('batch', 'remote_tmpdir')
    script = f'''import hailtop.batch as hb
backend = hb.ServiceBackend("test", remote_tmpdir="{remote_tmpdir}")
b = hb.Batch(backend=backend)
j = b.new_bash_job()
j.command("echo hi")
b.run()
backend.close()
'''

    # First attempt: drop the mounted deploy config and point at DOMAIN.
    builder = client.create_batch()
    job = builder.create_job(
        os.environ['HAIL_HAIL_BASE_IMAGE'],
        [
            '/bin/bash',
            '-c',
            f'''
hailctl config set domain {DOMAIN}
rm /deploy-config/deploy-config.json
python3 -c \'{script}\'''',
        ],
        mount_tokens=True,
    )
    batch = builder.submit()
    status = job.wait()
    if NAMESPACE != 'default':
        assert status['state'] == 'Failed', str((status, batch.debug_info()))
        assert "Please log in" in job.log()['main'], (str(job.log()['main']),
                                                      status)
    else:
        assert status['state'] == 'Success', str((status, batch.debug_info()))

    # Second attempt: rewrite the deploy config's default namespace.
    builder = client.create_batch()
    job = builder.create_job(
        os.environ['HAIL_HAIL_BASE_IMAGE'],
        [
            '/bin/bash',
            '-c',
            f'''
jq '.default_namespace = "default"' /deploy-config/deploy-config.json > tmp.json
mv tmp.json /deploy-config/deploy-config.json
python3 -c \'{script}\'''',
        ],
        mount_tokens=True,
    )
    batch = builder.submit()
    status = job.wait()
    if NAMESPACE != 'default':
        assert status['state'] == 'Failed', str((status, batch.debug_info()))
        job_log = job.log()
        assert "Please log in" in job_log['main'], str(
            (job_log, batch.debug_info()))
    else:
        assert status['state'] == 'Success', str((status, batch.debug_info()))
Esempio n. 10
0
 def __init__(self, billing_project=None):
     """Create the backend's ``BatchClient`` for the given billing project.

     When ``billing_project`` is None the value is read from the user
     configuration (section 'batch', option 'billing_project').

     Raises:
         ValueError: if no billing project is passed and none is configured.
     """
     project = billing_project
     if project is None:
         project = get_user_config().get('batch',
                                         'billing_project',
                                         fallback=None)
         if project is None:
             raise ValueError(
                 'the billing_project parameter of ServiceBackend must be set '
                 'or run `hailctl config set batch/billing_project '
                 'YOUR_BILLING_PROJECT`')
     self._batch_client = BatchClient(project)
Esempio n. 11
0
 def test_bad_token(self):
     """Submitting a batch with a random token must be rejected with HTTP 401.

     The test fails if the submission goes through, or if the server answers
     with any other error status. The client is closed in every case.
     """
     random_bytes = secrets.token_bytes(32)
     token = base64.urlsafe_b64encode(random_bytes).decode('ascii')
     bc = BatchClient(_token=token, _service='batch2')
     try:
         builder = bc.create_batch()
         job = builder.create_job('ubuntu:18.04', ['false'])
         builder.submit()
     except aiohttp.ClientResponseError as e:
         assert e.status == 401, e
     else:
         # Reaching this point means the bad token was accepted.
         assert False, job
     finally:
         bc.close()
Esempio n. 12
0
def test_delete_batch(client: BatchClient):
    """After a batch is deleted, fetching one of its jobs may only fail with 404.

    Any other ``aiohttp.ClientResponseError`` status is re-raised.
    """
    builder = client.create_batch()
    job = builder.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])
    batch = builder.submit()
    batch.delete()

    # verify doesn't exist
    # NOTE(review): if get_job raises nothing the test still passes; confirm
    # whether a successful lookup after delete should be treated as a failure.
    try:
        client.get_job(*job.id)
    except aiohttp.ClientResponseError as e:
        if e.status != 404:
            raise
Esempio n. 13
0
def test_create_idempotence(client: BatchClient):
    """Two batch builders sharing one token must create the same batch id."""
    shared_token = secrets.token_urlsafe(32)
    first_builder = client.create_batch(token=shared_token)
    second_builder = client.create_batch(token=shared_token)
    created = [builder._create() for builder in (first_builder, second_builder)]
    assert created[0].id == created[1].id
Esempio n. 14
0
def test_batch(client: BatchClient):
    """Cancel a three-job batch after two jobs finish and check the job states.

    The batch holds a failing job, a 1 second sleep and a 30 second sleep.
    After waiting for the first two and cancelling the batch, at most one job
    may be 'Cancelled', every job must be either cancelled or finished, and
    exactly one 'Failed'/'Error' job must have a positive exit code.
    """
    builder = client.create_batch()
    failing_job = builder.create_job(DOCKER_ROOT_IMAGE, ['false'])
    short_job = builder.create_job(DOCKER_ROOT_IMAGE, ['sleep', '1'])
    builder.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'])
    b = builder.submit()

    failing_job.wait()
    short_job.wait()
    b.cancel()
    b.wait()
    bstatus = legacy_batch_status(b)

    assert len(bstatus['jobs']) == 3, str((bstatus, b.debug_info()))
    state_count = collections.Counter(job['state'] for job in bstatus['jobs'])
    n_cancelled = state_count['Cancelled']
    n_complete = sum(
        state_count[state] for state in ('Error', 'Failed', 'Success'))
    assert n_cancelled <= 1, str((bstatus, b.debug_info()))
    assert n_cancelled + n_complete == 3, str((bstatus, b.debug_info()))

    # Count the unsuccessful jobs that report a positive exit code.
    n_failed = sum(
        1 for job in bstatus['jobs']
        if job['state'] in ('Failed', 'Error') and job['exit_code'] > 0)
    assert n_failed == 1, str((bstatus, b.debug_info()))
Esempio n. 15
0
def test_verify_access_to_public_internet(client: BatchClient):
    """A job running ``curl -fsSL example.com`` must finish in state 'Success'.

    The image is taken from the ``HAIL_CURL_IMAGE`` environment variable.
    """
    builder = client.create_batch()
    curl_command = ['curl', '-fsSL', 'example.com']
    job = builder.create_job(os.environ['HAIL_CURL_IMAGE'], curl_command)
    batch = builder.submit()
    status = job.wait()
    assert status['state'] == 'Success', str((status, batch.debug_info()))
Esempio n. 16
0
def test_list_jobs(client: BatchClient):
    """Check the job-list query language against one batch with four jobs.

    The batch contains a succeeding job, a failing job, a job expected to end
    in an error, and a long-running job. After the first three have finished,
    ``b.jobs(q=...)`` is queried with several filters and the returned job ids
    are compared against the expected sets. The batch is always cancelled at
    the end so the long-running job does not outlive the test.
    """
    b = client.create_batch()
    j_success = b.create_job(DOCKER_ROOT_IMAGE, ['true'])
    j_failure = b.create_job(DOCKER_ROOT_IMAGE, ['false'])
    # NOTE(review): 'sleep 5' is passed as a single argv element, presumably
    # on purpose so the command cannot be executed and the job errors.
    j_error = b.create_job(DOCKER_ROOT_IMAGE, ['sleep 5'],
                           attributes={'tag': 'bar'})
    j_running = b.create_job(DOCKER_ROOT_IMAGE, ['sleep', '1800'],
                             attributes={'tag': 'foo'})

    b = b.submit()

    def assert_job_ids(expected, q=None):
        # Compare the ids returned for query `q` with the expected set.
        jobs = b.jobs(q=q)
        actual = {j['job_id'] for j in jobs}
        assert actual == expected, str((jobs, b.debug_info()))

    try:
        j_success.wait()
        j_failure.wait()
        j_error.wait()

        assert_job_ids({j_success.job_id}, 'success')
        assert_job_ids({j_success.job_id, j_failure.job_id, j_error.job_id},
                       'done')
        assert_job_ids({j_running.job_id}, '!done')
        assert_job_ids({j_running.job_id}, 'tag=foo')
        assert_job_ids({j_error.job_id, j_running.job_id}, 'has:tag')
        assert_job_ids(
            {j_success.job_id, j_failure.job_id, j_error.job_id,
             j_running.job_id},
            None)
    finally:
        # Cancel even when a wait or an assertion fails; otherwise the
        # 1800 second job would keep running after the test.
        b.cancel()
Esempio n. 17
0
def test_include_jobs(client: BatchClient):
    """The status of a submitted two-job batch must not contain a 'jobs' key."""
    builder = client.create_batch()
    for _ in range(2):
        builder.create_job(DOCKER_ROOT_IMAGE, ['true'])
    batch = builder.submit()
    batch_status = batch.status()
    assert 'jobs' not in batch_status, str((batch_status, batch.debug_info()))
Esempio n. 18
0
def test_unknown_image(client: BatchClient):
    """A job using a missing image must have no exit code and a clear error.

    The main container's ``short_error`` must equal 'image not found'.
    """
    builder = client.create_batch()
    missing_image = f'{DOCKER_PREFIX}/does-not-exist'
    job = builder.create_job(missing_image, ['echo', 'test'])
    batch = builder.submit()
    status = job.wait()
    assert job._get_exit_code(status, 'main') is None
    main_container = status['status']['container_statuses']['main']
    assert main_container['short_error'] == 'image not found', str(
        (status, batch.debug_info()))
Esempio n. 19
0
def test_nonzero_storage(client: BatchClient):
    """A trivial job that requests 20Gi of storage must finish successfully."""
    builder = client.create_batch()
    requested = {'cpu': '0.25', 'memory': '10M', 'storage': '20Gi'}
    command = ['/bin/sh', '-c', 'true']
    job = builder.create_job('ubuntu:18.04', command, resources=requested)
    batch = builder.submit()
    status = job.wait()
    assert status['state'] == 'Success', str((status, batch.debug_info()))
Esempio n. 20
0
def test_list_batches(client: BatchClient):
    """Check batch-list queries against two batches sharing a random tag.

    b1 runs a long sleep and b2 a quick echo. The queries (by tag, by
    completion, by success, by name) are evaluated before b2 finishes, after
    b2 finishes, and after b1 has been cancelled. Only the ids of these two
    batches are considered when comparing results.
    """
    tag = secrets.token_urlsafe(64)

    def submit_tagged(name, command):
        # Submit a single-job batch carrying the shared tag and a name.
        builder = client.create_batch(attributes={'tag': tag, 'name': name})
        builder.create_job(DOCKER_ROOT_IMAGE, command)
        return builder.submit()

    b1 = submit_tagged('b1', ['sleep', '3600'])
    b2 = submit_tagged('b2', ['echo', 'test'])

    batch_id_test_universe = {b1.id, b2.id}
    max_id = max(batch_id_test_universe)
    span = max_id - min(batch_id_test_universe) + 1

    def assert_batch_ids(expected: Set[int], q=None):
        assert expected.issubset(batch_id_test_universe)
        # list_batches returns all batches for all prev run tests so we set a limit
        batches = client.list_batches(q, last_batch_id=max_id + 1, limit=span)
        full_actual = {b.id for b in batches}
        actual = full_actual & batch_id_test_universe
        assert actual == expected, str(
            (full_actual, max_id, span, b1.debug_info(), b2.debug_info()))

    assert_batch_ids({b1.id, b2.id})
    assert_batch_ids({b1.id, b2.id}, f'tag={tag}')

    b2.wait()

    # b2 has finished; b1 is still sleeping.
    for expected, query in [
        ({b1.id}, f'!complete tag={tag}'),
        ({b2.id}, f'complete tag={tag}'),
        ({b1.id}, f'!success tag={tag}'),
        ({b2.id}, f'success tag={tag}'),
    ]:
        assert_batch_ids(expected, query)

    b1.cancel()
    b1.wait()

    # Both batches are complete now; only b2 counts as a success.
    for expected, query in [
        ({b1.id}, f'!success tag={tag}'),
        ({b2.id}, f'success tag={tag}'),
        (set(), f'!complete tag={tag}'),
        ({b1.id, b2.id}, f'complete tag={tag}'),
        ({b2.id}, f'tag={tag} name=b2'),
    ]:
        assert_batch_ids(expected, query)
Esempio n. 21
0
def test_long_log_line(client: BatchClient):
    """A job that echoes 'a' repeatedly without newlines must still succeed."""
    builder = client.create_batch()
    # NOTE(review): `{0..70000}` is brace expansion; confirm the image's
    # /bin/sh supports it, otherwise the loop body runs only once.
    shell_loop = 'for _ in {0..70000}; do echo -n a; done'
    job = builder.create_job(DOCKER_ROOT_IMAGE, ['/bin/sh', '-c', shell_loop])
    batch = builder.submit()
    status = job.wait()
    assert status['state'] == 'Success', str((status, batch.debug_info()))
Esempio n. 22
0
def test_exit_code_duration(client: BatchClient):
    """A job exiting with code 7 must report that code and an int duration.

    The exit code is checked both in the top-level status and through the
    job's ``_get_exit_code`` helper for the 'main' container.
    """
    builder = client.create_batch()
    job = builder.create_job(DOCKER_ROOT_IMAGE, ['bash', '-c', 'exit 7'])
    batch = builder.submit()
    status = job.wait()
    assert status['exit_code'] == 7, str((status, batch.debug_info()))
    assert isinstance(status['duration'], int), str(
        (status, batch.debug_info()))
    main_exit_code = job._get_exit_code(status, 'main')
    assert main_exit_code == 7, str((status, batch.debug_info()))
Esempio n. 23
0
def test_pool_standard_instance_cheapest(client: BatchClient):
    """A 1 cpu / 2.5Gi job must succeed on a worker whose name has 'standard'."""
    builder = client.create_batch()
    requested = {'cpu': '1', 'memory': '2.5Gi'}
    job = builder.create_job(DOCKER_ROOT_IMAGE, ['true'], resources=requested)
    batch = builder.submit()
    status = job.wait()
    assert status['state'] == 'Success', str((status, batch.debug_info()))
    worker_name = status['status']['worker']
    assert 'standard' in worker_name, str((status, batch.debug_info()))
Esempio n. 24
0
def test_attached_disk(client: BatchClient):
    """A job requesting 400Gi of storage must be able to allocate 390GiB in /io."""
    builder = client.create_batch()
    requested = {'cpu': '0.25', 'memory': '10M', 'storage': '400Gi'}
    shell_script = 'df -h; fallocate -l 390GiB /io/foo'
    job = builder.create_job(
        UBUNTU_IMAGE, ['/bin/sh', '-c', shell_script], resources=requested)
    batch = builder.submit()
    status = job.wait()
    assert status['state'] == 'Success', str((status, batch.debug_info()))
Esempio n. 25
0
    def __init__(self, billing_project: str = None, bucket: str = None):
        """Resolve the billing project and bucket, then create the batch client.

        Each argument that is None is looked up in the user configuration
        (section 'batch', options 'billing_project' and 'bucket').

        Args:
            billing_project: billing project passed to ``BatchClient``.
            bucket: bucket name stored on the backend as ``_bucket_name``.

        Raises:
            ValueError: if either value is neither passed nor configured.
        """
        if billing_project is None:
            billing_project = get_user_config().get('batch', 'billing_project', fallback=None)
        if billing_project is None:
            raise ValueError(
                'the billing_project parameter of ServiceBackend must be set '
                'or run `hailctl config set batch/billing_project '
                'MY_BILLING_PROJECT`')

        if bucket is None:
            bucket = get_user_config().get('batch', 'bucket', fallback=None)
        if bucket is None:
            raise ValueError(
                'the bucket parameter of ServiceBackend must be set '
                'or run `hailctl config set batch/bucket '
                'MY_BUCKET`')

        # Build the client only after both values are validated, so a missing
        # bucket cannot leave behind a client that nobody will ever close.
        self._batch_client = BatchClient(billing_project)
        self._bucket_name = bucket
Esempio n. 26
0
def test_job_private_instance_nonpreemptible(client: BatchClient):
    """A job pinned to a non-preemptible n1-standard-1 must run on 'job-private'.

    The job must succeed and the worker name in its status must contain
    'job-private'.
    """
    builder = client.create_batch()
    requested = {'machine_type': 'n1-standard-1', 'preemptible': False}
    job = builder.create_job(DOCKER_ROOT_IMAGE, ['true'], resources=requested)
    batch = builder.submit()
    status = job.wait()
    assert status['state'] == 'Success', str((status, batch.debug_info()))
    worker_name = status['status']['worker']
    assert 'job-private' in worker_name, str((status, batch.debug_info()))
Esempio n. 27
0
def test_out_of_memory(client: BatchClient):
    """A job building a 1000**3 character string with 10M memory must be OOM.

    The out-of-memory flag is read through the job's ``_get_out_of_memory``
    helper for the 'main' container.
    """
    builder = client.create_batch()
    requested = {'cpu': '0.25', 'memory': '10M', 'storage': '10Gi'}
    python_command = ['python', '-c', 'x = "a" * 1000**3']
    job = builder.create_job('python:3.6-slim-stretch',
                             python_command,
                             resources=requested)
    batch = builder.submit()
    status = job.wait()
    assert job._get_out_of_memory(status, 'main'), str(
        (status, batch.debug_info()))
Esempio n. 28
0
def test_cwd_from_image_workdir(client: BatchClient):
    """Running ``pwd`` in the workdir image must succeed and log "/work".

    The image is taken from the ``HAIL_WORKDIR_IMAGE`` environment variable.
    """
    builder = client.create_batch()
    pwd_command = ['/bin/sh', '-c', 'pwd']
    job = builder.create_job(os.environ['HAIL_WORKDIR_IMAGE'], pwd_command)
    batch = builder.submit()
    status = job.wait()
    assert status['state'] == 'Success', str((status, batch.debug_info()))
    main_log = job.log()
    assert "/work" in main_log['main'], str((main_log, batch.debug_info()))
Esempio n. 29
0
def test_timeout(client: BatchClient):
    """A 30 second sleep submitted with ``timeout=5`` must end in state 'Error'.

    The main container's error text must mention 'JobTimeoutError' and the
    job must have no exit code.
    """
    builder = client.create_batch()
    job = builder.create_job(DOCKER_ROOT_IMAGE, ['sleep', '30'], timeout=5)
    batch = builder.submit()
    status = job.wait()
    assert status['state'] == 'Error', str((status, batch.debug_info()))
    main_error = job._get_error(status, 'main')
    assert main_error and 'JobTimeoutError' in main_error, str(
        (main_error, batch.debug_info()))
    assert job.exit_code(status) is None, str((status, batch.debug_info()))
Esempio n. 30
0
 def test_bad_jwt_key(self):
     """A token signed with a freshly generated key must be rejected with 401.

     User data is loaded from the packaged 'jwt-test-user.json' and encoded
     with a new ``JWTClient`` key. The test fails if the submission goes
     through or if any other error status is returned. The client is closed
     in every case.
     """
     fname = pkg_resources.resource_filename(__name__, 'jwt-test-user.json')
     with open(fname) as f:
         userdata = json.load(f)
     signer = hj.JWTClient(hj.JWTClient.generate_key())
     token = signer.encode(userdata)
     session = aiohttp.ClientSession(
         raise_for_status=True, timeout=aiohttp.ClientTimeout(total=60))
     bc = BatchClient(session, url=os.environ.get('BATCH_URL'), token=token)
     try:
         builder = bc.create_batch()
         job = builder.create_job('alpine', ['false'])
         builder.submit()
     except aiohttp.ClientResponseError as e:
         assert e.status == 401, e
     else:
         # Reaching this point means the badly signed token was accepted.
         assert False, job
     finally:
         bc.close()