Example #1
0
async def test_update_commits():
    """Drive the benchmark commit endpoint through delete, check, submit and poll.

    The commit is first deleted, then fetched and asserted to have no
    status, then submitted with POST.  Afterwards the endpoint is polled
    every five seconds until the returned status is either ``None`` or
    marked complete.  ``sha`` is not defined here; it is read from an
    enclosing scope.
    """
    config = get_deploy_config()
    auth_headers = service_auth_headers(config, 'benchmark')
    commit_benchmark_url = config.url(
        'benchmark', f'/api/v1alpha/benchmark/commit/{sha}')

    async def send(verb):
        # Every call targets the same URL with the same JSON payload;
        # only the HTTP verb differs.
        return await utils.request_retry_transient_errors(
            session,
            verb,
            f'{commit_benchmark_url}',
            headers=auth_headers,
            json={'sha': sha})

    async with client_session() as session:
        await send('DELETE')

        response = await send('GET')
        commit = await response.json()
        assert commit['status'] is None, commit

        response = await send('POST')
        commit = await response.json()

        # Keep polling while a status exists and it is not yet complete.
        while not (commit['status'] is None or commit['status']['complete']):
            await asyncio.sleep(5)
            response = await send('GET')
            commit = await response.json()
            print(commit['status'])
Example #2
0
async def on_startup(app):
    """Populate ``app`` with a database, an HTTP client session and a flow client."""
    database = Database()
    await database.async_init(maxsize=50)
    app['db'] = database

    app['client_session'] = httpx.client_session()

    client_secret_path = '/auth-oauth2-client-secret/client_secret.json'
    app['flow_client'] = get_flow_client(client_secret_path)
Example #3
0
File: job.py Project: saponas/hail
async def notify_batch_job_complete(db, batch_id):
    """POST a completed batch's record to its callback URL, once.

    The batch row (with an aggregated ``cost`` column) is looked up only
    if it is not deleted, has a callback, and is in the ``complete``
    state.  When no such row exists the function returns silently.  Any
    exception raised while posting is logged and swallowed; the callback
    is not retried.

    Args:
        db: database handle exposing ``select_and_fetchone``.
        batch_id: id of the batch to notify about.
    """
    batch = await db.select_and_fetchone(
        '''
SELECT batches.*, SUM(`usage` * rate) AS cost
FROM batches
LEFT JOIN aggregated_batch_resources
  ON batches.id = aggregated_batch_resources.batch_id
LEFT JOIN resources
  ON aggregated_batch_resources.resource = resources.resource
WHERE id = %s AND NOT deleted AND callback IS NOT NULL AND
   batches.`state` = 'complete'
GROUP BY batches.id;
''',
        (batch_id, ),
    )
    if not batch:
        return

    callback = batch['callback']
    log.info(f'making callback for batch {batch_id}: {callback}')

    request_timeout = aiohttp.ClientTimeout(total=5)
    if batch['user'] == 'ci':
        # only jobs from CI may use batch's TLS identity
        http_client_session = client_session(timeout=request_timeout)
    else:
        http_client_session = aiohttp.ClientSession(raise_for_status=True,
                                                    timeout=request_timeout)

    try:
        async with http_client_session as session:
            payload = batch_record_to_dict(batch)
            await session.post(callback, json=payload)
            log.info(f'callback for batch {batch_id} successful')
    except Exception:
        log.exception(f'callback for batch {batch_id} failed, will not retry.')
Example #4
0
async def async_get_userinfo(
        *,
        deploy_config: Optional[DeployConfig] = None,
        session_id: Optional[str] = None,
        client_session: Optional[httpx.ClientSession] = None):
    """Fetch the user info document from the auth service.

    Args:
        deploy_config: deployment configuration; obtained from
            ``get_deploy_config()`` when omitted.
        session_id: when given, sent as a bearer token; otherwise the
            headers come from ``service_auth_headers``.
        client_session: session to issue the request on.  When omitted a
            temporary session is opened and closed around the request.

    Returns:
        The decoded JSON body, or ``None`` when the service answers 401.

    Raises:
        aiohttp.client_exceptions.ClientResponseError: for any response
            error whose status is not 401.
    """
    if deploy_config is None:
        deploy_config = get_deploy_config()

    if session_id is not None:
        headers = {'Authorization': f'Bearer {session_id}'}
    else:
        headers = service_auth_headers(deploy_config, 'auth')

    userinfo_url = deploy_config.url('auth', '/api/v1alpha/userinfo')

    async def fetch(session):
        try:
            response = await request_retry_transient_errors(
                session, 'GET', userinfo_url, headers=headers)
            return await response.json()
        except aiohttp.client_exceptions.ClientResponseError as err:
            # A 401 is reported as "no user"; everything else propagates.
            if err.status != 401:
                raise
            return None

    if client_session is not None:
        return await fetch(client_session)

    async with httpx.client_session() as session:
        return await fetch(session)
Example #5
0
async def async_main(args):
    """Run the auth flow against the default namespace of the deploy config.

    When ``args.namespace`` is set, the deploy config is first rebound to
    that namespace.
    """
    config = get_deploy_config()
    if args.namespace:
        config = config.with_default_namespace(args.namespace)

    auth_headers = namespace_auth_headers(
        config, config.default_namespace(), authorize_target=False)

    async with client_session(headers=auth_headers) as http_session:
        await auth_flow(config, config.default_namespace(), http_session)
Example #6
0
    def __init__(self,
                 billing_project: str,
                 deploy_config: Optional[DeployConfig] = None,
                 session: Optional[aiohttp.ClientSession] = None,
                 headers: Optional[Dict[str, str]] = None,
                 _token: Optional[str] = None,
                 token_file: Optional[str] = None):
        """Set up the client's base URL, HTTP session and request headers.

        Args:
            billing_project: stored on the instance as ``billing_project``.
            deploy_config: falls back to ``get_deploy_config()`` when falsy.
            session: falls back to a new ``client_session()`` when ``None``.
            headers: extra headers copied into the instance's header dict.
            _token: when truthy, used as a bearer token; otherwise the
                headers from ``service_auth_headers`` are merged in.
            token_file: forwarded to ``service_auth_headers``.
        """
        self.billing_project = billing_project

        if not deploy_config:
            deploy_config = get_deploy_config()
        self.url = deploy_config.base_url('batch')

        self._session = client_session() if session is None else session

        # Build a fresh dict so the caller's ``headers`` is left untouched.
        merged: Dict[str, str] = dict(headers) if headers else {}
        if _token:
            merged['Authorization'] = f'Bearer {_token}'
        else:
            auth_headers = service_auth_headers(deploy_config,
                                                'batch',
                                                token_file=token_file)
            merged.update(auth_headers)
        self._headers = merged
    async def __init__(self,
                       billing_project,
                       deploy_config=None,
                       session=None,
                       headers=None,
                       _token=None,
                       token_file=None):
        """Set up the client's base URL, HTTP session and request headers.

        When no session is supplied, one is created with
        ``raise_for_status=True`` and a 60 second total timeout.  Headers
        are copied into a fresh dict; a truthy ``_token`` becomes a bearer
        token, otherwise ``service_auth_headers`` supplies the
        authorization headers.

        NOTE(review): this initializer is declared ``async``, so calling it
        yields a coroutine.  Ordinary ``Cls(...)`` construction requires
        ``__init__`` to return ``None``; confirm how the enclosing class
        awaits it.
        """
        self.billing_project = billing_project

        if not deploy_config:
            deploy_config = get_deploy_config()
        self.url = deploy_config.base_url('batch')

        if session is None:
            default_timeout = aiohttp.ClientTimeout(total=60)
            session = client_session(raise_for_status=True,
                                     timeout=default_timeout)
        self._session = session

        merged = dict(headers) if headers else {}
        if _token:
            merged['Authorization'] = f'Bearer {_token}'
        else:
            auth_headers = service_auth_headers(deploy_config,
                                                'batch',
                                                token_file=token_file)
            merged.update(auth_headers)
        self._headers = merged
Example #8
0
async def test_deploy():
    """Poll CI's deploy status until a deploy state appears, then require success.

    The status endpoint must report exactly one entry.  Its
    ``failure_information`` (if any) is used as the assertion message.
    """
    config = get_deploy_config()
    ci_deploy_status_url = config.url('ci', '/api/v1alpha/deploy_status')
    auth_headers = service_auth_headers(config, 'ci')

    async with client_session() as session:

        async def wait_forever():
            # No timeout: loops until the reported deploy state is not None.
            while True:
                response = await utils.request_retry_transient_errors(
                    session, 'GET', f'{ci_deploy_status_url}', headers=auth_headers)
                deploy_statuses = await response.json()
                log.info(
                    f'deploy_statuses:\n{json.dumps(deploy_statuses, indent=2)}'
                )
                assert len(deploy_statuses) == 1, deploy_statuses
                deploy_status = deploy_statuses[0]
                state = deploy_status['deploy_state']
                failure = deploy_status.get('failure_information')
                await asyncio.sleep(5)
                if state is not None:
                    break
            log.info(f'returning {deploy_status} {failure}')
            return state, failure

        deploy_state, failure_information = await wait_forever()
        assert deploy_state == 'success', str(failure_information)
Example #9
0
async def async_main():
    """Start the user-creation event loop and run until interrupted.

    Resources are stored in a plain ``app`` dict as they are created: two
    database handles, an HTTP client session, a Kubernetes client, an
    identity client and a batch client.  An ``EventHandler`` then runs
    ``update_users(app)`` whenever ``users_changed_event`` fires, and the
    coroutine sleeps in a loop indefinitely.

    On exit, every resource that was actually created is released.  Each
    cleanup step sits in its own ``try``/``finally`` so a failure in one
    step does not stop the remaining steps from running.
    """
    app = {}

    user_creation_loop = None
    try:
        db = Database()
        await db.async_init(maxsize=50)
        app['db'] = db

        app['client_session'] = httpx.client_session()

        db_instance = Database()
        await db_instance.async_init(
            maxsize=50, config_file='/database-server-config/sql-config.json')
        app['db_instance'] = db_instance

        kubernetes_asyncio.config.load_incluster_config()
        app['k8s_client'] = kubernetes_asyncio.client.CoreV1Api()

        app['identity_client'] = get_identity_client()

        app['batch_client'] = await bc.aioclient.BatchClient.create(None)

        users_changed_event = asyncio.Event()
        app['users_changed_event'] = users_changed_event

        async def users_changed_handler():
            return await update_users(app)

        user_creation_loop = EventHandler(users_changed_handler,
                                          event=users_changed_event,
                                          min_delay_secs=1.0)
        await user_creation_loop.start()

        while True:
            await asyncio.sleep(10000)
    finally:
        # Startup may have failed part-way, so every lookup is guarded:
        # an unguarded app[...] would raise KeyError here and mask the
        # exception that actually caused the shutdown.
        try:
            if 'db' in app:
                await app['db'].async_close()
        finally:
            try:
                # The second database is stored under 'db_instance'.
                if 'db_instance' in app:
                    await app['db_instance'].async_close()
            finally:
                try:
                    if 'client_session' in app:
                        await app['client_session'].close()
                finally:
                    try:
                        if user_creation_loop is not None:
                            user_creation_loop.shutdown()
                    finally:
                        try:
                            if 'identity_client' in app:
                                await app['identity_client'].close()
                        finally:
                            if 'k8s_client' in app:
                                k8s_client: kubernetes_asyncio.client.CoreV1Api = app[
                                    'k8s_client']
                                await k8s_client.api_client.rest_client.pool_manager.close(
                                )
Example #10
0
File: main.py Project: saponas/hail
async def refresh_inst_colls_on_front_end(app):
    """Send a PATCH to the batch service's ``inst_colls/refresh`` endpoint.

    The request carries the headers stored in ``app['batch_headers']`` and
    is issued on a short-lived client session.
    """
    async with client_session() as http_session:
        refresh_url = deploy_config.url('batch', '/api/v1alpha/inst_colls/refresh')
        await request_retry_transient_errors(
            http_session, 'PATCH', refresh_url, headers=app['batch_headers'])
Example #11
0
async def on_startup(app):
    """Load Kubernetes config, then store a k8s client, DB pool and HTTP session.

    The local kube config is used only when ``BATCH_USE_KUBE_CONFIG`` is
    present in the environment; otherwise the in-cluster config is loaded.
    """
    if 'BATCH_USE_KUBE_CONFIG' not in os.environ:
        config.load_incluster_config()
    else:
        await config.load_kube_config()

    app['k8s_client'] = client.CoreV1Api()
    app['dbpool'] = await create_database_pool()
    app['client_session'] = httpx.client_session()
Example #12
0
async def on_startup(app):
    """Initialise the shared state kept in ``app``.

    Stores an HTTP client session, an async worker pool, empty
    bookkeeping containers, a Kubernetes client and a redis connection
    pool created from the outer-scope ``socket``.
    """
    app['client_session'] = httpx.client_session()
    app['worker_pool'] = AsyncWorkerPool(parallelism=100, queue_size=10)
    app['files_in_progress'] = set()
    app['users'] = {}

    kube.config.load_incluster_config()
    app['k8s_client'] = kube.client.CoreV1Api()

    redis_pool = await aioredis.create_pool(socket)
    app['redis_pool']: aioredis.ConnectionsPool = redis_pool
Example #13
0
async def on_startup(app):
    """Initialise the shared state kept in ``app``.

    Stores a 16-worker thread pool, an HTTP client session, empty
    per-user bookkeeping containers and an in-cluster Kubernetes client.
    """
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=16)
    app['client_session'] = httpx.client_session()
    app['thread_pool'] = executor
    app['user_keys'] = {}
    app['users'] = set()
    app['queries'] = defaultdict(dict)

    kube.config.load_incluster_config()
    app['k8s_client'] = kube.client.CoreV1Api()
Example #14
0
async def test_invariants():
    """Require the batch driver's ``/check_invariants`` endpoint to report no errors."""
    config = get_deploy_config()
    url = config.url('batch-driver', '/check_invariants')
    auth_headers = service_auth_headers(config, 'batch-driver')

    async with client_session() as session:
        response = await utils.request_retry_transient_errors(
            session, 'GET', url, headers=auth_headers)
        data = await response.json()

        # Both error slots must be empty; the full payload is the failure message.
        for error_key in ('check_incremental_error',
                          'check_resource_aggregation_error'):
            assert data[error_key] is None, data
Example #15
0
async def async_main(args):
    """Run the auth flow against the default namespace of the deploy config.

    When ``args.namespace`` is set, the deploy config is first rebound to
    that namespace.  The session raises on error statuses and uses a 60
    second total timeout.
    """
    config = get_deploy_config()
    if args.namespace:
        config = config.with_default_namespace(args.namespace)

    auth_headers = namespace_auth_headers(config,
                                          config.default_namespace(),
                                          authorize_target=False)
    http_session = client_session(raise_for_status=True,
                                  timeout=aiohttp.ClientTimeout(total=60),
                                  headers=auth_headers)
    async with http_session as session:
        await auth_flow(config, config.default_namespace(), session)
Example #16
0
async def on_startup(app):
    """Create the HTTP session, GitHub and batch clients, database and update loop."""
    http_session = httpx.client_session()
    app['client_session'] = http_session
    app['github_client'] = gh_aiohttp.GitHubAPI(http_session,
                                                'ci',
                                                oauth_token=oauth_token)
    app['batch_client'] = await BatchClient.create('ci')

    database = Database()
    app['db'] = database
    await database.async_init()

    # The update loop runs as a background task owned by the task manager.
    manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = manager
    manager.ensure_future(update_loop(app))
Example #17
0
async def on_startup(app):
    """Create the storage FS, HTTP session, GitHub/batch clients and polling task."""
    gsa_credentials = aiogoogle.GoogleCredentials.from_file(
        '/benchmark-gsa-key/key.json')
    app['fs'] = aiogoogle.GoogleStorageAsyncFS(credentials=gsa_credentials)

    http_session = httpx.client_session()
    app['client_session'] = http_session
    app['github_client'] = gidgethub.aiohttp.GitHubAPI(http_session,
                                                       'hail-is/hail',
                                                       oauth_token=oauth_token)

    app['batch_client'] = await bc.BatchClient.create(
        billing_project='benchmark')

    # The GitHub polling loop runs as a background task, wrapped by
    # retry_long_running.
    manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = manager
    polling_task = retry_long_running('github_polling_loop',
                                      github_polling_loop, app)
    manager.ensure_future(polling_task)
Example #18
0
async def test_billing_monitoring():
    """Poll the monitoring billing endpoint until it returns data, for up to 30 minutes.

    The returned payload must carry a non-``None`` ``cost_by_service``.
    """
    config = get_deploy_config()
    monitoring_deploy_config_url = config.url('monitoring', '/api/v1alpha/billing')
    auth_headers = service_auth_headers(config, 'monitoring')

    async with client_session() as session:

        async def wait_forever():
            # Unbounded on its own; the caller bounds it with wait_for.
            while True:
                response = await utils.request_retry_transient_errors(
                    session, 'GET', f'{monitoring_deploy_config_url}', headers=auth_headers)
                payload = await response.json()
                await asyncio.sleep(5)
                if payload is not None:
                    return payload

        half_hour = 30 * 60
        data = await asyncio.wait_for(wait_forever(), timeout=half_hour)
        assert data['cost_by_service'] is not None, data
Example #19
0
async def async_main():
    """Log out of the auth service and drop the local token for its namespace.

    If no token is stored for the auth namespace, prints ``Not logged in.``
    and returns without contacting the service.  Otherwise a POST is sent
    to the logout endpoint, the token is removed from the token store, and
    the store is written back.
    """
    deploy_config = get_deploy_config()

    auth_ns = deploy_config.service_ns('auth')
    tokens = get_tokens()
    if auth_ns not in tokens:
        print('Not logged in.')
        return

    headers = service_auth_headers(deploy_config, 'auth')
    async with client_session(headers=headers) as session:
        # The response body is not needed; entering and leaving the
        # context is enough to issue the request and release it.
        async with session.post(deploy_config.url('auth', '/api/v1alpha/logout')):
            pass

    del tokens[auth_ns]
    tokens.write()

    print('Logged out.')
Example #20
0
async def on_startup(app):
    """Create clients, discover zones and launch the monitoring background tasks.

    Stores in ``app``: a database, an HTTP session, BigQuery and Compute
    clients, the ``query_billing_event``, the list of zone names across
    ``BATCH_GCP_REGIONS``, and a task manager running the polling loop,
    the billing query loop, and the periodic disk and instance monitors.
    """
    database = Database()
    await database.async_init()
    app['db'] = database
    app['client_session'] = httpx.client_session()

    gsa_credentials = aiogoogle.GoogleCredentials.from_file(
        '/billing-monitoring-gsa-key/key.json')

    app['bigquery_client'] = aiogoogle.GoogleBigQueryClient(
        'broad-ctsa', credentials=gsa_credentials)

    compute_client = aiogoogle.GoogleComputeClient(
        PROJECT, credentials=gsa_credentials)
    app['compute_client'] = compute_client

    billing_event = asyncio.Event()
    app['query_billing_event'] = billing_event

    # Regions are fetched one at a time, in BATCH_GCP_REGIONS order.
    region_info = {}
    for region_name in BATCH_GCP_REGIONS:
        region_info[region_name] = await compute_client.get(
            f'/regions/{region_name}')

    app['zones'] = [
        url_basename(zone_url)
        for region in region_info.values()
        for zone_url in region['zones']
    ]

    manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = manager

    manager.ensure_future(
        retry_long_running('polling_loop', polling_loop, app))
    manager.ensure_future(
        retry_long_running('query_billing_loop', run_if_changed_idempotent,
                           billing_event, query_billing_body, app))
    manager.ensure_future(periodically_call(60, monitor_disks, app))
    manager.ensure_future(periodically_call(60, monitor_instances, app))
Example #21
0
 async def create(billing_project: str,
                  deploy_config: Optional[DeployConfig] = None,
                  session: Optional[httpx.ClientSession] = None,
                  headers: Optional[Dict[str, str]] = None,
                  _token: Optional[str] = None,
                  token_file: Optional[str] = None):
     """Build a ``BatchClient`` with its URL, session and auth headers resolved.

     Args:
         billing_project: forwarded to ``BatchClient``.
         deploy_config: falls back to ``get_deploy_config()`` when falsy.
         session: falls back to a new ``httpx.client_session()`` when ``None``.
         headers: extra request headers.  The mapping is copied, so the
             caller's object is never modified.
         _token: when truthy, used as a bearer token; otherwise the
             headers from ``service_auth_headers`` are merged in.
         token_file: forwarded to ``service_auth_headers``.

     Returns:
         The constructed ``BatchClient``.
     """
     if not deploy_config:
         deploy_config = get_deploy_config()
     url = deploy_config.base_url('batch')
     if session is None:
         session = httpx.client_session()
     # Work on a private copy: adding the Authorization header below must
     # not leak into a dict the caller may reuse for other requests.
     headers = dict(headers) if headers is not None else {}
     if _token:
         headers['Authorization'] = f'Bearer {_token}'
     else:
         headers.update(service_auth_headers(deploy_config, 'batch', token_file=token_file))
     return BatchClient(
         billing_project=billing_project,
         url=url,
         session=session,
         headers=headers)
Example #22
0
async def on_startup(app):
    """Populate ``app`` with the driver's shared state and start its periodic tasks.

    Order matters: later components are constructed from ``app`` and
    ``db``, so the keys they may read are set first.
    """
    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    app['client_session'] = httpx.client_session()

    # Kubernetes client (in-cluster config) plus a cache wrapping it.
    kubernetes_asyncio.config.load_incluster_config()
    app['k8s_client'] = kubernetes_asyncio.client.CoreV1Api()
    app['k8s_cache'] = K8sCache(app['k8s_client'])

    db = Database()
    await db.async_init(maxsize=50)
    app['db'] = db

    # Global settings are read from the `globals` table.
    row = await db.select_and_fetchone('''
SELECT instance_id, internal_token, frozen FROM globals;
''')

    instance_id = row['instance_id']
    log.info(f'instance_id {instance_id}')
    app['instance_id'] = instance_id

    app['internal_token'] = row['internal_token']

    # Pre-built bearer header carrying the internal token.
    app['batch_headers'] = {'Authorization': f'Bearer {row["internal_token"]}'}

    app['frozen'] = row['frozen']

    # Notification primitives, shared through `app`.
    scheduler_state_changed = Notice()
    app['scheduler_state_changed'] = scheduler_state_changed

    cancel_ready_state_changed = asyncio.Event()
    app['cancel_ready_state_changed'] = cancel_ready_state_changed

    cancel_creating_state_changed = asyncio.Event()
    app['cancel_creating_state_changed'] = cancel_creating_state_changed

    cancel_running_state_changed = asyncio.Event()
    app['cancel_running_state_changed'] = cancel_running_state_changed

    async_worker_pool = AsyncWorkerPool(100, queue_size=100)
    app['async_worker_pool'] = async_worker_pool

    # The same credentials file is used for the file store and the cloud driver.
    credentials_file = '/gsa-key/key.json'
    fs = get_cloud_async_fs(credentials_file=credentials_file)
    app['file_store'] = FileStore(fs, BATCH_STORAGE_URI, instance_id)

    inst_coll_configs = await InstanceCollectionConfigs.create(db)

    app['driver'] = await get_cloud_driver(app, db, MACHINE_NAME_PREFIX,
                                           DEFAULT_NAMESPACE,
                                           inst_coll_configs, credentials_file,
                                           task_manager)

    canceller = await Canceller.create(app)
    app['canceller'] = canceller

    # Error slots start empty.
    app['check_incremental_error'] = None
    app['check_resource_aggregation_error'] = None

    # Invariant checks are scheduled only when HAIL_SHOULD_CHECK_INVARIANTS is truthy.
    # NOTE(review): the first argument of periodically_call is presumably a
    # period in seconds - confirm against its definition.
    if HAIL_SHOULD_CHECK_INVARIANTS:
        task_manager.ensure_future(
            periodically_call(10, check_incremental, app, db))
        task_manager.ensure_future(
            periodically_call(10, check_resource_aggregation, app, db))

    task_manager.ensure_future(
        periodically_call(10, monitor_billing_limits, app))
    task_manager.ensure_future(
        periodically_call(10, cancel_fast_failing_batches, app))
    task_manager.ensure_future(
        periodically_call(60, scheduling_cancelling_bump, app))
    task_manager.ensure_future(periodically_call(15, monitor_system, app))
 def __init__(self, should_fail):
     """Store the ``should_fail`` flag and open the underlying client session."""
     session = client_session()
     self.should_fail = should_fail
     self.real_session = session
Example #24
0
 async def __aenter__(self):
     """Open a session carrying auth headers for the ``query`` service.

     The session is created with ``raise_for_status=False``.  Returns
     ``self``.
     """
     self._session = client_session(
         raise_for_status=False,
         headers=service_auth_headers(self._deploy_config, 'query'))
     return self
Example #25
0
 async def async_init(self):
     """Fill in a default session and authorization headers if they are missing."""
     if self._session is None:
         self._session = client_session()

     # Respect an Authorization header that is already present.
     if 'Authorization' in self._headers:
         return
     auth_headers = service_auth_headers(self._deploy_config, 'memory')
     self._headers.update(auth_headers)
Example #26
0
async def on_startup(app):
    """Populate ``app`` with the driver's shared state and start its periodic tasks.

    Order matters: several components below are constructed from ``app``
    itself, so the keys they may read are set first.
    """
    app['task_manager'] = aiotools.BackgroundTaskManager()

    app['client_session'] = httpx.client_session()

    # Only the cache is stored in `app`; the raw k8s client stays local.
    kube.config.load_incluster_config()
    k8s_client = kube.client.CoreV1Api()
    k8s_cache = K8sCache(k8s_client, refresh_time=5)
    app['k8s_cache'] = k8s_cache

    db = Database()
    await db.async_init(maxsize=50)
    app['db'] = db

    # Global settings are read from the `globals` table.
    row = await db.select_and_fetchone(
        '''
SELECT instance_id, internal_token, frozen FROM globals;
'''
    )

    instance_id = row['instance_id']
    log.info(f'instance_id {instance_id}')
    app['instance_id'] = instance_id

    app['internal_token'] = row['internal_token']

    # Pre-built bearer header carrying the internal token.
    app['batch_headers'] = {'Authorization': f'Bearer {row["internal_token"]}'}

    app['frozen'] = row['frozen']

    # Resource name -> rate, collected from an async iteration over the query.
    resources = db.select_and_fetchall('SELECT resource, rate FROM resources;')
    app['resource_rates'] = {record['resource']: record['rate'] async for record in resources}

    aiogoogle_credentials = aiogoogle.GoogleCredentials.from_file('/gsa-key/key.json')
    compute_client = aiogoogle.GoogleComputeClient(PROJECT, credentials=aiogoogle_credentials)
    app['compute_client'] = compute_client

    logging_client = aiogoogle.GoogleLoggingClient(
        credentials=aiogoogle_credentials,
        # The project-wide logging quota is 60 request/m.  The event
        # loop sleeps 15s per iteration, so the max rate is 4
        # iterations/m.  Note, the event loop could make multiple
        # logging requests per iteration, so these numbers are not
        # quite comparable.  I didn't want to consume the entire quota
        # since there will be other users of the logging API (us at
        # the web console, test deployments, etc.)
        rate_limit=RateLimit(10, 60),
    )
    app['logging_client'] = logging_client

    # Notification primitives, shared through `app`.
    scheduler_state_changed = Notice()
    app['scheduler_state_changed'] = scheduler_state_changed

    cancel_ready_state_changed = asyncio.Event()
    app['cancel_ready_state_changed'] = cancel_ready_state_changed

    cancel_creating_state_changed = asyncio.Event()
    app['cancel_creating_state_changed'] = cancel_creating_state_changed

    cancel_running_state_changed = asyncio.Event()
    app['cancel_running_state_changed'] = cancel_running_state_changed

    async_worker_pool = AsyncWorkerPool(100, queue_size=100)
    app['async_worker_pool'] = async_worker_pool

    # Credentials are loaded a second time from the same key file for the storage FS.
    credentials = aiogoogle.GoogleCredentials.from_file('/gsa-key/key.json')
    fs = aiogoogle.GoogleStorageAsyncFS(credentials=credentials)
    app['file_store'] = FileStore(fs, BATCH_BUCKET_NAME, instance_id)

    # Each component below is constructed from `app` and then initialised
    # asynchronously, in this order.
    zone_monitor = ZoneMonitor(app)
    app['zone_monitor'] = zone_monitor
    await zone_monitor.async_init()

    # Not stored in `app`; only handed to the instance collection manager.
    inst_coll_configs = InstanceCollectionConfigs(app)
    await inst_coll_configs.async_init()

    inst_coll_manager = InstanceCollectionManager(app, MACHINE_NAME_PREFIX)
    app['inst_coll_manager'] = inst_coll_manager
    await inst_coll_manager.async_init(inst_coll_configs)

    canceller = Canceller(app)
    app['canceller'] = canceller
    await canceller.async_init()

    gce_event_monitor = GCEEventMonitor(app, MACHINE_NAME_PREFIX)
    app['gce_event_monitor'] = gce_event_monitor
    await gce_event_monitor.async_init()

    # Error slots start empty.
    app['check_incremental_error'] = None
    app['check_resource_aggregation_error'] = None

    # Invariant checks are scheduled only when HAIL_SHOULD_CHECK_INVARIANTS is truthy.
    # NOTE(review): the first argument of periodically_call is presumably a
    # period in seconds - confirm against its definition.
    if HAIL_SHOULD_CHECK_INVARIANTS:
        app['task_manager'].ensure_future(periodically_call(10, check_incremental, app, db))
        app['task_manager'].ensure_future(periodically_call(10, check_resource_aggregation, app, db))

    app['task_manager'].ensure_future(periodically_call(10, monitor_billing_limits, app))

    app['task_manager'].ensure_future(periodically_call(10, cancel_fast_failing_batches, app))

    app['task_manager'].ensure_future(periodically_call(60, scheduling_cancelling_bump, app))

    app['task_manager'].ensure_future(periodically_call(15, monitor_system, app))
Example #27
0
async def on_startup(app):
    """Create the shared HTTP client session and store it in ``app``."""
    http_session = httpx.client_session()
    app['client_session'] = http_session
Example #28
0
async def run(args, i):
    """Walk one simulated user through the workshop notebook lifecycle.

    Steps, all on one client session: load the home page, log in with
    ``args.workshop`` / ``args.password``, create a notebook, wait on the
    ``/notebook/wait`` websocket for a ``'1'`` message (up to five
    connections), then delete the notebook.

    Args:
        args: object providing ``workshop`` and ``password``.
        i: identifier used only as a prefix in log messages.

    Returns:
        ``(duration, ready)``: the time spent waiting on the websocket, as
        a ``time.time()`` difference, and whether a ``'1'`` message was
        seen.
    """
    headers = service_auth_headers(deploy_config, 'workshop', authorize_target=False)

    def workshop_url(path, **url_kwargs):
        return deploy_config.url('workshop', path, **url_kwargs)

    async def visit(session, path):
        # get csrf token: the response body is ignored; the page is
        # requested before each form post, which then reads the '_csrf'
        # cookie from the session.
        async with session.get(workshop_url(path), headers=headers):
            pass

    async def submit(session, path, leading_fields=()):
        # Post a form whose last field is the session's current CSRF cookie.
        form = aiohttp.FormData()
        for field_name, field_value in leading_fields:
            form.add_field(name=field_name, value=field_value)
        form.add_field(name='_csrf', value=get_cookie(session, '_csrf'))
        async with session.post(workshop_url(path), data=form, headers=headers):
            pass

    async with client_session() as session:
        # make sure notebook is up
        async with session.get(workshop_url(''), headers=headers) as home_page:
            await home_page.text()

        log.info(f'{i} loaded notebook home page')

        # log in as workshop guest
        await visit(session, '/login')
        await submit(session, '/login',
                     [('name', args.workshop), ('password', args.password)])

        log.info(f'{i} logged in')

        # create notebook
        await visit(session, '/notebook')
        await submit(session, '/notebook')

        log.info(f'{i} created notebook')

        started_at = time.time()

        # wait for notebook ready
        ready = False
        # 5 attempts overkill, should only take 2: Scheduling => Running => Ready
        for _ in range(5):
            if ready:
                break
            wait_url = workshop_url('/notebook/wait', base_scheme='ws')
            async with session.ws_connect(wait_url, headers=headers) as ws:
                async for msg in ws:
                    if msg.data == '1':
                        ready = True

        duration = time.time() - started_at

        log.info(f'{i} notebook state {ready} duration {duration}')

        # delete notebook
        await visit(session, '/notebook')
        await submit(session, '/notebook/delete')

        log.info(f'{i} notebook delete, done.')

    return duration, ready
Example #29
0
async def on_startup(app):
    """Populate ``app`` with an initialised database and an HTTP client session."""
    database = Database()
    await database.async_init(maxsize=50)
    app['db'] = database

    http_session = httpx.client_session()
    app['client_session'] = http_session
Example #30
0
 async def __aenter__(self):
     """Open a session carrying auth headers for the ``ci`` service.

     The session is created with ``raise_for_status=False`` and a 60
     second total timeout.  Returns ``self``.
     """
     auth_headers = service_auth_headers(self._deploy_config, 'ci')
     request_timeout = aiohttp.ClientTimeout(total=60)
     self._session = client_session(raise_for_status=False,
                                    timeout=request_timeout,
                                    headers=auth_headers)
     return self