async def test_update_commits():
    """Delete, re-submit and poll the benchmark commit identified by ``sha``.

    After the DELETE the commit must report no status.  The commit is then
    POSTed and polled every 5 seconds (printing each polled status) until its
    status is ``None`` or its ``complete`` entry is truthy.
    """
    config = get_deploy_config()
    auth_headers = service_auth_headers(config, 'benchmark')
    endpoint = config.url('benchmark', f'/api/v1alpha/benchmark/commit/{sha}')

    async def call(method):
        # `session` is bound by the `async with` below before the first call.
        return await utils.request_retry_transient_errors(
            session, method, f'{endpoint}', headers=auth_headers, json={'sha': sha}
        )

    async def call_json(method):
        response = await call(method)
        return await response.json()

    async with client_session() as session:
        await call('DELETE')

        commit = await call_json('GET')
        assert commit['status'] is None, commit

        commit = await call_json('POST')

        while True:
            status = commit['status']
            if status is None or status['complete']:
                break
            await asyncio.sleep(5)
            commit = await call_json('GET')
            print(commit['status'])
async def on_startup(app):
    """Populate ``app`` with a database handle, an HTTP session and the OAuth flow client."""
    client_secret_path = '/auth-oauth2-client-secret/client_secret.json'

    database = Database()
    await database.async_init(maxsize=50)
    app['db'] = database

    http_session = httpx.client_session()
    app['client_session'] = http_session

    flow_client = get_flow_client(client_secret_path)
    app['flow_client'] = flow_client
async def notify_batch_job_complete(db, batch_id):
    """POST the record of a completed batch to the batch's callback URL.

    The batch row (with its aggregated cost) is looked up by ``batch_id``; if
    the query yields no row, nothing is done.  Exceptions raised while posting
    are logged and swallowed: the callback is attempted once only.
    """
    # Adjacent literals: the query text stays on one logical line.
    query = (
        ' SELECT batches.*, SUM(`usage` * rate) AS cost '
        'FROM batches '
        'LEFT JOIN aggregated_batch_resources '
        'ON batches.id = aggregated_batch_resources.batch_id '
        'LEFT JOIN resources '
        'ON aggregated_batch_resources.resource = resources.resource '
        'WHERE id = %s AND NOT deleted AND callback IS NOT NULL AND '
        "batches.`state` = 'complete' "
        'GROUP BY batches.id; '
    )
    record = await db.select_and_fetchone(query, (batch_id, ))
    if not record:
        return

    callback = record['callback']
    log.info(f'making callback for batch {batch_id}: {callback}')

    timeout = aiohttp.ClientTimeout(total=5)
    if record['user'] != 'ci':
        http_client_session = aiohttp.ClientSession(raise_for_status=True, timeout=timeout)
    else:
        # only jobs from CI may use batch's TLS identity
        http_client_session = client_session(timeout=timeout)

    try:
        async with http_client_session as session:
            payload = batch_record_to_dict(record)
            await session.post(callback, json=payload)
            log.info(f'callback for batch {batch_id} successful')
    except Exception:
        log.exception(f'callback for batch {batch_id} failed, will not retry.')
async def async_get_userinfo(
    *,
    deploy_config: Optional[DeployConfig] = None,
    session_id: Optional[str] = None,
    client_session: Optional[httpx.ClientSession] = None,
):
    """Fetch the userinfo document from the auth service.

    Args:
        deploy_config: Deploy configuration; looked up with
            ``get_deploy_config()`` when ``None``.
        session_id: When given, sent as a bearer token; otherwise the
            service auth headers for ``'auth'`` are used.
        client_session: Session to issue the request on.  When ``None`` a
            temporary session is opened and closed around the request.

    Returns:
        The decoded JSON response, or ``None`` if the request failed with
        HTTP status 401.

    Raises:
        aiohttp.client_exceptions.ClientResponseError: for any other
            response error status.
    """
    config = deploy_config if deploy_config is not None else get_deploy_config()

    headers = (
        {'Authorization': f'Bearer {session_id}'}
        if session_id is not None
        else service_auth_headers(config, 'auth')
    )
    userinfo_url = config.url('auth', '/api/v1alpha/userinfo')

    async def fetch(session):
        try:
            response = await request_retry_transient_errors(session, 'GET', userinfo_url, headers=headers)
            return await response.json()
        except aiohttp.client_exceptions.ClientResponseError as err:
            if err.status != 401:
                raise
            return None

    if client_session is not None:
        return await fetch(client_session)

    async with httpx.client_session() as session:
        return await fetch(session)
async def async_main(args):
    """Run the auth flow against the default namespace.

    ``args.namespace``, when truthy, replaces the deploy config's default
    namespace before the namespace auth headers are computed.
    """
    config = get_deploy_config()
    if args.namespace:
        config = config.with_default_namespace(args.namespace)

    auth_headers = namespace_auth_headers(
        config,
        config.default_namespace(),
        authorize_target=False,
    )

    async with client_session(headers=auth_headers) as session:
        target_namespace = config.default_namespace()
        await auth_flow(config, target_namespace, session)
def __init__(self,
             billing_project: str,
             deploy_config: Optional[DeployConfig] = None,
             session: Optional[aiohttp.ClientSession] = None,
             headers: Optional[Dict[str, str]] = None,
             _token: Optional[str] = None,
             token_file: Optional[str] = None):
    """Set up the client's base URL, HTTP session and request headers.

    Args:
        billing_project: Stored on the instance as ``billing_project``.
        deploy_config: Falls back to ``get_deploy_config()`` when falsy.
        session: Falls back to a new ``client_session()`` when ``None``.
        headers: Extra headers; copied, never mutated.
        _token: When truthy, used as the bearer token; otherwise the
            service auth headers for ``'batch'`` are merged in.
        token_file: Forwarded to ``service_auth_headers``.
    """
    self.billing_project = billing_project

    deploy_config = deploy_config or get_deploy_config()
    self.url = deploy_config.base_url('batch')

    self._session = client_session() if session is None else session

    merged: Dict[str, str] = dict(headers) if headers else {}
    if _token:
        merged['Authorization'] = f'Bearer {_token}'
    else:
        auth = service_auth_headers(deploy_config, 'batch', token_file=token_file)
        merged.update(auth)
    self._headers = merged
async def __init__(self,
                   billing_project,
                   deploy_config=None,
                   session=None,
                   headers=None,
                   _token=None,
                   token_file=None):
    """Set up the client's base URL, HTTP session and request headers.

    A session created here raises on error statuses and uses a 60 second
    total timeout.  ``headers`` is copied; ``_token`` (when truthy) becomes
    the bearer token, otherwise the ``'batch'`` service auth headers are
    merged in.
    """
    # NOTE(review): declared `async` although nothing in the body awaits;
    # presumably instances are built through an awaiting constructor or
    # factory -- confirm against the enclosing class.
    self.billing_project = billing_project

    deploy_config = deploy_config or get_deploy_config()
    self.url = deploy_config.base_url('batch')

    if session is not None:
        self._session = session
    else:
        self._session = client_session(
            raise_for_status=True,
            timeout=aiohttp.ClientTimeout(total=60),
        )

    request_headers = dict(headers) if headers else {}
    if _token:
        request_headers['Authorization'] = f'Bearer {_token}'
    else:
        auth = service_auth_headers(deploy_config, 'batch', token_file=token_file)
        request_headers.update(auth)
    self._headers = request_headers
async def test_deploy():
    """Poll the CI deploy status until a deploy state is reported, then require success.

    Each poll expects exactly one deploy status entry and is followed by a
    5 second sleep.  On failure the assertion message is the reported
    ``failure_information``.
    """
    config = get_deploy_config()
    status_url = config.url('ci', '/api/v1alpha/deploy_status')
    auth_headers = service_auth_headers(config, 'ci')

    async with client_session() as session:

        async def wait_forever():
            # No timeout here: loops until 'deploy_state' is not None.
            while True:
                response = await utils.request_retry_transient_errors(
                    session, 'GET', f'{status_url}', headers=auth_headers)
                statuses = await response.json()
                log.info(f'deploy_statuses:\n{json.dumps(statuses, indent=2)}')
                assert len(statuses) == 1, statuses

                status = statuses[0]
                state = status['deploy_state']
                failure = status.get('failure_information')
                await asyncio.sleep(5)
                if state is not None:
                    break
            log.info(f'returning {status} {failure}')
            return state, failure

        deploy_state, failure_information = await wait_forever()
        assert deploy_state == 'success', str(failure_information)
async def async_main():
    """Run the user-creation event loop until cancelled, then release resources.

    Startup fills ``app`` with two database handles, an HTTP session, the
    Kubernetes, identity and batch clients, and starts an ``EventHandler``
    that calls ``update_users(app)`` whenever ``users_changed_event`` fires.
    The coroutine then sleeps forever.

    Cleanup runs in nested ``finally`` blocks so that a failure while closing
    one resource does not prevent the remaining ones from being closed.  Each
    step is guarded by a membership test because startup may have failed
    before the corresponding entry was stored in ``app``.
    """
    app = {}
    user_creation_loop = None
    try:
        db = Database()
        await db.async_init(maxsize=50)
        app['db'] = db

        app['client_session'] = httpx.client_session()

        db_instance = Database()
        await db_instance.async_init(
            maxsize=50, config_file='/database-server-config/sql-config.json')
        app['db_instance'] = db_instance

        kubernetes_asyncio.config.load_incluster_config()
        app['k8s_client'] = kubernetes_asyncio.client.CoreV1Api()

        app['identity_client'] = get_identity_client()

        # NOTE(review): the batch client is not closed during cleanup below --
        # confirm whether it owns resources that need releasing.
        app['batch_client'] = await bc.aioclient.BatchClient.create(None)

        users_changed_event = asyncio.Event()
        app['users_changed_event'] = users_changed_event

        async def users_changed_handler():
            return await update_users(app)

        user_creation_loop = EventHandler(users_changed_handler,
                                          event=users_changed_event,
                                          min_delay_secs=1.0)
        await user_creation_loop.start()

        while True:
            await asyncio.sleep(10000)
    finally:
        try:
            if 'db' in app:
                await app['db'].async_close()
        finally:
            try:
                # The handle is stored under 'db_instance' above; the lookup
                # must use that same key or the pool is never closed.
                if 'db_instance' in app:
                    await app['db_instance'].async_close()
            finally:
                try:
                    if 'client_session' in app:
                        await app['client_session'].close()
                finally:
                    try:
                        if user_creation_loop is not None:
                            user_creation_loop.shutdown()
                    finally:
                        try:
                            if 'identity_client' in app:
                                await app['identity_client'].close()
                        finally:
                            if 'k8s_client' in app:
                                k8s_client: kubernetes_asyncio.client.CoreV1Api = app['k8s_client']
                                await k8s_client.api_client.rest_client.pool_manager.close()
async def refresh_inst_colls_on_front_end(app):
    """Send a PATCH to the batch service's ``inst_colls/refresh`` endpoint.

    The request carries ``app['batch_headers']`` and is issued on a
    short-lived session through ``request_retry_transient_errors``.
    """
    async with client_session() as session:
        refresh_url = deploy_config.url('batch', '/api/v1alpha/inst_colls/refresh')
        batch_headers = app['batch_headers']
        await request_retry_transient_errors(session, 'PATCH', refresh_url, headers=batch_headers)
async def on_startup(app):
    """Load the Kubernetes configuration and attach shared clients to ``app``.

    When the ``BATCH_USE_KUBE_CONFIG`` environment variable is set (to any
    value) the kube config is loaded; otherwise the in-cluster configuration
    is used.
    """
    if 'BATCH_USE_KUBE_CONFIG' not in os.environ:
        config.load_incluster_config()
    else:
        await config.load_kube_config()

    k8s_client = client.CoreV1Api()
    app['k8s_client'] = k8s_client

    database_pool = await create_database_pool()
    app['dbpool'] = database_pool

    app['client_session'] = httpx.client_session()
async def on_startup(app):
    """Initialise the per-application state stored on ``app``.

    Sets up the HTTP session, a worker pool, the in-progress file set, the
    user map, the Kubernetes client and the redis connection pool.
    """
    http_session = httpx.client_session()
    app['client_session'] = http_session

    worker_pool = AsyncWorkerPool(parallelism=100, queue_size=10)
    app['worker_pool'] = worker_pool

    app['files_in_progress'] = set()
    app['users'] = {}

    kube.config.load_incluster_config()
    app['k8s_client'] = kube.client.CoreV1Api()

    # Annotated in the original as an aioredis.ConnectionsPool.
    redis_pool = await aioredis.create_pool(socket)
    app['redis_pool'] = redis_pool
async def on_startup(app):
    """Initialise the per-application state stored on ``app``.

    Creates a 16-worker thread pool, the HTTP session, empty containers for
    user keys, users and queries, and the in-cluster Kubernetes client.
    """
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=16)

    app['client_session'] = httpx.client_session()
    app['thread_pool'] = executor

    app['user_keys'] = {}
    app['users'] = set()
    # Missing keys materialise as an empty dict.
    app['queries'] = defaultdict(dict)

    kube.config.load_incluster_config()
    app['k8s_client'] = kube.client.CoreV1Api()
async def test_invariants():
    """Require that the batch driver's ``/check_invariants`` endpoint reports no errors."""
    config = get_deploy_config()
    invariants_url = config.url('batch-driver', '/check_invariants')
    auth_headers = service_auth_headers(config, 'batch-driver')

    async with client_session() as session:
        response = await utils.request_retry_transient_errors(
            session, 'GET', invariants_url, headers=auth_headers)
        data = await response.json()

        for error_key in ('check_incremental_error', 'check_resource_aggregation_error'):
            assert data[error_key] is None, data
async def async_main(args):
    """Run the auth flow against the default namespace.

    ``args.namespace``, when truthy, replaces the deploy config's default
    namespace.  The session raises on error statuses and uses a 60 second
    total timeout.
    """
    config = get_deploy_config()
    if args.namespace:
        config = config.with_default_namespace(args.namespace)

    auth_headers = namespace_auth_headers(
        config,
        config.default_namespace(),
        authorize_target=False,
    )

    session_cm = client_session(
        raise_for_status=True,
        timeout=aiohttp.ClientTimeout(total=60),
        headers=auth_headers,
    )
    async with session_cm as session:
        target_namespace = config.default_namespace()
        await auth_flow(config, target_namespace, session)
async def on_startup(app):
    """Attach the HTTP session, GitHub and batch clients, database and update loop to ``app``."""
    http_session = httpx.client_session()
    app['client_session'] = http_session
    app['github_client'] = gh_aiohttp.GitHubAPI(http_session, 'ci', oauth_token=oauth_token)

    app['batch_client'] = await BatchClient.create('ci')

    # The database is stored on the app before it is initialised.
    database = Database()
    app['db'] = database
    await database.async_init()

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(update_loop(app))
async def on_startup(app):
    """Attach storage, HTTP, GitHub and batch clients to ``app`` and start GitHub polling."""
    gsa_credentials = aiogoogle.GoogleCredentials.from_file('/benchmark-gsa-key/key.json')
    app['fs'] = aiogoogle.GoogleStorageAsyncFS(credentials=gsa_credentials)

    http_session = httpx.client_session()
    app['client_session'] = http_session
    app['github_client'] = gidgethub.aiohttp.GitHubAPI(
        http_session, 'hail-is/hail', oauth_token=oauth_token)

    app['batch_client'] = await bc.BatchClient.create(billing_project='benchmark')

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    polling = retry_long_running('github_polling_loop', github_polling_loop, app)
    task_manager.ensure_future(polling)
async def test_billing_monitoring():
    """Poll the monitoring billing endpoint until it returns data, for at most 30 minutes.

    The returned document must contain a non-``None`` ``cost_by_service``.
    """
    config = get_deploy_config()
    billing_url = config.url('monitoring', '/api/v1alpha/billing')
    auth_headers = service_auth_headers(config, 'monitoring')

    async with client_session() as session:

        async def wait_forever():
            # Unbounded on its own; the caller applies the timeout.
            while True:
                response = await utils.request_retry_transient_errors(
                    session, 'GET', f'{billing_url}', headers=auth_headers)
                payload = await response.json()
                await asyncio.sleep(5)
                if payload is not None:
                    return payload

        data = await asyncio.wait_for(wait_forever(), timeout=30 * 60)
        assert data['cost_by_service'] is not None, data
async def async_main():
    """Log out of the auth service and drop the stored token for its namespace.

    Prints ``Not logged in.`` and returns when no token is stored for the
    auth namespace; otherwise POSTs to the logout endpoint, deletes the token
    entry, writes the tokens back and prints ``Logged out.``.
    """
    config = get_deploy_config()
    tokens = get_tokens()
    if config.service_ns('auth') not in tokens:
        print('Not logged in.')
        return

    auth_headers = service_auth_headers(config, 'auth')
    async with client_session(headers=auth_headers) as session:
        logout_url = config.url('auth', '/api/v1alpha/logout')
        # Only the request matters; the response body is not read.
        async with session.post(logout_url):
            pass

    namespace = config.service_ns('auth')
    del tokens[namespace]
    tokens.write()

    print('Logged out.')
async def on_startup(app):
    """Initialise clients and shared state on ``app`` and start the background loops.

    Besides the database, HTTP session and Google clients, this resolves the
    zones of every region in ``BATCH_GCP_REGIONS`` (one request per region,
    issued sequentially) and schedules the polling, billing-query and
    monitoring tasks on a new background task manager.
    """
    database = Database()
    await database.async_init()
    app['db'] = database

    app['client_session'] = httpx.client_session()

    aiogoogle_credentials = aiogoogle.GoogleCredentials.from_file(
        '/billing-monitoring-gsa-key/key.json')

    app['bigquery_client'] = aiogoogle.GoogleBigQueryClient(
        'broad-ctsa', credentials=aiogoogle_credentials)

    compute_client = aiogoogle.GoogleComputeClient(PROJECT, credentials=aiogoogle_credentials)
    app['compute_client'] = compute_client

    query_billing_event = asyncio.Event()
    app['query_billing_event'] = query_billing_event

    region_info = {}
    for name in BATCH_GCP_REGIONS:
        region_info[name] = await compute_client.get(f'/regions/{name}')

    zones = []
    for region in region_info.values():
        for zone_url in region['zones']:
            zones.append(url_basename(zone_url))
    app['zones'] = zones

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    task_manager.ensure_future(retry_long_running('polling_loop', polling_loop, app))
    task_manager.ensure_future(
        retry_long_running(
            'query_billing_loop',
            run_if_changed_idempotent,
            query_billing_event,
            query_billing_body,
            app,
        )
    )
    task_manager.ensure_future(periodically_call(60, monitor_disks, app))
    task_manager.ensure_future(periodically_call(60, monitor_instances, app))
async def create(billing_project: str,
                 deploy_config: Optional[DeployConfig] = None,
                 session: Optional[httpx.ClientSession] = None,
                 headers: Optional[Dict[str, str]] = None,
                 _token: Optional[str] = None,
                 token_file: Optional[str] = None):
    """Build a ``BatchClient`` for ``billing_project``.

    Args:
        billing_project: Forwarded to ``BatchClient``.
        deploy_config: Falls back to ``get_deploy_config()`` when falsy.
        session: HTTP session to use; a new ``httpx.client_session()`` is
            created when ``None``.
        headers: Extra request headers.  The mapping is copied, so the
            caller's object is never modified.
        _token: When truthy, sent as the bearer token; otherwise the service
            auth headers for ``'batch'`` are merged in.
        token_file: Forwarded to ``service_auth_headers``.

    Returns:
        The constructed ``BatchClient``.
    """
    if not deploy_config:
        deploy_config = get_deploy_config()
    url = deploy_config.base_url('batch')

    # Work on a private copy: adding the Authorization entry to the caller's
    # dict would leak credentials into an object the caller may reuse.
    request_headers: Dict[str, str] = dict(headers) if headers else {}
    if _token:
        request_headers['Authorization'] = f'Bearer {_token}'
    else:
        request_headers.update(service_auth_headers(deploy_config, 'batch', token_file=token_file))

    # Create the session last so that a failure while resolving the headers
    # cannot leave a freshly opened session behind.
    if session is None:
        session = httpx.client_session()

    return BatchClient(
        billing_project=billing_project,
        url=url,
        session=session,
        headers=request_headers)
async def on_startup(app):
    """Initialise the driver's shared state on ``app`` and start its periodic tasks.

    State is stored in dependency order: task manager, HTTP session,
    Kubernetes client and cache, database, the values read from the
    ``globals`` table, change-notification primitives, worker pool, file
    store, cloud driver and canceller.  The invariant checks are scheduled
    only when ``HAIL_SHOULD_CHECK_INVARIANTS`` is truthy.
    """
    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    app['client_session'] = httpx.client_session()

    kubernetes_asyncio.config.load_incluster_config()
    k8s_client = kubernetes_asyncio.client.CoreV1Api()
    app['k8s_client'] = k8s_client
    app['k8s_cache'] = K8sCache(k8s_client)

    db = Database()
    await db.async_init(maxsize=50)
    app['db'] = db

    row = await db.select_and_fetchone(' SELECT instance_id, internal_token, frozen FROM globals; ')

    instance_id = row['instance_id']
    log.info(f'instance_id {instance_id}')
    app['instance_id'] = instance_id

    internal_token = row['internal_token']
    app['internal_token'] = internal_token
    app['batch_headers'] = {'Authorization': f'Bearer {internal_token}'}
    app['frozen'] = row['frozen']

    app['scheduler_state_changed'] = Notice()
    for event_key in (
        'cancel_ready_state_changed',
        'cancel_creating_state_changed',
        'cancel_running_state_changed',
    ):
        app[event_key] = asyncio.Event()

    app['async_worker_pool'] = AsyncWorkerPool(100, queue_size=100)

    credentials_file = '/gsa-key/key.json'
    fs = get_cloud_async_fs(credentials_file=credentials_file)
    app['file_store'] = FileStore(fs, BATCH_STORAGE_URI, instance_id)

    inst_coll_configs = await InstanceCollectionConfigs.create(db)

    # The driver and canceller receive `app` itself, so everything stored
    # above must already be in place when they are created.
    app['driver'] = await get_cloud_driver(
        app,
        db,
        MACHINE_NAME_PREFIX,
        DEFAULT_NAMESPACE,
        inst_coll_configs,
        credentials_file,
        task_manager,
    )
    app['canceller'] = await Canceller.create(app)

    app['check_incremental_error'] = None
    app['check_resource_aggregation_error'] = None

    # (period, function, extra arguments) in scheduling order; the period is
    # passed through unchanged as the first argument of periodically_call.
    periodic_jobs = []
    if HAIL_SHOULD_CHECK_INVARIANTS:
        periodic_jobs.append((10, check_incremental, (app, db)))
        periodic_jobs.append((10, check_resource_aggregation, (app, db)))
    periodic_jobs.append((10, monitor_billing_limits, (app,)))
    periodic_jobs.append((10, cancel_fast_failing_batches, (app,)))
    periodic_jobs.append((60, scheduling_cancelling_bump, (app,)))
    periodic_jobs.append((15, monitor_system, (app,)))

    for period, job, job_args in periodic_jobs:
        task_manager.ensure_future(periodically_call(period, job, *job_args))
def __init__(self, should_fail):
    """Store ``should_fail`` and open a session kept as ``real_session``."""
    self.should_fail = should_fail

    session = client_session()
    self.real_session = session
async def __aenter__(self):
    """Open the session used to talk to the ``query`` service and return ``self``.

    The session is created with ``raise_for_status=False`` and carries the
    service auth headers for ``'query'``.
    """
    auth_headers = service_auth_headers(self._deploy_config, 'query')
    session = client_session(raise_for_status=False, headers=auth_headers)
    self._session = session
    return self
async def async_init(self):
    """Fill in the session and authorization headers if they are missing.

    A session is created only when ``self._session`` is ``None``; the service
    auth headers for ``'memory'`` are merged only when ``self._headers`` has
    no ``Authorization`` entry.
    """
    if self._session is None:
        self._session = client_session()

    if 'Authorization' in self._headers:
        return

    auth_headers = service_auth_headers(self._deploy_config, 'memory')
    self._headers.update(auth_headers)
async def on_startup(app):
    """Initialise the driver's shared state on ``app`` and start its periodic tasks.

    Components that take ``app`` (zone monitor, instance collection configs
    and manager, canceller, GCE event monitor) are constructed and
    initialised one after another, in that order, after the basic state is
    stored.  The invariant checks are scheduled only when
    ``HAIL_SHOULD_CHECK_INVARIANTS`` is truthy.
    """
    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    app['client_session'] = httpx.client_session()

    kube.config.load_incluster_config()
    k8s_client = kube.client.CoreV1Api()
    app['k8s_cache'] = K8sCache(k8s_client, refresh_time=5)

    db = Database()
    await db.async_init(maxsize=50)
    app['db'] = db

    row = await db.select_and_fetchone(' SELECT instance_id, internal_token, frozen FROM globals; ')

    instance_id = row['instance_id']
    log.info(f'instance_id {instance_id}')
    app['instance_id'] = instance_id

    internal_token = row['internal_token']
    app['internal_token'] = internal_token
    app['batch_headers'] = {'Authorization': f'Bearer {internal_token}'}
    app['frozen'] = row['frozen']

    resource_rates = {}
    async for record in db.select_and_fetchall('SELECT resource, rate FROM resources;'):
        resource_rates[record['resource']] = record['rate']
    app['resource_rates'] = resource_rates

    aiogoogle_credentials = aiogoogle.GoogleCredentials.from_file('/gsa-key/key.json')

    app['compute_client'] = aiogoogle.GoogleComputeClient(PROJECT, credentials=aiogoogle_credentials)

    # The project-wide logging quota is 60 requests/minute.  The event loop
    # sleeps 15s per iteration, i.e. at most 4 iterations/minute, but one
    # iteration may issue several logging requests, so the two numbers are
    # not directly comparable.  The limit below deliberately leaves part of
    # the quota for other users of the logging API (the web console, test
    # deployments, etc.).
    app['logging_client'] = aiogoogle.GoogleLoggingClient(
        credentials=aiogoogle_credentials,
        rate_limit=RateLimit(10, 60),
    )

    app['scheduler_state_changed'] = Notice()
    for event_key in (
        'cancel_ready_state_changed',
        'cancel_creating_state_changed',
        'cancel_running_state_changed',
    ):
        app[event_key] = asyncio.Event()

    app['async_worker_pool'] = AsyncWorkerPool(100, queue_size=100)

    credentials = aiogoogle.GoogleCredentials.from_file('/gsa-key/key.json')
    fs = aiogoogle.GoogleStorageAsyncFS(credentials=credentials)
    app['file_store'] = FileStore(fs, BATCH_BUCKET_NAME, instance_id)

    # Each component below is stored on `app` before its async_init runs.
    zone_monitor = ZoneMonitor(app)
    app['zone_monitor'] = zone_monitor
    await zone_monitor.async_init()

    inst_coll_configs = InstanceCollectionConfigs(app)
    await inst_coll_configs.async_init()

    inst_coll_manager = InstanceCollectionManager(app, MACHINE_NAME_PREFIX)
    app['inst_coll_manager'] = inst_coll_manager
    await inst_coll_manager.async_init(inst_coll_configs)

    canceller = Canceller(app)
    app['canceller'] = canceller
    await canceller.async_init()

    gce_event_monitor = GCEEventMonitor(app, MACHINE_NAME_PREFIX)
    app['gce_event_monitor'] = gce_event_monitor
    await gce_event_monitor.async_init()

    app['check_incremental_error'] = None
    app['check_resource_aggregation_error'] = None

    # (period, function, extra arguments) in scheduling order; the period is
    # passed through unchanged as the first argument of periodically_call.
    periodic_jobs = []
    if HAIL_SHOULD_CHECK_INVARIANTS:
        periodic_jobs.append((10, check_incremental, (app, db)))
        periodic_jobs.append((10, check_resource_aggregation, (app, db)))
    periodic_jobs.append((10, monitor_billing_limits, (app,)))
    periodic_jobs.append((10, cancel_fast_failing_batches, (app,)))
    periodic_jobs.append((60, scheduling_cancelling_bump, (app,)))
    periodic_jobs.append((15, monitor_system, (app,)))

    for period, job, job_args in periodic_jobs:
        task_manager.ensure_future(periodically_call(period, job, *job_args))
async def on_startup(app):
    """Attach a shared HTTP client session to ``app``."""
    http_session = httpx.client_session()
    app['client_session'] = http_session
async def run(args, i):
    """Drive one workshop session: log in, create a notebook, wait for it, delete it.

    Args:
        args: Provides ``workshop`` and ``password`` for the login form.
        i: Identifier used as the prefix of every log line.

    Returns:
        ``(duration, ready)``: seconds spent waiting on the notebook and
        whether a ready message was received within five attempts.
    """
    headers = service_auth_headers(deploy_config, 'workshop', authorize_target=False)

    def workshop_url(path, **url_kwargs):
        return deploy_config.url('workshop', path, **url_kwargs)

    async with client_session() as session:

        async def prime_csrf(path):
            # get csrf token: the response is ignored, the token is read
            # back from the session's cookies afterwards
            async with session.get(workshop_url(path), headers=headers):
                pass

        async def post_form(path, fields=()):
            form = aiohttp.FormData()
            for field_name, field_value in fields:
                form.add_field(name=field_name, value=field_value)
            form.add_field(name='_csrf', value=get_cookie(session, '_csrf'))
            async with session.post(workshop_url(path), data=form, headers=headers):
                pass

        # make sure notebook is up
        async with session.get(workshop_url(''), headers=headers) as home:
            await home.text()
        log.info(f'{i} loaded notebook home page')

        # log in as workshop guest
        await prime_csrf('/login')
        await post_form('/login', (('name', args.workshop), ('password', args.password)))
        log.info(f'{i} logged in')

        # create notebook
        await prime_csrf('/notebook')
        await post_form('/notebook')
        log.info(f'{i} created notebook')

        started = time.time()

        # wait for notebook ready
        # 5 attempts overkill, should only take 2: Scheduling => Running => Ready
        ready = False
        for _ in range(5):
            wait_url = workshop_url('/notebook/wait', base_scheme='ws')
            async with session.ws_connect(wait_url, headers=headers) as ws:
                async for msg in ws:
                    if msg.data == '1':
                        ready = True
            if ready:
                break

        duration = time.time() - started
        log.info(f'{i} notebook state {ready} duration {duration}')

        # delete notebook
        await prime_csrf('/notebook')
        await post_form('/notebook/delete')
        log.info(f'{i} notebook delete, done.')

        return duration, ready
async def on_startup(app):
    """Attach an initialised database handle and an HTTP client session to ``app``."""
    database = Database()
    await database.async_init(maxsize=50)
    app['db'] = database

    http_session = httpx.client_session()
    app['client_session'] = http_session
async def __aenter__(self):
    """Open the session used to talk to the ``ci`` service and return ``self``.

    The session is created with ``raise_for_status=False``, a 60 second total
    timeout and the service auth headers for ``'ci'``.
    """
    auth_headers = service_auth_headers(self._deploy_config, 'ci')
    request_timeout = aiohttp.ClientTimeout(total=60)
    self._session = client_session(
        raise_for_status=False,
        timeout=request_timeout,
        headers=auth_headers,
    )
    return self