async def create(app):
    """Construct a Canceller and start its cancellation background loops.

    Returns the fully wired Canceller; its task manager owns the loops.
    """
    canceller = Canceller(app)

    # (loop name, wake-up event, loop body) for each run_if_changed loop.
    cancel_loops = (
        ('cancel_cancelled_ready_jobs_loop',
         canceller.cancel_ready_state_changed,
         canceller.cancel_cancelled_ready_jobs_loop_body),
        ('cancel_cancelled_creating_jobs_loop',
         canceller.cancel_creating_state_changed,
         canceller.cancel_cancelled_creating_jobs_loop_body),
        ('cancel_cancelled_running_jobs_loop',
         canceller.cancel_running_state_changed,
         canceller.cancel_cancelled_running_jobs_loop_body),
    )
    for loop_name, changed_event, loop_body in cancel_loops:
        canceller.task_manager.ensure_future(
            retry_long_running(loop_name, run_if_changed, changed_event, loop_body))

    # Run the orphaned-attempt cancellation body once a minute.
    canceller.task_manager.ensure_future(
        periodically_call(60, canceller.cancel_orphaned_attempts_loop_body))

    return canceller
async def async_init(self):
    """Rehydrate this collection's live instances from the DB, then start
    the instance-creation, job-scheduling, and scheduler-bump loops."""
    log.info(f'initializing {self}')
    await super().async_init()

    # Rebuild an Instance object for every non-removed instance belonging
    # to this instance collection.
    records = self.db.select_and_fetchall(
        'SELECT * FROM instances WHERE removed = 0 AND inst_coll = %s;',
        (self.name, ))
    async for record in records:
        self.add_instance(Instance.from_record(self.app, self, record))

    self.task_manager.ensure_future(
        retry_long_running(
            'create_instances_loop',
            run_if_changed,
            self.create_instances_state_changed,
            self.create_instances_loop_body,
        ))
    self.task_manager.ensure_future(
        retry_long_running(
            'schedule_jobs_loop',
            run_if_changed,
            self.scheduler_state_changed,
            self.schedule_jobs_loop_body,
        ))
    # Nudge the scheduler every 15s even if no state-change event fires.
    self.task_manager.ensure_future(periodically_call(15, self.bump_scheduler))
async def async_init(self):
    """Start the three cancellation loops plus the periodic
    orphaned-attempt cleanup loop under this object's task manager."""
    loops = (
        ('cancel_cancelled_ready_jobs_loop',
         self.cancel_ready_state_changed,
         self.cancel_cancelled_ready_jobs_loop_body),
        ('cancel_cancelled_creating_jobs_loop',
         self.cancel_creating_state_changed,
         self.cancel_cancelled_creating_jobs_loop_body),
        ('cancel_cancelled_running_jobs_loop',
         self.cancel_running_state_changed,
         self.cancel_cancelled_running_jobs_loop_body),
    )
    for loop_name, changed_event, loop_body in loops:
        self.task_manager.ensure_future(
            retry_long_running(loop_name, run_if_changed, changed_event, loop_body))

    self.task_manager.ensure_future(
        periodically_call(60, self.cancel_orphaned_attempts_loop_body))
async def async_init(self):
    """Start the scheduling, cancellation, and bump background loops.

    Fix: the original discarded every `asyncio.ensure_future` result. The
    event loop holds only weak references to tasks, so an unreferenced
    task can be garbage collected mid-execution and any exception it
    raises is silently lost. We now retain the tasks on `self`, which
    also gives a handle for cancellation at shutdown if needed.
    """
    loop_coros = [
        retry_long_running(
            'schedule_loop', run_if_changed,
            self.scheduler_state_changed, self.schedule_loop_body),
        retry_long_running(
            'cancel_cancelled_ready_jobs_loop', run_if_changed,
            self.cancel_ready_state_changed,
            self.cancel_cancelled_ready_jobs_loop_body),
        retry_long_running(
            'cancel_cancelled_running_jobs_loop', run_if_changed,
            self.cancel_running_state_changed,
            self.cancel_cancelled_running_jobs_loop_body),
        retry_long_running('bump_loop', self.bump_loop),
    ]
    # NOTE(review): strong references are required per the asyncio docs;
    # other classes in this codebase use a BackgroundTaskManager for this —
    # consider migrating here too.
    self._background_tasks = [asyncio.ensure_future(coro) for coro in loop_coros]
async def on_startup(app):
    """Initialize app-wide resources on aiohttp startup: blocking thread
    pool, database, deployment globals, GCS log store, and the batch
    cancel/delete background loops.

    Fix: the original discarded both `asyncio.ensure_future` results; the
    event loop keeps only weak references to tasks, so the loops could be
    garbage collected mid-run with their exceptions silently dropped. The
    tasks are now retained in `app['background_tasks']`.
    """
    pool = concurrent.futures.ThreadPoolExecutor()
    app['blocking_pool'] = pool

    db = Database()
    await db.async_init()
    app['db'] = db

    # Deployment-wide configuration: a single row in `globals`.
    row = await db.select_and_fetchone('''
SELECT worker_type, worker_cores, worker_disk_size_gb,
  instance_id, internal_token, n_tokens
FROM globals;
''')
    app['worker_type'] = row['worker_type']
    app['worker_cores'] = row['worker_cores']
    app['worker_disk_size_gb'] = row['worker_disk_size_gb']
    app['n_tokens'] = row['n_tokens']

    instance_id = row['instance_id']
    log.info(f'instance_id {instance_id}')
    app['instance_id'] = instance_id

    app['driver_headers'] = {
        'Authorization': f'Bearer {row["internal_token"]}'
    }

    credentials = google.oauth2.service_account.Credentials.from_service_account_file(
        '/gsa-key/key.json')
    app['log_store'] = LogStore(BATCH_BUCKET_NAME, instance_id, pool,
                                credentials=credentials)

    cancel_batch_state_changed = asyncio.Event()
    app['cancel_batch_state_changed'] = cancel_batch_state_changed
    delete_batch_state_changed = asyncio.Event()
    app['delete_batch_state_changed'] = delete_batch_state_changed

    # Keep strong references to the loop tasks (see docstring).
    app['background_tasks'] = [
        asyncio.ensure_future(
            retry_long_running('cancel_batch_loop', run_if_changed,
                               cancel_batch_state_changed,
                               cancel_batch_loop_body, app)),
        asyncio.ensure_future(
            retry_long_running('delete_batch_loop', run_if_changed,
                               delete_batch_state_changed,
                               delete_batch_loop_body, app)),
    ]
def __init__(
    self,
    app,
    db: Database,  # BORROWED
    inst_coll_manager: InstanceCollectionManager,
    resource_manager: CloudResourceManager,
    machine_name_prefix: str,
    config: JobPrivateInstanceManagerConfig,
    task_manager: aiotools.BackgroundTaskManager,
):
    """Manager for job-private (non-pool) instances of one configured
    collection; starts its instance-creation and scheduling loops."""
    super().__init__(
        db,
        inst_coll_manager,
        resource_manager,
        config.cloud,
        config.name,
        machine_name_prefix,
        is_pool=False,
        max_instances=config.max_instances,
        max_live_instances=config.max_live_instances,
        task_manager=task_manager,
    )
    self.app = app
    self.boot_disk_size_gb = config.boot_disk_size_gb
    self.async_worker_pool: AsyncWorkerPool = app['async_worker_pool']
    self.exceeded_shares_counter = ExceededSharesCounter()

    # Instance creation is driven by the app-global scheduler notice;
    # scheduling jobs onto instances has its own local event.
    global_notice: Notice = self.app['scheduler_state_changed']
    self.create_instances_state_changed = global_notice.subscribe()
    self.scheduler_state_changed = asyncio.Event()

    for loop_name, changed_event, loop_body in (
        ('create_instances_loop',
         self.create_instances_state_changed,
         self.create_instances_loop_body),
        ('schedule_jobs_loop',
         self.scheduler_state_changed,
         self.schedule_jobs_loop_body),
    ):
        task_manager.ensure_future(
            retry_long_running(loop_name, run_if_changed, changed_event, loop_body))
    task_manager.ensure_future(periodically_call(15, self.bump_scheduler))
async def on_startup(app):
    """Set up GCS reading, the GitHub API client, the batch client, and
    the long-running GitHub polling loop on aiohttp startup."""
    app['gs_reader'] = ReadGoogleStorage(
        service_account_key_file='/benchmark-gsa-key/key.json')

    gh_session = aiohttp.ClientSession()
    app['gh_client_session'] = gh_session
    app['github_client'] = gidgethub.aiohttp.GitHubAPI(
        gh_session, 'hail-is/hail', oauth_token=oauth_token)

    app['batch_client'] = bc.BatchClient(billing_project='benchmark')

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(
        retry_long_running('github_polling_loop', github_polling_loop, app))
async def on_startup(app):
    """Initialize the database, HTTP session, Google API clients, the GCP
    zone list, and the billing/monitoring background loops."""
    db = Database()
    await db.async_init()
    app['db'] = db

    app['client_session'] = httpx.client_session()

    creds = aiogoogle.GoogleCredentials.from_file(
        '/billing-monitoring-gsa-key/key.json')
    app['bigquery_client'] = aiogoogle.GoogleBigQueryClient(
        'broad-ctsa', credentials=creds)
    compute_client = aiogoogle.GoogleComputeClient(PROJECT, credentials=creds)
    app['compute_client'] = compute_client

    query_billing_event = asyncio.Event()
    app['query_billing_event'] = query_billing_event

    # Fetch every configured region's metadata and flatten its zone URLs
    # down to bare zone names.
    region_info = {
        region: await compute_client.get(f'/regions/{region}')
        for region in BATCH_GCP_REGIONS
    }
    app['zones'] = [
        url_basename(zone_url)
        for info in region_info.values()
        for zone_url in info['zones']
    ]

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(
        retry_long_running('polling_loop', polling_loop, app))
    task_manager.ensure_future(
        retry_long_running('query_billing_loop', run_if_changed_idempotent,
                           query_billing_event, query_billing_body, app))
    task_manager.ensure_future(periodically_call(60, monitor_disks, app))
    task_manager.ensure_future(periodically_call(60, monitor_instances, app))
async def on_startup(app):
    """Initialize the database, the BigQuery client, and the polling and
    billing-query background loops."""
    db = Database()
    await db.async_init()
    app['db'] = db

    creds = aiogoogle.Credentials.from_file(
        '/billing-monitoring-gsa-key/key.json')
    app['bigquery_client'] = BigQueryClient('broad-ctsa', credentials=creds)

    query_billing_event = asyncio.Event()
    app['query_billing_event'] = query_billing_event

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(
        retry_long_running('polling_loop', polling_loop, app))
    task_manager.ensure_future(
        retry_long_running('query_billing_loop', run_if_changed_idempotent,
                           query_billing_event, query_billing_body, app))
async def on_startup(app):
    """Set up the async GCS filesystem, GitHub API client, batch client,
    and the GitHub polling loop on aiohttp startup."""
    gcs_credentials = aiogoogle.GoogleCredentials.from_file(
        '/benchmark-gsa-key/key.json')
    app['fs'] = aiogoogle.GoogleStorageAsyncFS(credentials=gcs_credentials)

    session = httpx.client_session()
    app['client_session'] = session
    app['github_client'] = gidgethub.aiohttp.GitHubAPI(
        session, 'hail-is/hail', oauth_token=oauth_token)

    app['batch_client'] = await bc.BatchClient.create(
        billing_project='benchmark')

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(
        retry_long_running('github_polling_loop', github_polling_loop, app))
def __init__(
    self,
    app,
    pool: Pool,
    async_worker_pool: AsyncWorkerPool,  # BORROWED
    task_manager: aiotools.BackgroundTaskManager,  # BORROWED
):
    """Scheduler for one pool; registers its schedule loop with the
    borrowed task manager."""
    self.app = app
    self.pool = pool
    self.db: Database = app['db']
    self.async_worker_pool = async_worker_pool
    # Woken by the pool whenever scheduler-relevant state changes.
    self.scheduler_state_changed = pool.scheduler_state_changed
    self.exceeded_shares_counter = ExceededSharesCounter()

    task_manager.ensure_future(
        retry_long_running(
            'schedule_loop',
            run_if_changed,
            self.scheduler_state_changed,
            self.schedule_loop_body,
        ))
async def async_init(self):
    """Register the long-running schedule loop with the task manager."""
    schedule_loop = retry_long_running(
        'schedule_loop',
        run_if_changed,
        self.scheduler_state_changed,
        self.schedule_loop_body,
    )
    self.task_manager.ensure_future(schedule_loop)