Example #1
0
    async def create(app):
        """Build a ``Canceller`` for *app* and register its background work.

        Three coroutines built by ``retry_long_running`` (each given a loop
        name, ``run_if_changed``, a ``*_state_changed`` attribute and the
        matching loop body) and one built by ``periodically_call`` are handed
        to the canceller's task manager.

        Returns the new ``Canceller``.
        """
        canceller = Canceller(app)
        manager = canceller.task_manager

        # (loop name, change notifier, loop body) for each cancellation loop.
        change_driven_loops = (
            ('cancel_cancelled_ready_jobs_loop',
             canceller.cancel_ready_state_changed,
             canceller.cancel_cancelled_ready_jobs_loop_body),
            ('cancel_cancelled_creating_jobs_loop',
             canceller.cancel_creating_state_changed,
             canceller.cancel_cancelled_creating_jobs_loop_body),
            ('cancel_cancelled_running_jobs_loop',
             canceller.cancel_running_state_changed,
             canceller.cancel_cancelled_running_jobs_loop_body),
        )
        for loop_name, changed, loop_body in change_driven_loops:
            manager.ensure_future(
                retry_long_running(loop_name, run_if_changed, changed, loop_body))

        # 60 is passed straight to periodically_call (presumably a period in
        # seconds -- confirm against its definition).
        manager.ensure_future(
            periodically_call(60, canceller.cancel_orphaned_attempts_loop_body))

        return canceller
Example #2
0
    async def async_init(self):
        """Load this collection's live instances and start its loops.

        After delegating to the parent ``async_init``, every ``instances`` row
        with ``removed = 0`` and ``inst_coll`` equal to ``self.name`` is turned
        into an ``Instance`` and added. Two change-driven loops and one
        ``periodically_call`` coroutine are then handed to the task manager.
        """
        log.info(f'initializing {self}')

        await super().async_init()

        live_instances_sql = 'SELECT * FROM instances WHERE removed = 0 AND inst_coll = %s;'
        async for record in self.db.select_and_fetchall(live_instances_sql, (self.name, )):
            self.add_instance(Instance.from_record(self.app, self, record))

        spawn = self.task_manager.ensure_future

        spawn(retry_long_running('create_instances_loop',
                                 run_if_changed,
                                 self.create_instances_state_changed,
                                 self.create_instances_loop_body))
        spawn(retry_long_running('schedule_jobs_loop',
                                 run_if_changed,
                                 self.scheduler_state_changed,
                                 self.schedule_jobs_loop_body))

        # 15 is passed straight to periodically_call (presumably a period in
        # seconds -- confirm against its definition).
        spawn(periodically_call(15, self.bump_scheduler))
Example #3
0
    async def async_init(self):
        """Hand the cancellation loops and the orphan sweep to the task manager.

        Each of the ready/creating/running loops is wrapped as
        ``retry_long_running(name, run_if_changed, <state_changed>, <body>)``;
        the orphaned-attempts body is wrapped in ``periodically_call``.
        """
        launch = self.task_manager.ensure_future

        launch(retry_long_running('cancel_cancelled_ready_jobs_loop',
                                  run_if_changed,
                                  self.cancel_ready_state_changed,
                                  self.cancel_cancelled_ready_jobs_loop_body))
        launch(retry_long_running('cancel_cancelled_creating_jobs_loop',
                                  run_if_changed,
                                  self.cancel_creating_state_changed,
                                  self.cancel_cancelled_creating_jobs_loop_body))
        launch(retry_long_running('cancel_cancelled_running_jobs_loop',
                                  run_if_changed,
                                  self.cancel_running_state_changed,
                                  self.cancel_cancelled_running_jobs_loop_body))

        # 60 is passed straight to periodically_call (presumably a period in
        # seconds -- confirm against its definition).
        launch(periodically_call(60, self.cancel_orphaned_attempts_loop_body))
Example #4
0
 async def async_init(self):
     """Start the schedule, cancellation and bump loops as asyncio tasks.

     Three loops are wrapped as
     ``retry_long_running(name, run_if_changed, <state_changed>, <body>)``
     and ``self.bump_loop`` is wrapped as
     ``retry_long_running('bump_loop', self.bump_loop)``.

     The resulting tasks are stored in ``self._background_tasks``. The
     asyncio documentation asks callers to keep a reference to the result of
     ``ensure_future`` because the event loop only holds weak references to
     tasks; the original code discarded them.
     """
     # (loop name, change notifier, loop body) for each change-driven loop.
     change_driven_loops = (
         ('schedule_loop',
          self.scheduler_state_changed,
          self.schedule_loop_body),
         ('cancel_cancelled_ready_jobs_loop',
          self.cancel_ready_state_changed,
          self.cancel_cancelled_ready_jobs_loop_body),
         ('cancel_cancelled_running_jobs_loop',
          self.cancel_running_state_changed,
          self.cancel_cancelled_running_jobs_loop_body),
     )
     tasks = [
         asyncio.ensure_future(
             retry_long_running(loop_name, run_if_changed, changed, loop_body))
         for loop_name, changed, loop_body in change_driven_loops
     ]
     tasks.append(asyncio.ensure_future(
         retry_long_running('bump_loop', self.bump_loop)))

     # Strong references keep the tasks alive for the lifetime of self.
     self._background_tasks = tasks
Example #5
0
async def on_startup(app):
    """Populate *app* with shared resources and start the batch loops.

    Stored on *app*: a thread pool (``blocking_pool``), an initialised
    ``Database`` (``db``), the worker settings, token count and instance id
    read from the ``globals`` table, the driver authorization header, a
    ``LogStore`` and the two ``asyncio.Event`` objects handed to the
    cancel/delete batch loops.

    The two loop tasks are stored in ``app['background_tasks']``. The
    asyncio documentation asks callers to keep a reference to the result of
    ``ensure_future`` because the event loop only holds weak references to
    tasks; the original code discarded them.
    """
    pool = concurrent.futures.ThreadPoolExecutor()
    app['blocking_pool'] = pool

    db = Database()
    await db.async_init()
    app['db'] = db

    row = await db.select_and_fetchone('''
SELECT worker_type, worker_cores, worker_disk_size_gb,
  instance_id, internal_token, n_tokens FROM globals;
''')

    # Copy the plain configuration columns onto the app under the same names.
    for column in ('worker_type', 'worker_cores', 'worker_disk_size_gb', 'n_tokens'):
        app[column] = row[column]

    instance_id = row['instance_id']
    log.info(f'instance_id {instance_id}')
    app['instance_id'] = instance_id

    app['driver_headers'] = {
        'Authorization': f'Bearer {row["internal_token"]}'
    }

    credentials = google.oauth2.service_account.Credentials.from_service_account_file(
        '/gsa-key/key.json')
    app['log_store'] = LogStore(BATCH_BUCKET_NAME,
                                instance_id,
                                pool,
                                credentials=credentials)

    cancel_batch_state_changed = asyncio.Event()
    app['cancel_batch_state_changed'] = cancel_batch_state_changed

    cancel_batch_task = asyncio.ensure_future(
        retry_long_running('cancel_batch_loop', run_if_changed,
                           cancel_batch_state_changed, cancel_batch_loop_body,
                           app))

    delete_batch_state_changed = asyncio.Event()
    app['delete_batch_state_changed'] = delete_batch_state_changed

    delete_batch_task = asyncio.ensure_future(
        retry_long_running('delete_batch_loop', run_if_changed,
                           delete_batch_state_changed, delete_batch_loop_body,
                           app))

    # Strong references keep the tasks alive and let shutdown code find them.
    app['background_tasks'] = [cancel_batch_task, delete_batch_task]
Example #6
0
    def __init__(
        self,
        app,
        db: Database,  # BORROWED
        inst_coll_manager: InstanceCollectionManager,
        resource_manager: CloudResourceManager,
        machine_name_prefix: str,
        config: JobPrivateInstanceManagerConfig,
        task_manager: aiotools.BackgroundTaskManager,
    ):
        """Initialise the collection from *config* and start its loops.

        The parent initialiser receives the cloud, name and instance limits
        taken from *config*, with ``is_pool=False``. The instance then
        subscribes to ``app['scheduler_state_changed']``, creates its own
        ``asyncio.Event`` for scheduling, and hands two change-driven loops
        plus one ``periodically_call`` coroutine to *task_manager*.
        """
        super().__init__(
            db,
            inst_coll_manager,
            resource_manager,
            config.cloud,
            config.name,
            machine_name_prefix,
            is_pool=False,
            max_instances=config.max_instances,
            max_live_instances=config.max_live_instances,
            task_manager=task_manager,
        )
        self.app = app

        # Instance creation follows the app-wide notice; scheduling has its
        # own private event.
        scheduler_notice: Notice = self.app['scheduler_state_changed']
        self.create_instances_state_changed = scheduler_notice.subscribe()
        self.scheduler_state_changed = asyncio.Event()

        self.async_worker_pool: AsyncWorkerPool = app['async_worker_pool']
        self.exceeded_shares_counter = ExceededSharesCounter()

        self.boot_disk_size_gb = config.boot_disk_size_gb

        # (loop name, change notifier, loop body) for each change-driven loop.
        change_driven_loops = (
            ('create_instances_loop',
             self.create_instances_state_changed,
             self.create_instances_loop_body),
            ('schedule_jobs_loop',
             self.scheduler_state_changed,
             self.schedule_jobs_loop_body),
        )
        for loop_name, changed, loop_body in change_driven_loops:
            task_manager.ensure_future(
                retry_long_running(loop_name, run_if_changed, changed, loop_body)
            )

        # 15 is passed straight to periodically_call (presumably a period in
        # seconds -- confirm against its definition).
        task_manager.ensure_future(periodically_call(15, self.bump_scheduler))
Example #7
0
async def on_startup(app):
    """Attach the benchmark service's clients to *app* and start polling.

    Stores a Google Storage reader, an aiohttp client session, a GitHub API
    client built on that session, a batch client and a background task
    manager, then hands the GitHub polling loop to the task manager.
    """
    app['gs_reader'] = ReadGoogleStorage(
        service_account_key_file='/benchmark-gsa-key/key.json')

    gh_session = aiohttp.ClientSession()
    app['gh_client_session'] = gh_session
    app['github_client'] = gidgethub.aiohttp.GitHubAPI(
        gh_session, 'hail-is/hail', oauth_token=oauth_token)

    app['batch_client'] = bc.BatchClient(billing_project='benchmark')

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    polling = retry_long_running('github_polling_loop', github_polling_loop, app)
    task_manager.ensure_future(polling)
Example #8
0
async def on_startup(app):
    """Attach the monitoring service's clients to *app* and start its loops.

    Stores the database, an HTTP client session, BigQuery and Compute
    clients, the billing-query event and the zone names gathered from every
    region in ``BATCH_GCP_REGIONS``. A task manager is then created and given
    the polling loop, the billing query loop and two ``periodically_call``
    monitors.
    """
    db = Database()
    await db.async_init()
    app['db'] = db
    app['client_session'] = httpx.client_session()

    aiogoogle_credentials = aiogoogle.GoogleCredentials.from_file(
        '/billing-monitoring-gsa-key/key.json')

    app['bigquery_client'] = aiogoogle.GoogleBigQueryClient(
        'broad-ctsa', credentials=aiogoogle_credentials)

    compute_client = aiogoogle.GoogleComputeClient(
        PROJECT, credentials=aiogoogle_credentials)
    app['compute_client'] = compute_client

    query_billing_event = asyncio.Event()
    app['query_billing_event'] = query_billing_event

    # Regions are fetched one at a time, in BATCH_GCP_REGIONS order.
    region_info = {}
    for name in BATCH_GCP_REGIONS:
        region_info[name] = await compute_client.get(f'/regions/{name}')

    zones = []
    for region in region_info.values():
        zones.extend(url_basename(zone_url) for zone_url in region['zones'])
    app['zones'] = zones

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    task_manager.ensure_future(
        retry_long_running('polling_loop', polling_loop, app))
    task_manager.ensure_future(
        retry_long_running('query_billing_loop', run_if_changed_idempotent,
                           query_billing_event, query_billing_body, app))

    # 60 is passed straight to periodically_call (presumably a period in
    # seconds -- confirm against its definition).
    task_manager.ensure_future(periodically_call(60, monitor_disks, app))
    task_manager.ensure_future(periodically_call(60, monitor_instances, app))
Example #9
0
async def on_startup(app):
    """Attach the database and BigQuery client to *app* and start its loops.

    Also stores the ``asyncio.Event`` handed to the billing query loop and a
    background task manager, which is given the polling loop and the billing
    query loop.
    """
    database = Database()
    await database.async_init()
    app['db'] = database

    aiogoogle_credentials = aiogoogle.Credentials.from_file(
        '/billing-monitoring-gsa-key/key.json')
    app['bigquery_client'] = BigQueryClient(
        'broad-ctsa', credentials=aiogoogle_credentials)

    billing_event = asyncio.Event()
    app['query_billing_event'] = billing_event

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    task_manager.ensure_future(
        retry_long_running('polling_loop', polling_loop, app))
    task_manager.ensure_future(
        retry_long_running('query_billing_loop',
                           run_if_changed_idempotent,
                           billing_event,
                           query_billing_body,
                           app))
Example #10
0
async def on_startup(app):
    """Attach the benchmark service's clients to *app* and start polling.

    Stores a Google Storage filesystem, an HTTP client session, a GitHub API
    client built on that session, an awaited batch client and a background
    task manager, then hands the GitHub polling loop to the task manager.
    """
    gsa_credentials = aiogoogle.GoogleCredentials.from_file(
        '/benchmark-gsa-key/key.json')
    app['fs'] = aiogoogle.GoogleStorageAsyncFS(credentials=gsa_credentials)

    session = httpx.client_session()
    app['client_session'] = session
    app['github_client'] = gidgethub.aiohttp.GitHubAPI(
        session, 'hail-is/hail', oauth_token=oauth_token)

    app['batch_client'] = await bc.BatchClient.create(
        billing_project='benchmark')

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    polling = retry_long_running('github_polling_loop', github_polling_loop, app)
    task_manager.ensure_future(polling)
Example #11
0
 def __init__(
         self,
         app,
         pool: Pool,
         async_worker_pool: AsyncWorkerPool,  # BORROWED
         task_manager: aiotools.BackgroundTaskManager,  # BORROWED
 ):
     """Bind the scheduler to *pool* and start its schedule loop.

     The scheduler shares ``pool.scheduler_state_changed``, takes its
     database from ``app['db']`` and hands a ``retry_long_running``
     coroutine for ``schedule_loop_body`` to *task_manager*.
     """
     self.app = app
     self.scheduler_state_changed = pool.scheduler_state_changed
     self.db: Database = app['db']
     self.pool = pool
     self.async_worker_pool = async_worker_pool
     self.exceeded_shares_counter = ExceededSharesCounter()

     spawn = task_manager.ensure_future
     spawn(
         retry_long_running(
             'schedule_loop',
             run_if_changed,
             self.scheduler_state_changed,
             self.schedule_loop_body,
         )
     )
Example #12
0
File: pool.py Project: saponas/hail
 async def async_init(self):
     """Hand the schedule loop to this object's task manager.

     The loop is wrapped as ``retry_long_running('schedule_loop',
     run_if_changed, self.scheduler_state_changed, self.schedule_loop_body)``.
     """
     spawn = self.task_manager.ensure_future
     spawn(
         retry_long_running(
             'schedule_loop',
             run_if_changed,
             self.scheduler_state_changed,
             self.schedule_loop_body,
         )
     )