Ejemplo n.º 1
0
    def __init__(self, app, name, machine_name_prefix, is_pool):
        """Initialize the instance-tracking state.

        Args:
            app: Mapping from which ``'db'``, ``'compute_client'`` and
                ``'zone_monitor'`` are read.
            name: Stored on ``self.name`` and appended to the machine-name
                prefix.
            machine_name_prefix: Leading part of ``self.machine_name_prefix``;
                the final value is ``<prefix><name>-``.
            is_pool: Stored unchanged on ``self.is_pool``.
        """
        # Shared services looked up from the application mapping.
        self.app = app
        self.db: Database = app['db']
        self.compute_client: aiogoogle.GoogleComputeClient = app['compute_client']
        self.zone_monitor: ZoneMonitor = app['zone_monitor']

        # Identity of this object.
        self.name = name
        self.machine_name_prefix = f'{machine_name_prefix}{self.name}-'
        self.is_pool = is_pool

        # Instances keyed by name, plus a per-zone counter that starts at 0
        # for any zone not seen yet.
        self.name_instance: Dict[str, Instance] = {}
        self.live_free_cores_mcpu_by_zone: Dict[str, int] = collections.defaultdict(int)

        # Instances kept sorted by their ``last_updated`` attribute.
        self.instances_by_last_updated = sortedcontainers.SortedSet(
            key=lambda inst: inst.last_updated
        )

        # Every known state starts with a count of zero.
        self.n_instances_by_state = dict.fromkeys(
            ('pending', 'active', 'inactive', 'deleted'), 0
        )

        # Core totals over pending and active instances.
        self.live_free_cores_mcpu = 0
        self.live_total_cores_mcpu = 0

        # Left unset here; nothing in this constructor fills them in.
        self.boot_disk_size_gb = None
        self.max_instances = None
        self.max_live_instances = None

        self.task_manager = aiotools.BackgroundTaskManager()
Ejemplo n.º 2
0
 def __init__(self, k8s_client: kube.client.CoreV1Api):
     """Create an empty cache bound to ``k8s_client``.

     Args:
         k8s_client: Kubernetes core API client, stored on
             ``self.k8s_client``.
     """
     self.k8s_client: kube.client.CoreV1Api = k8s_client

     # Cached values by key, and one lazily created lock per key.
     self.entries: Dict[str, CacheEntry[List[AddressAndPort]]] = {}
     self.locks: DefaultDict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

     # Keys are ordered by the ``expire_time`` of their entry, so a key's
     # entry must already be in ``self.entries`` when the key is added.
     self.keys = sortedcontainers.SortedSet(
         key=lambda cache_key: self.entries[cache_key].expire_time
     )

     self.task_manager = aiotools.BackgroundTaskManager()
Ejemplo n.º 3
0
 def __init__(self, handler, event=None, bump_secs=60.0, min_delay_secs=0.1):
     """Store the handler and timing settings.

     Args:
         handler: Stored on ``self.handler``.
         event: Event to use; a fresh ``asyncio.Event`` is created when
             this is ``None``.
         bump_secs: Stored on ``self.bump_secs``.
         min_delay_secs: Stored on ``self.min_delay_secs``.
     """
     self.handler = handler
     # Fall back to a private event when the caller does not share one.
     self.event = asyncio.Event() if event is None else event
     self.bump_secs = bump_secs
     self.min_delay_secs = min_delay_secs
     self.task_manager = aiotools.BackgroundTaskManager()
Ejemplo n.º 4
0
Archivo: pool.py Proyecto: saponas/hail
 def __init__(self, app, pool):
     """Bind this object to ``app`` and ``pool``.

     Args:
         app: Mapping from which ``'db'`` and ``'async_worker_pool'`` are
             read.
         pool: Object whose ``scheduler_state_changed`` attribute is
             shared with this instance; also stored on ``self.pool``.
     """
     self.app = app
     # Reuse the pool's own notification object rather than creating one.
     self.scheduler_state_changed = pool.scheduler_state_changed
     self.db: Database = app['db']
     self.pool = pool
     self.async_worker_pool: AsyncWorkerPool = app['async_worker_pool']

     self.exceeded_shares_counter = ExceededSharesCounter()
     self.task_manager = aiotools.BackgroundTaskManager()
Ejemplo n.º 5
0
    def __init__(self, app):
        """Capture the compute client from ``app`` and create empty state.

        Args:
            app: Mapping from which ``'compute_client'`` is read.
        """
        self.app = app
        self.compute_client: aiogoogle.ComputeClient = app['compute_client']
        self.zone_success_rate = ZoneSuccessRate()
        # Not populated by the constructor.
        self.region_info = None
        self.task_manager = aiotools.BackgroundTaskManager()
Ejemplo n.º 6
0
    def __init__(self, app, machine_name_prefix):
        """Capture shared clients and managers from ``app``.

        Args:
            app: Mapping from which ``'db'``, ``'zone_monitor'``,
                ``'compute_client'``, ``'logging_client'`` and
                ``'inst_coll_manager'`` are read.
            machine_name_prefix: Stored on ``self.machine_name_prefix``.
        """
        self.app = app

        # Shared services, looked up once at construction time.
        self.db: Database = app['db']
        self.zone_monitor: ZoneMonitor = app['zone_monitor']
        self.compute_client: aiogoogle.ComputeClient = app['compute_client']
        self.logging_client: aiogoogle.LoggingClient = app['logging_client']
        self.inst_coll_manager: InstanceCollectionManager = app[
            'inst_coll_manager'
        ]

        self.machine_name_prefix = machine_name_prefix
        self.task_manager = aiotools.BackgroundTaskManager()
Ejemplo n.º 7
0
async def on_startup(app):
    """Populate ``app`` with clients and hand the GitHub polling loop to a task manager.

    Sets ``'gs_reader'``, ``'gh_client_session'``, ``'github_client'``,
    ``'batch_client'`` and ``'task_manager'`` on ``app``.
    """
    app['gs_reader'] = ReadGoogleStorage(
        service_account_key_file='/benchmark-gsa-key/key.json'
    )

    # The GitHub API client is built on top of the HTTP session stored in app.
    session = aiohttp.ClientSession()
    app['gh_client_session'] = session
    app['github_client'] = gidgethub.aiohttp.GitHubAPI(
        session, 'hail-is/hail', oauth_token=oauth_token
    )

    app['batch_client'] = bc.BatchClient(billing_project='benchmark')

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    polling = retry_long_running('github_polling_loop', github_polling_loop, app)
    task_manager.ensure_future(polling)
Ejemplo n.º 8
0
    def __init__(self, app):
        """Capture the cancellation events and shared services from ``app``.

        Args:
            app: Mapping from which the three ``cancel_*_state_changed``
                events, ``'db'``, ``'async_worker_pool'``,
                ``'compute_client'`` and ``'inst_coll_manager'`` are read.
        """
        self.app = app

        # One event per state; each is shared through ``app``.
        self.cancel_ready_state_changed: asyncio.Event = app[
            'cancel_ready_state_changed'
        ]
        self.cancel_creating_state_changed: asyncio.Event = app[
            'cancel_creating_state_changed'
        ]
        self.cancel_running_state_changed: asyncio.Event = app[
            'cancel_running_state_changed'
        ]

        # Shared services.
        self.db: Database = app['db']
        self.async_worker_pool: AsyncWorkerPool = app['async_worker_pool']
        self.compute_client: aiogoogle.GoogleComputeClient = app['compute_client']
        self.inst_coll_manager: InstanceCollectionManager = app[
            'inst_coll_manager'
        ]

        self.task_manager = aiotools.BackgroundTaskManager()
Ejemplo n.º 9
0
async def on_startup(app):
    """Populate ``app`` with clients and a database pool, then start ``update_loop``.

    Sets ``'gh_client_session'``, ``'github_client'``, ``'batch_client'``,
    ``'dbpool'`` and ``'task_manager'`` on ``app``.
    """
    # HTTP session with a 5-unit total timeout (as passed to ClientTimeout).
    session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=5))
    app['gh_client_session'] = session
    app['github_client'] = gh_aiohttp.GitHubAPI(
        session, 'ci', oauth_token=oauth_token
    )

    app['batch_client'] = BatchClient('ci')
    app['dbpool'] = await create_database_pool()

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(update_loop(app))
Ejemplo n.º 10
0
async def on_startup(app):
    """Populate ``app`` with clients and a database, then start ``update_loop``.

    Sets ``'client_session'``, ``'github_client'``, ``'batch_client'``,
    ``'db'`` and ``'task_manager'`` on ``app``.
    """
    session = httpx.client_session()
    app['client_session'] = session
    app['github_client'] = gh_aiohttp.GitHubAPI(
        session, 'ci', oauth_token=oauth_token
    )
    app['batch_client'] = await BatchClient.create('ci')

    # The database is stored in app first and initialized through app.
    app['db'] = Database()
    await app['db'].async_init()

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(update_loop(app))
Ejemplo n.º 11
0
async def on_startup(app):
    """Populate ``app`` with storage/GitHub/batch clients and start GitHub polling.

    Sets ``'fs'``, ``'client_session'``, ``'github_client'``,
    ``'batch_client'`` and ``'task_manager'`` on ``app``.
    """
    gsa_credentials = aiogoogle.GoogleCredentials.from_file(
        '/benchmark-gsa-key/key.json'
    )
    app['fs'] = aiogoogle.GoogleStorageAsyncFS(credentials=gsa_credentials)

    session = httpx.client_session()
    app['client_session'] = session
    app['github_client'] = gidgethub.aiohttp.GitHubAPI(
        session, 'hail-is/hail', oauth_token=oauth_token
    )

    app['batch_client'] = await bc.BatchClient.create(billing_project='benchmark')

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    polling = retry_long_running('github_polling_loop', github_polling_loop, app)
    task_manager.ensure_future(polling)
Ejemplo n.º 12
0
async def on_startup(app):
    """Create the GitHub and Asana clients, load data once, then start polling.

    The GitHub token is read from the file named by the
    ``GITHUB_TOKEN_PATH`` environment variable, falling back to
    ``/secrets/scorecard-github-access-token.txt``. Sets ``'gh_session'``,
    ``'gh_client'``, ``'asana_client'`` and ``'task_manager'`` on ``app``.
    """
    token_path = os.environ.get(
        'GITHUB_TOKEN_PATH', '/secrets/scorecard-github-access-token.txt'
    )
    with open(token_path, 'r') as token_fh:
        # Surrounding whitespace (e.g. a trailing newline) is stripped.
        token = token_fh.read().strip()

    session = aiohttp.ClientSession(
        raise_for_status=True, timeout=aiohttp.ClientTimeout(total=5)
    )
    app['gh_session'] = session
    gh_client = gidgethub.aiohttp.GitHubAPI(session, 'scorecard', oauth_token=token)
    app['gh_client'] = gh_client

    asana_client = AsanaClient()
    app['asana_client'] = asana_client

    # One update is awaited to completion before the poller is handed off.
    await update_data(gh_client, asana_client)

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(poll(gh_client, asana_client))
Ejemplo n.º 13
0
async def on_startup(app):
    """Populate ``app`` with the database and Google clients and start monitoring tasks.

    Sets ``'db'``, ``'client_session'``, ``'bigquery_client'``,
    ``'compute_client'``, ``'query_billing_event'``, ``'zones'`` and
    ``'task_manager'`` on ``app``, then hands four coroutines to the task
    manager.
    """
    db = Database()
    await db.async_init()
    app['db'] = db
    app['client_session'] = httpx.client_session()

    # Both Google clients share one set of credentials.
    gsa_credentials = aiogoogle.GoogleCredentials.from_file(
        '/billing-monitoring-gsa-key/key.json'
    )
    app['bigquery_client'] = aiogoogle.GoogleBigQueryClient(
        'broad-ctsa', credentials=gsa_credentials
    )
    compute_client = aiogoogle.GoogleComputeClient(
        PROJECT, credentials=gsa_credentials
    )
    app['compute_client'] = compute_client

    query_billing_event = asyncio.Event()
    app['query_billing_event'] = query_billing_event

    # Fetch each region one after another, then collect the basename of
    # every zone URL listed under the regions' 'zones' key.
    region_info = {}
    for name in BATCH_GCP_REGIONS:
        region_info[name] = await compute_client.get(f'/regions/{name}')

    zones = []
    for region in region_info.values():
        zones.extend(url_basename(zone_url) for zone_url in region['zones'])
    app['zones'] = zones

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    task_manager.ensure_future(
        retry_long_running('polling_loop', polling_loop, app)
    )
    task_manager.ensure_future(
        retry_long_running(
            'query_billing_loop',
            run_if_changed_idempotent,
            query_billing_event,
            query_billing_body,
            app,
        )
    )
    task_manager.ensure_future(periodically_call(60, monitor_disks, app))
    task_manager.ensure_future(periodically_call(60, monitor_instances, app))
Ejemplo n.º 14
0
async def on_startup(app):
    """Populate ``app`` with the database and BigQuery client and start two loops.

    Sets ``'db'``, ``'bigquery_client'``, ``'query_billing_event'`` and
    ``'task_manager'`` on ``app``.
    """
    db = Database()
    await db.async_init()
    app['db'] = db

    gsa_credentials = aiogoogle.Credentials.from_file(
        '/billing-monitoring-gsa-key/key.json'
    )
    app['bigquery_client'] = BigQueryClient(
        'broad-ctsa', credentials=gsa_credentials
    )

    # The same event object is stored in app and passed to the billing loop.
    query_billing_event = asyncio.Event()
    app['query_billing_event'] = query_billing_event

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    task_manager.ensure_future(
        retry_long_running('polling_loop', polling_loop, app)
    )
    task_manager.ensure_future(
        retry_long_running(
            'query_billing_loop',
            run_if_changed_idempotent,
            query_billing_event,
            query_billing_body,
            app,
        )
    )
Ejemplo n.º 15
0
 def __init__(self, parallelism):
     """Create the semaphore and the background task manager.

     Args:
         parallelism: Initial value of the internal ``asyncio.Semaphore``.
     """
     self._semaphore = asyncio.Semaphore(parallelism)
     self.task_manager = aiotools.BackgroundTaskManager()
Ejemplo n.º 16
0
Archivo: main.py Proyecto: saponas/hail
async def on_startup(app):
    """Populate ``app`` with shared services and hand periodic jobs to a task manager.

    Statement order matters: several objects below are constructed from
    ``app`` itself, after the keys they may read have been stored in it.
    """
    # The task manager is created first; it is only used at the end of
    # this function to register the periodic jobs.
    app['task_manager'] = aiotools.BackgroundTaskManager()
    pool = concurrent.futures.ThreadPoolExecutor()
    app['blocking_pool'] = pool

    kube.config.load_incluster_config()
    k8s_client = kube.client.CoreV1Api()
    k8s_cache = K8sCache(k8s_client, refresh_time=5)
    app['k8s_cache'] = k8s_cache

    db = Database()
    await db.async_init(maxsize=50)
    app['db'] = db

    # A single row supplies both the instance id and the internal token.
    row = await db.select_and_fetchone('''
SELECT instance_id, internal_token FROM globals;
''')

    instance_id = row['instance_id']
    log.info(f'instance_id {instance_id}')
    app['instance_id'] = instance_id

    app['internal_token'] = row['internal_token']

    app['batch_headers'] = {'Authorization': f'Bearer {row["internal_token"]}'}

    # Not awaited: the result is consumed with ``async for`` just below.
    resources = db.select_and_fetchall('SELECT resource FROM resources;')

    app['resources'] = [record['resource'] async for record in resources]

    # The compute and logging clients share these credentials.
    aiogoogle_credentials = aiogoogle.Credentials.from_file(
        '/gsa-key/key.json')
    compute_client = aiogoogle.ComputeClient(PROJECT,
                                             credentials=aiogoogle_credentials)
    app['compute_client'] = compute_client

    logging_client = aiogoogle.LoggingClient(
        credentials=aiogoogle_credentials,
        # The project-wide logging quota is 60 request/m.  The event
        # loop sleeps 15s per iteration, so the max rate is 4
        # iterations/m.  Note, the event loop could make multiple
        # logging requests per iteration, so these numbers are not
        # quite comparable.  I didn't want to consume the entire quota
        # since there will be other users of the logging API (us at
        # the web console, test deployments, etc.)
        rate_limit=RateLimit(10, 60),
    )
    app['logging_client'] = logging_client

    scheduler_state_changed = Notice()
    app['scheduler_state_changed'] = scheduler_state_changed

    # One asyncio.Event per cancellation state, each stored under its own key.
    cancel_ready_state_changed = asyncio.Event()
    app['cancel_ready_state_changed'] = cancel_ready_state_changed

    cancel_creating_state_changed = asyncio.Event()
    app['cancel_creating_state_changed'] = cancel_creating_state_changed

    cancel_running_state_changed = asyncio.Event()
    app['cancel_running_state_changed'] = cancel_running_state_changed

    async_worker_pool = AsyncWorkerPool(100, queue_size=100)
    app['async_worker_pool'] = async_worker_pool

    # The log store gets its own credentials object, loaded from the same
    # key file as above, and the thread pool created at the top.
    credentials = google.oauth2.service_account.Credentials.from_service_account_file(
        '/gsa-key/key.json')
    log_store = LogStore(BATCH_BUCKET_NAME,
                         instance_id,
                         pool,
                         credentials=credentials)
    app['log_store'] = log_store

    # The objects below are built from ``app``; each is stored in ``app``
    # before its async_init is awaited (except the configs, which are not
    # stored and are only passed to the instance collection manager).
    zone_monitor = ZoneMonitor(app)
    app['zone_monitor'] = zone_monitor
    await zone_monitor.async_init()

    inst_coll_configs = InstanceCollectionConfigs(app)
    await inst_coll_configs.async_init()

    inst_coll_manager = InstanceCollectionManager(app, MACHINE_NAME_PREFIX)
    app['inst_coll_manager'] = inst_coll_manager
    await inst_coll_manager.async_init(inst_coll_configs)

    canceller = Canceller(app)
    app['canceller'] = canceller
    await canceller.async_init()

    gce_event_monitor = GCEEventMonitor(app, MACHINE_NAME_PREFIX)
    app['gce_event_monitor'] = gce_event_monitor
    await gce_event_monitor.async_init()

    app['check_incremental_error'] = None
    app['check_resource_aggregation_error'] = None

    # The two invariant checks are registered only when
    # HAIL_SHOULD_CHECK_INVARIANTS is truthy.
    if HAIL_SHOULD_CHECK_INVARIANTS:
        app['task_manager'].ensure_future(
            periodically_call(10, check_incremental, app, db))
        app['task_manager'].ensure_future(
            periodically_call(10, check_resource_aggregation, app, db))

    # These periodic jobs are always registered.
    app['task_manager'].ensure_future(
        periodically_call(10, monitor_billing_limits, app))

    app['task_manager'].ensure_future(
        periodically_call(10, cancel_fast_failing_batches, app))

    app['task_manager'].ensure_future(
        periodically_call(60, scheduling_cancelling_bump, app))
Ejemplo n.º 17
0
async def on_startup(app):
    """Populate ``app`` with shared services and hand periodic jobs to a task manager.

    Statement order matters: the driver and the canceller are created from
    ``app`` after the keys set earlier in this function have been stored.
    """
    # Kept in a local as well, because it is passed to get_cloud_driver and
    # used for the periodic jobs at the end.
    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    app['client_session'] = httpx.client_session()

    kubernetes_asyncio.config.load_incluster_config()
    app['k8s_client'] = kubernetes_asyncio.client.CoreV1Api()
    app['k8s_cache'] = K8sCache(app['k8s_client'])

    db = Database()
    await db.async_init(maxsize=50)
    app['db'] = db

    # A single row supplies the instance id, internal token and frozen flag.
    row = await db.select_and_fetchone('''
SELECT instance_id, internal_token, frozen FROM globals;
''')

    instance_id = row['instance_id']
    log.info(f'instance_id {instance_id}')
    app['instance_id'] = instance_id

    app['internal_token'] = row['internal_token']

    app['batch_headers'] = {'Authorization': f'Bearer {row["internal_token"]}'}

    app['frozen'] = row['frozen']

    scheduler_state_changed = Notice()
    app['scheduler_state_changed'] = scheduler_state_changed

    # One asyncio.Event per cancellation state, each stored under its own key.
    cancel_ready_state_changed = asyncio.Event()
    app['cancel_ready_state_changed'] = cancel_ready_state_changed

    cancel_creating_state_changed = asyncio.Event()
    app['cancel_creating_state_changed'] = cancel_creating_state_changed

    cancel_running_state_changed = asyncio.Event()
    app['cancel_running_state_changed'] = cancel_running_state_changed

    async_worker_pool = AsyncWorkerPool(100, queue_size=100)
    app['async_worker_pool'] = async_worker_pool

    # The same credentials file path is used for the file system here and
    # is passed to get_cloud_driver below.
    credentials_file = '/gsa-key/key.json'
    fs = get_cloud_async_fs(credentials_file=credentials_file)
    app['file_store'] = FileStore(fs, BATCH_STORAGE_URI, instance_id)

    inst_coll_configs = await InstanceCollectionConfigs.create(db)

    app['driver'] = await get_cloud_driver(app, db, MACHINE_NAME_PREFIX,
                                           DEFAULT_NAMESPACE,
                                           inst_coll_configs, credentials_file,
                                           task_manager)

    canceller = await Canceller.create(app)
    app['canceller'] = canceller

    app['check_incremental_error'] = None
    app['check_resource_aggregation_error'] = None

    # The two invariant checks are registered only when
    # HAIL_SHOULD_CHECK_INVARIANTS is truthy.
    if HAIL_SHOULD_CHECK_INVARIANTS:
        task_manager.ensure_future(
            periodically_call(10, check_incremental, app, db))
        task_manager.ensure_future(
            periodically_call(10, check_resource_aggregation, app, db))

    # These periodic jobs are always registered.
    task_manager.ensure_future(
        periodically_call(10, monitor_billing_limits, app))
    task_manager.ensure_future(
        periodically_call(10, cancel_fast_failing_batches, app))
    task_manager.ensure_future(
        periodically_call(60, scheduling_cancelling_bump, app))
    task_manager.ensure_future(periodically_call(15, monitor_system, app))