def __init__(
    self,
    app,
    db: Database,  # BORROWED
    inst_coll_manager: InstanceCollectionManager,
    resource_manager: CloudResourceManager,
    machine_name_prefix: str,
    config: PoolConfig,
    async_worker_pool: AsyncWorkerPool,  # BORROWED
    task_manager: aiotools.BackgroundTaskManager,  # BORROWED
):
    """Initialize a worker pool instance collection.

    Wires the pool into the app-wide scheduler-state notice, builds the
    pool's scheduler, copies sizing/disk settings out of *config*, and
    starts the pool control loop on *task_manager*.
    """
    super().__init__(
        db,
        inst_coll_manager,
        resource_manager,
        config.cloud,
        config.name,
        machine_name_prefix,
        is_pool=True,
        max_instances=config.max_instances,
        max_live_instances=config.max_live_instances,
        task_manager=task_manager,
    )
    self.app = app
    self.inst_coll_manager = inst_coll_manager

    # Subscribe to the shared scheduler-state notice so this pool wakes
    # up whenever scheduling-relevant state changes anywhere in the app.
    notice: Notice = app['scheduler_state_changed']
    self.scheduler_state_changed = notice.subscribe()
    self.scheduler = PoolScheduler(app, self, async_worker_pool, task_manager)

    # Healthy instances kept ordered by free mcpu so scheduling can pick
    # instances by spare capacity efficiently.
    self.healthy_instances_by_free_cores = sortedcontainers.SortedSet(
        key=lambda inst: inst.free_cores_mcpu
    )

    # Pool sizing / disk configuration, copied verbatim from config.
    for field in (
        'worker_type',
        'worker_cores',
        'worker_local_ssd_data_disk',
        'worker_external_ssd_data_disk_size_gb',
        'enable_standing_worker',
        'standing_worker_cores',
        'boot_disk_size_gb',
        'data_disk_size_gb',
        'data_disk_size_standing_gb',
        'preemptible',
    ):
        setattr(self, field, getattr(config, field))

    task_manager.ensure_future(self.control_loop())
def __init__(
    self,
    app,
    pool: Pool,
    async_worker_pool: AsyncWorkerPool,  # BORROWED
    task_manager: aiotools.BackgroundTaskManager,  # BORROWED
):
    """Scheduler for a single pool.

    Shares the pool's scheduler-state subscription and registers a
    long-running schedule loop that re-runs whenever that state changes.
    """
    self.app = app
    self.pool = pool
    self.async_worker_pool = async_worker_pool
    self.db: Database = app['db']
    self.scheduler_state_changed = pool.scheduler_state_changed
    self.exceeded_shares_counter = ExceededSharesCounter()

    # Re-run the schedule loop body each time the pool's scheduler state
    # flips; retry_long_running restarts it if it ever fails.
    task_manager.ensure_future(
        retry_long_running(
            'schedule_loop',
            run_if_changed,
            self.scheduler_state_changed,
            self.schedule_loop_body,
        )
    )
def __init__(
    self,
    app,
    db: Database,  # BORROWED
    inst_coll_manager: InstanceCollectionManager,
    resource_manager: CloudResourceManager,
    machine_name_prefix: str,
    config: JobPrivateInstanceManagerConfig,
    task_manager: aiotools.BackgroundTaskManager,
):
    """Initialize the manager for job-private (non-pool) instances.

    Instance creation is driven by the app-wide scheduler-state notice;
    job scheduling is driven by a private event that is also bumped
    periodically as a backstop.
    """
    super().__init__(
        db,
        inst_coll_manager,
        resource_manager,
        config.cloud,
        config.name,
        machine_name_prefix,
        is_pool=False,
        max_instances=config.max_instances,
        max_live_instances=config.max_live_instances,
        task_manager=task_manager,
    )
    self.app = app

    notice: Notice = app['scheduler_state_changed']
    self.create_instances_state_changed = notice.subscribe()
    self.scheduler_state_changed = asyncio.Event()

    self.async_worker_pool: AsyncWorkerPool = app['async_worker_pool']
    self.exceeded_shares_counter = ExceededSharesCounter()
    self.boot_disk_size_gb = config.boot_disk_size_gb

    # Two long-running loops: one creates instances when global scheduler
    # state changes, one schedules jobs when the private event fires.
    loops = (
        ('create_instances_loop', self.create_instances_state_changed, self.create_instances_loop_body),
        ('schedule_jobs_loop', self.scheduler_state_changed, self.schedule_jobs_loop_body),
    )
    for loop_name, changed, body in loops:
        task_manager.ensure_future(retry_long_running(loop_name, run_if_changed, changed, body))

    # Backstop: wake the job scheduler every 15s even without an event.
    task_manager.ensure_future(periodically_call(15, self.bump_scheduler))
help='The local path will be kept in sync with the remote path.', ) parser.add_argument( '--ignore', required=False, type=str, default='flycheck_.*|.*~|\.#.*', help= 'A regular expression indicating in which files to ignore changes.', ) args = parser.parse_args(sys.argv[1:]) with closing(asyncio.get_event_loop()) as loop: monitor = Monitor() task_manager = BackgroundTaskManager() try: sync = Sync(args.path) for local, _ in args.path: monitor.add_path(local) ignore_re = re.compile(args.ignore) def callback(path: bytes, evt_time, flags, flags_num, event_num): if not ignore_re.fullmatch(os.path.basename(path.decode())): task_manager.ensure_future_threadsafe(sync.should_sync()) monitor.set_callback(callback) signal.signal(signal.SIGINT, monitor._handle_signal)
async def create(
    app,
    db: Database,  # BORROWED
    machine_name_prefix: str,
    namespace: str,
    inst_coll_configs: InstanceCollectionConfigs,
    credentials_file: str,
    task_manager: aiotools.BackgroundTaskManager,  # BORROWED
) -> 'GCPDriver':
    """Build the GCP driver: clients, monitors, instance collections.

    Creates the Google API clients, the zone/billing monitors and the
    instance-collection managers, concurrently creates every pool plus the
    job-private instance manager, then registers the driver's periodic
    maintenance tasks on *task_manager* and returns the driver.
    """
    gcp_config = get_gcp_config()
    project = gcp_config.project
    zone = gcp_config.zone
    regions = gcp_config.regions

    compute_client = aiogoogle.GoogleComputeClient(project, credentials_file=credentials_file)

    activity_logs_client = aiogoogle.GoogleLoggingClient(
        credentials_file=credentials_file,
        # The project-wide logging quota is 60 request/m. The event
        # loop sleeps 15s per iteration, so the max rate is 4
        # iterations/m. Note, the event loop could make multiple
        # logging requests per iteration, so these numbers are not
        # quite comparable. I didn't want to consume the entire quota
        # since there will be other users of the logging API (us at
        # the web console, test deployments, etc.)
        rate_limit=RateLimit(10, 60),
    )

    zone_monitor = await ZoneMonitor.create(compute_client, regions, zone)
    billing_manager = await GCPBillingManager.create(db)
    inst_coll_manager = InstanceCollectionManager(db, machine_name_prefix, zone_monitor)
    resource_manager = GCPResourceManager(project, compute_client, billing_manager)

    # Fix: the pool name was unused, so iterate values() instead of items().
    create_pools_coros = [
        Pool.create(
            app,
            db,
            inst_coll_manager,
            resource_manager,
            machine_name_prefix,
            config,
            app['async_worker_pool'],
            task_manager,
        )
        for config in inst_coll_configs.name_pool_config.values()
    ]

    # Create the JPIM and all pools concurrently; only the JPIM result is
    # needed by the driver constructor.
    jpim, *_ = await asyncio.gather(
        JobPrivateInstanceManager.create(
            app,
            db,
            inst_coll_manager,
            resource_manager,
            machine_name_prefix,
            inst_coll_configs.jpim_config,
            task_manager,
        ),
        *create_pools_coros,
    )

    driver = GCPDriver(
        db,
        machine_name_prefix,
        compute_client,
        activity_logs_client,
        project,
        namespace,
        zone_monitor,
        inst_coll_manager,
        jpim,
        billing_manager,
    )

    # Periodic maintenance: activity-log processing, quota refresh,
    # orphaned-disk cleanup, and billing resource refresh.
    task_manager.ensure_future(periodically_call(15, driver.process_activity_logs))
    task_manager.ensure_future(periodically_call(60, zone_monitor.update_region_quotas))
    task_manager.ensure_future(periodically_call(60, driver.delete_orphaned_disks))
    task_manager.ensure_future(periodically_call(300, billing_manager.refresh_resources))

    return driver
async def create(
    app,
    db: Database,  # BORROWED
    machine_name_prefix: str,
    namespace: str,
    inst_coll_configs: InstanceCollectionConfigs,
    credentials_file: str,
    task_manager: aiotools.BackgroundTaskManager,  # BORROWED
) -> 'AzureDriver':
    """Build the Azure driver: clients, monitors, instance collections.

    Creates the Azure API clients, the region/billing monitors and the
    instance-collection managers, concurrently creates every pool plus the
    job-private instance manager, then registers the driver's periodic
    cleanup tasks on *task_manager* and returns the driver.
    """
    azure_config = get_azure_config()
    subscription_id = azure_config.subscription_id
    resource_group = azure_config.resource_group
    region = azure_config.region
    regions = [region]

    with open(os.environ['HAIL_SSH_PUBLIC_KEY'], encoding='utf-8') as f:
        ssh_public_key = f.read()

    arm_client = aioazure.AzureResourceManagerClient(
        subscription_id, resource_group, credentials_file=credentials_file
    )
    compute_client = aioazure.AzureComputeClient(
        subscription_id, resource_group, credentials_file=credentials_file
    )
    resources_client = aioazure.AzureResourcesClient(subscription_id, credentials_file=credentials_file)
    network_client = aioazure.AzureNetworkClient(
        subscription_id, resource_group, credentials_file=credentials_file
    )
    pricing_client = aioazure.AzurePricingClient()

    region_monitor = await RegionMonitor.create(region)
    billing_manager = await AzureBillingManager.create(db, pricing_client, regions)
    inst_coll_manager = InstanceCollectionManager(db, machine_name_prefix, region_monitor)
    resource_manager = AzureResourceManager(
        subscription_id, resource_group, ssh_public_key, arm_client, compute_client, billing_manager
    )

    # Fix: the pool name was unused, so iterate values() instead of items().
    create_pools_coros = [
        Pool.create(
            app,
            db,
            inst_coll_manager,
            resource_manager,
            machine_name_prefix,
            config,
            app['async_worker_pool'],
            task_manager,
        )
        for config in inst_coll_configs.name_pool_config.values()
    ]

    # Create the JPIM and all pools concurrently; only the JPIM result is
    # needed by the driver constructor.
    jpim, *_ = await asyncio.gather(
        JobPrivateInstanceManager.create(
            app,
            db,
            inst_coll_manager,
            resource_manager,
            machine_name_prefix,
            inst_coll_configs.jpim_config,
            task_manager,
        ),
        *create_pools_coros,
    )

    driver = AzureDriver(
        db,
        machine_name_prefix,
        arm_client,
        compute_client,
        resources_client,
        network_client,
        pricing_client,
        subscription_id,
        resource_group,
        namespace,
        region_monitor,
        inst_coll_manager,
        jpim,
        billing_manager,
    )

    # Periodic cleanup of orphaned Azure resources and price refresh.
    task_manager.ensure_future(periodically_call(60, driver.delete_orphaned_nics))
    task_manager.ensure_future(periodically_call(60, driver.delete_orphaned_public_ips))
    task_manager.ensure_future(periodically_call(60, driver.delete_completed_deployments))
    task_manager.ensure_future(periodically_call(300, billing_manager.refresh_resources_from_retail_prices))

    return driver