Example #1
0
    def __init__(
        self,
        app,
        db: Database,  # BORROWED
        inst_coll_manager: InstanceCollectionManager,
        resource_manager: CloudResourceManager,
        machine_name_prefix: str,
        config: PoolConfig,
        async_worker_pool: AsyncWorkerPool,  # BORROWED
        task_manager: aiotools.BackgroundTaskManager,  # BORROWED
    ):
        """Initialize a worker pool from its config and start its control loop.

        All pool-specific sizing knobs are copied off *config*; the shared
        pieces (db, managers, limits) are handed to the base class.
        """
        super().__init__(
            db,
            inst_coll_manager,
            resource_manager,
            config.cloud,
            config.name,
            machine_name_prefix,
            is_pool=True,
            max_instances=config.max_instances,
            max_live_instances=config.max_live_instances,
            task_manager=task_manager,
        )
        self.app = app
        self.inst_coll_manager = inst_coll_manager

        # Every pool gets its own subscription to the global scheduler-state
        # notice so it can be woken independently.
        global_notice: Notice = self.app['scheduler_state_changed']
        self.scheduler_state_changed = global_notice.subscribe()

        self.scheduler = PoolScheduler(self.app, self, async_worker_pool, task_manager)

        # Healthy instances kept sorted by free mcpu so selection by
        # available capacity is cheap.
        self.healthy_instances_by_free_cores = sortedcontainers.SortedSet(
            key=lambda inst: inst.free_cores_mcpu
        )

        # Mirror the pool-specific configuration onto the instance.
        for attr in (
            'worker_type',
            'worker_cores',
            'worker_local_ssd_data_disk',
            'worker_external_ssd_data_disk_size_gb',
            'enable_standing_worker',
            'standing_worker_cores',
            'boot_disk_size_gb',
            'data_disk_size_gb',
            'data_disk_size_standing_gb',
            'preemptible',
        ):
            setattr(self, attr, getattr(config, attr))

        task_manager.ensure_future(self.control_loop())
Example #2
0
 def __init__(
     self,
     app,
     pool: Pool,
     async_worker_pool: AsyncWorkerPool,  # BORROWED
     task_manager: aiotools.BackgroundTaskManager,  # BORROWED
 ):
     """Create a scheduler bound to a single pool and start its schedule loop."""
     self.app = app
     self.pool = pool
     # Share the pool's subscription so both wake on the same notice.
     self.scheduler_state_changed = pool.scheduler_state_changed
     self.db: Database = app['db']
     self.async_worker_pool = async_worker_pool
     self.exceeded_shares_counter = ExceededSharesCounter()
     # Re-run the schedule loop body whenever the state event fires,
     # retrying the long-running wrapper on failure.
     task_manager.ensure_future(
         retry_long_running(
             'schedule_loop',
             run_if_changed,
             self.scheduler_state_changed,
             self.schedule_loop_body,
         )
     )
Example #3
0
    def __init__(
        self,
        app,
        db: Database,  # BORROWED
        inst_coll_manager: InstanceCollectionManager,
        resource_manager: CloudResourceManager,
        machine_name_prefix: str,
        config: JobPrivateInstanceManagerConfig,
        task_manager: aiotools.BackgroundTaskManager,
    ):
        """Initialize the job-private instance manager and start its loops."""
        super().__init__(
            db,
            inst_coll_manager,
            resource_manager,
            config.cloud,
            config.name,
            machine_name_prefix,
            is_pool=False,
            max_instances=config.max_instances,
            max_live_instances=config.max_live_instances,
            task_manager=task_manager,
        )
        self.app = app

        # Instance creation is driven by the global scheduler-state notice;
        # scheduling jobs onto created instances is driven by a private event.
        global_notice: Notice = self.app['scheduler_state_changed']
        self.create_instances_state_changed = global_notice.subscribe()
        self.scheduler_state_changed = asyncio.Event()

        self.async_worker_pool: AsyncWorkerPool = app['async_worker_pool']
        self.exceeded_shares_counter = ExceededSharesCounter()

        self.boot_disk_size_gb = config.boot_disk_size_gb

        # Two event-driven loops plus a periodic scheduler bump.
        for loop_name, changed, body in (
            ('create_instances_loop', self.create_instances_state_changed, self.create_instances_loop_body),
            ('schedule_jobs_loop', self.scheduler_state_changed, self.schedule_jobs_loop_body),
        ):
            task_manager.ensure_future(retry_long_running(loop_name, run_if_changed, changed, body))
        task_manager.ensure_future(periodically_call(15, self.bump_scheduler))
Example #4
0
        help='The local path will be kept in sync with the remote path.',
    )
    parser.add_argument(
        '--ignore',
        required=False,
        type=str,
        default='flycheck_.*|.*~|\.#.*',
        help=
        'A regular expression indicating in which files to ignore changes.',
    )

    args = parser.parse_args(sys.argv[1:])

    with closing(asyncio.get_event_loop()) as loop:
        monitor = Monitor()
        task_manager = BackgroundTaskManager()
        try:
            sync = Sync(args.path)

            for local, _ in args.path:
                monitor.add_path(local)

            ignore_re = re.compile(args.ignore)

            def callback(path: bytes, evt_time, flags, flags_num, event_num):
                # Trigger a sync unless the changed file matches the ignore
                # pattern; called from the monitor's thread, hence threadsafe.
                basename = os.path.basename(path.decode())
                if ignore_re.fullmatch(basename):
                    return
                task_manager.ensure_future_threadsafe(sync.should_sync())

            monitor.set_callback(callback)

            signal.signal(signal.SIGINT, monitor._handle_signal)
Example #5
0
    async def create(
            app,
            db: Database,  # BORROWED
            machine_name_prefix: str,
            namespace: str,
            inst_coll_configs: InstanceCollectionConfigs,
            credentials_file: str,
            task_manager: aiotools.BackgroundTaskManager,  # BORROWED
    ) -> 'GCPDriver':
        """Construct a fully wired GCPDriver and start its background tasks.

        Builds the GCP clients, monitors, and managers, creates every
        configured pool plus the job-private instance manager concurrently,
        then registers the driver's periodic maintenance tasks on
        *task_manager* before returning the driver.
        """
        gcp_config = get_gcp_config()
        project = gcp_config.project
        zone = gcp_config.zone
        regions = gcp_config.regions

        compute_client = aiogoogle.GoogleComputeClient(
            project, credentials_file=credentials_file)

        activity_logs_client = aiogoogle.GoogleLoggingClient(
            credentials_file=credentials_file,
            # The project-wide logging quota is 60 request/m.  The event
            # loop sleeps 15s per iteration, so the max rate is 4
            # iterations/m.  Note, the event loop could make multiple
            # logging requests per iteration, so these numbers are not
            # quite comparable.  I didn't want to consume the entire quota
            # since there will be other users of the logging API (us at
            # the web console, test deployments, etc.)
            rate_limit=RateLimit(10, 60),
        )

        zone_monitor = await ZoneMonitor.create(compute_client, regions, zone)
        billing_manager = await GCPBillingManager.create(db)
        inst_coll_manager = InstanceCollectionManager(db, machine_name_prefix,
                                                      zone_monitor)
        resource_manager = GCPResourceManager(project, compute_client,
                                              billing_manager)

        # Only the configs are needed here: the pool's name is carried inside
        # each config (Pool's __init__ reads config.name), so iterate values()
        # instead of items() with an unused key.
        create_pools_coros = [
            Pool.create(
                app,
                db,
                inst_coll_manager,
                resource_manager,
                machine_name_prefix,
                config,
                app['async_worker_pool'],
                task_manager,
            ) for config in inst_coll_configs.name_pool_config.values()
        ]

        # Create the JPIM and all pools concurrently; only the JPIM's result
        # is needed afterwards.
        jpim, *_ = await asyncio.gather(
            JobPrivateInstanceManager.create(
                app,
                db,
                inst_coll_manager,
                resource_manager,
                machine_name_prefix,
                inst_coll_configs.jpim_config,
                task_manager,
            ), *create_pools_coros)

        driver = GCPDriver(
            db,
            machine_name_prefix,
            compute_client,
            activity_logs_client,
            project,
            namespace,
            zone_monitor,
            inst_coll_manager,
            jpim,
            billing_manager,
        )

        # Periodic maintenance: activity-log processing, quota refresh,
        # orphaned-disk cleanup, and billing-resource refresh.
        task_manager.ensure_future(
            periodically_call(15, driver.process_activity_logs))
        task_manager.ensure_future(
            periodically_call(60, zone_monitor.update_region_quotas))
        task_manager.ensure_future(
            periodically_call(60, driver.delete_orphaned_disks))
        task_manager.ensure_future(
            periodically_call(300, billing_manager.refresh_resources))

        return driver
Example #6
0
    async def create(
            app,
            db: Database,  # BORROWED
            machine_name_prefix: str,
            namespace: str,
            inst_coll_configs: InstanceCollectionConfigs,
            credentials_file: str,
            task_manager: aiotools.BackgroundTaskManager,  # BORROWED
    ) -> 'AzureDriver':
        """Construct a fully wired AzureDriver and start its background tasks.

        Builds the Azure clients, monitors, and managers, creates every
        configured pool plus the job-private instance manager concurrently,
        then registers the driver's periodic cleanup tasks on *task_manager*
        before returning the driver.
        """
        azure_config = get_azure_config()
        subscription_id = azure_config.subscription_id
        resource_group = azure_config.resource_group
        region = azure_config.region
        regions = [region]

        with open(os.environ['HAIL_SSH_PUBLIC_KEY'], encoding='utf-8') as f:
            ssh_public_key = f.read()

        arm_client = aioazure.AzureResourceManagerClient(
            subscription_id, resource_group, credentials_file=credentials_file)
        compute_client = aioazure.AzureComputeClient(
            subscription_id, resource_group, credentials_file=credentials_file)
        resources_client = aioazure.AzureResourcesClient(
            subscription_id, credentials_file=credentials_file)
        network_client = aioazure.AzureNetworkClient(
            subscription_id, resource_group, credentials_file=credentials_file)
        pricing_client = aioazure.AzurePricingClient()

        region_monitor = await RegionMonitor.create(region)
        billing_manager = await AzureBillingManager.create(
            db, pricing_client, regions)
        inst_coll_manager = InstanceCollectionManager(db, machine_name_prefix,
                                                      region_monitor)
        resource_manager = AzureResourceManager(subscription_id,
                                                resource_group, ssh_public_key,
                                                arm_client, compute_client,
                                                billing_manager)

        # Only the configs are needed here: the pool's name is carried inside
        # each config (Pool's __init__ reads config.name), so iterate values()
        # instead of items() with an unused key.
        create_pools_coros = [
            Pool.create(
                app,
                db,
                inst_coll_manager,
                resource_manager,
                machine_name_prefix,
                config,
                app['async_worker_pool'],
                task_manager,
            ) for config in inst_coll_configs.name_pool_config.values()
        ]

        # Create the JPIM and all pools concurrently; only the JPIM's result
        # is needed afterwards.
        jpim, *_ = await asyncio.gather(
            JobPrivateInstanceManager.create(
                app,
                db,
                inst_coll_manager,
                resource_manager,
                machine_name_prefix,
                inst_coll_configs.jpim_config,
                task_manager,
            ),
            *create_pools_coros,
        )

        driver = AzureDriver(
            db,
            machine_name_prefix,
            arm_client,
            compute_client,
            resources_client,
            network_client,
            pricing_client,
            subscription_id,
            resource_group,
            namespace,
            region_monitor,
            inst_coll_manager,
            jpim,
            billing_manager,
        )

        # Periodic cleanup of orphaned NICs / public IPs / finished
        # deployments, plus a billing-price refresh.
        task_manager.ensure_future(
            periodically_call(60, driver.delete_orphaned_nics))
        task_manager.ensure_future(
            periodically_call(60, driver.delete_orphaned_public_ips))
        task_manager.ensure_future(
            periodically_call(60, driver.delete_completed_deployments))
        task_manager.ensure_future(
            periodically_call(
                300, billing_manager.refresh_resources_from_retail_prices))

        return driver