def __init__( self, # Name of experiment experimentId, # Config yaml file config=None, # Number of remote workers to spin up num_workers=1, # Compute requirements, amount of RAM, GPU, etc resources_needed={}, # Name of the queue for submission to a server. queue=None, # What computer resource to use, either AWS, Google, or local cloud=None, # Timeout for cloud instances cloud_timeout=100, # Bid price for EC2 spot instances bid='100%', # Keypair to use for EC2 workers ssh_keypair=None, # If true, get results that are submitted by other instances of CS resumable=False, # Whether to clean the submission queue on initialization clean_queue=True, # Whether to enable autoscaling for EC2 instances queue_upscaling=True, # Whether to delete the queue on shutdown shutdown_del_queue=False, # delay between queries for results sleep_time=1 ): self.config = model.get_config(config) self.cloud = cloud self.experimentId = experimentId self.project_name = "completion_service_" + experimentId self.resources_needed = DEFAULT_RESOURCES_NEEDED if self.config.get('resources_needed'): self.resources_needed.update(self.config.get('resources_needed')) self.resources_needed.update(resources_needed) self.wm = runner.get_worker_manager( self.config, self.cloud) self.logger = logs.getLogger(self.__class__.__name__) self.verbose_level = model.parse_verbosity(self.config['verbose']) self.logger.setLevel(self.verbose_level) self.queue = runner.get_queue(queue, self.cloud, self.verbose_level) self.queue_name = self.queue.get_name() self.clean_queue = clean_queue if self.clean_queue: self.queue.clean() self.cloud_timeout = cloud_timeout self.bid = bid self.ssh_keypair = ssh_keypair self.submitted = set([]) self.num_workers = num_workers self.resumable = resumable self.queue_upscaling = queue_upscaling self.shutdown_del_queue = shutdown_del_queue self.use_spot = cloud in ['ec2spot', 'gcspot'] self.sleep_time = sleep_time
def __init__( self, # Name of experiment experimentId, # Completion service configuration cs_config=None, # used to pass a studioML configuration block read by client software studio_config=None, # Studio config yaml file studio_config_file=None, shutdown_del_queue=False): # StudioML configuration self.config = model.get_config(studio_config_file) self.logger = logs.getLogger(self.__class__.__name__) self.verbose_level = model.parse_verbosity(self.config['verbose']) self.logger.setLevel(self.verbose_level) # Setup Completion Service instance properties # based on configuration self.experimentId = experimentId self.project_name = "completion_service_" + experimentId self.resumable = RESUMABLE self.clean_queue = CLEAN_QUEUE self.queue_upscaling = QUEUE_UPSCALING self.num_workers = int(cs_config.get('num_workers', 1)) self.cloud_timeout = cs_config.get('timeout') self.bid = cs_config.get('bid') self.ssh_keypair = cs_config.get('ssh_keypair') self.sleep_time = cs_config.get('sleep_time') self.shutdown_del_queue = shutdown_del_queue # Figure out request for resources: resources_needed = cs_config.get('resources_needed') self.resources_needed = DEFAULT_RESOURCES_NEEDED self.resources_needed.update(resources_needed) studio_resources = self.config.get('resources_needed') if studio_resources: self.resources_needed.update(studio_resources) # Figure out task queue and cloud we are going to use: queue_name = cs_config.get('queue') cloud_name = cs_config.get('cloud') if cs_config.get('local'): queue_name = None cloud_name = None elif queue_name is not None: self.shutdown_del_queue = False if cloud_name in ['ec2spot', 'ec2']: assert queue_name.startswith("sqs_") else: queue_name = self.experiment_id if cloud_name in ['ec2spot', 'ec2']: queue_name = "sqs_" + queue_name self.cloud = cloud_name if queue_name is not None and queue_name.startswith("rmq_"): assert self.cloud is None self.wm = runner.get_worker_manager(self.config, self.cloud) if queue_name is not None: self.logger.info( "CompletionService configured with queue {0}".format( queue_name)) self.queue = runner.get_queue(queue_name=queue_name, cloud=self.cloud, config=self.config, logger=self.logger, verbose=self.verbose_level) self.queue_name = self.queue.get_name() self.submitted = {} self.use_spot = cloud_name in ['ec2spot', 'gcspot'] self.logger.info("Project name: {0}".format(self.project_name)) self.logger.info("Initial/final queue name: {0}, {1}".format( queue_name, self.queue_name)) self.logger.info("Cloud name: {0}".format(self.cloud))