def get_image(self, region, config):
    """Resolve the image name stored in *config* for the given region.

    A fresh ``EC2Manager`` is created and connected on every call, using the
    AWS credentials from ``settings``.

    :param region: Region handed to ``EC2Manager.connect``.
    :param config: Object providing the ``ec2_image_name`` attribute.
    :returns: Whatever ``EC2Manager.resolve_image_name`` returns for that
        name (presumably the AMI ID -- confirm against EC2Manager).
    """
    manager = EC2Manager(None)
    credentials = {
        "aws_access_key_id": settings.AWS_ACCESS_KEY_ID,
        "aws_secret_access_key": settings.AWS_SECRET_ACCESS_KEY,
    }
    manager.connect(region=region, **credentials)
    return manager.resolve_image_name(config.ec2_image_name)
def _connect(self, region):
    """Return an ``EC2Manager`` connected to *region*, reusing the cached one.

    The manager is cached in ``self.cluster`` together with the region it
    belongs to (``self.connected_region``). A call for a different region
    replaces the cached manager.

    :param region: Region handed to ``EC2Manager.connect``.
    :returns: The manager cached for *region*.
    :raises: Whatever ``EC2Manager.connect`` raises; in that case the
        previously cached manager and region are left untouched.
    """
    if self.connected_region != region:
        # Create a new manager (instead of reconnecting the old one) to
        # invalidate cached image names, etc.
        manager = EC2Manager(None)
        manager.connect(region=region,
                        aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY)
        # Publish to the cache only after connect() succeeded. Otherwise a
        # failed connect would leave an unconnected manager in self.cluster
        # while connected_region still names the old region, and the next
        # call for that old region would hand out the unconnected manager.
        self.cluster = manager
        self.connected_region = region
    return self.cluster
def check_instances_requests(self, region, instances, tags):
    """Check spot requests and sort them into fulfilled and failed ones.

    :param region: Region handed to ``EC2Manager.connect``.
    :param instances: Spot request IDs; they are paired positionally with
        the results returned by ``EC2Manager.check_spot_requests``.
    :param tags: Passed through to ``EC2Manager.check_spot_requests``.
    :returns: Tuple ``(successful_requests, failed_requests)``. Both are
        dicts keyed by request ID. Successful entries carry ``hostname``,
        ``instance_id`` and ``status_code``; failed entries carry an
        ``action`` (``'blacklist'`` plus ``instance_type``, or
        ``'disable_pool'``).
    """
    fulfilled = {}
    rejected = {}

    manager = EC2Manager(None)
    manager.connect(region=region,
                    aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                    aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY)
    outcomes = manager.check_spot_requests(instances, tags)

    for req_id, outcome in zip(instances, outcomes):
        if isinstance(outcome, boto.ec2.instance.Instance):
            # The spot request has been fulfilled.
            self.logger.info("Spot request fulfilled %s -> %s", req_id, outcome.id)
            fulfilled[req_id] = {
                'hostname': outcome.public_dns_name,
                'instance_id': outcome.id,
                # state_code is a 16-bit value where the high byte is an
                # opaque internal value and should be ignored.
                'status_code': outcome.state_code & 255,
            }
            # The result has been recorded, so mark the instance as
            # updatable; the update code can then pick it up when it
            # changes states.
            outcome.add_tag(SPOTMGR_TAG + "-Updatable", "1")
            continue

        if not isinstance(outcome, boto.ec2.spotinstancerequest.SpotInstanceRequest):
            if outcome is None:
                self.logger.info("spot request %s is still open", req_id)
            else:
                self.logger.warning("Spot request %s returned %s", req_id, type(outcome).__name__)
            continue

        # A request object is returned in case the request is
        # closed/cancelled/failed.
        state = outcome.state
        if state in {"cancelled", "closed"}:
            # The request was not fulfilled for some reason: blacklist this
            # type/zone for a while.
            self.logger.info("Spot request %s is %s", req_id, state)
            rejected[req_id] = {
                'action': 'blacklist',
                'instance_type': outcome.launch_specification.instance_type,
            }
        elif state in {"open", "active"}:
            # This should not happen! Warn and leave the request alone in
            # case it is fulfilled later.
            self.logger.warning("Request %s is %s and %s.", req_id, outcome.status.code, state)
        else:
            # state=failed
            message = "Request %s is %s and %s." % (req_id, outcome.status.code, state)
            self.logger.error(message)
            rejected[req_id] = {'action': 'disable_pool'}
            # Remaining results are not examined once a request has failed.
            break

    return (fulfilled, rejected)
def cancel_requests(self, requested_instances_by_region):
    """Cancel spot requests, one region at a time.

    :param requested_instances_by_region: Mapping of region to the
        collection of spot request IDs to cancel in that region.
    """
    for region, request_ids in requested_instances_by_region.items():
        manager = EC2Manager(None)
        credentials = {
            "aws_access_key_id": settings.AWS_ACCESS_KEY_ID,
            "aws_secret_access_key": settings.AWS_SECRET_ACCESS_KEY,
        }
        manager.connect(region=region, **credentials)
        manager.cancel_spot_requests(request_ids)
        # Logged after the cancel call has returned.
        self.logger.info("Canceling %s requests in region %s", len(request_ids), region)
def start_instances(self, config, region, zone, userdata, image, instance_type, count=1):
    """Create spot requests for *count* instances of *instance_type*.

    :param config: Pool configuration; ``ec2_max_price`` is read here and
        the object is handed to ``_create_laniakea_images``.
    :param region: Region handed to ``EC2Manager.connect``.
    :param zone: Stored as the ``placement`` of the default image.
    :param userdata: Text stored UTF-8 encoded as ``user_data``.
    :param image: Stored as the ``image_id`` of the default image.
    :param instance_type: EC2 instance type; must be a key of
        ``CORES_PER_INSTANCE``.
    :param count: Number of instances to request.
    :returns: List of the items yielded by
        ``EC2Manager.create_spot_requests``.
    :raises CloudProviderInstanceCountError: If the boto error mentions
        ``MaxSpotInstanceCountExceeded``.
    :raises CloudProviderTemporaryFailure: If the boto error mentions
        ``Service Unavailable``.
    """
    images = self._create_laniakea_images(config)

    self.logger.info(
        "Using instance type %s in region %s with availability zone %s.",
        instance_type, region, zone)

    default_image = images["default"]
    default_image['user_data'] = userdata.encode("utf-8")
    default_image['placement'] = zone
    default_image['count'] = count
    default_image['instance_type'] = instance_type

    manager = EC2Manager(None)
    manager.connect(region=region,
                    aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                    aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY)

    # The image is addressed by ID, so the name entry is dropped.
    default_image['image_id'] = image
    default_image.pop('image_name')
    manager.images = images

    try:
        cores = CORES_PER_INSTANCE[instance_type]
        self.logger.info("Creating %dx %s instances... (%d cores total)",
                         count, instance_type, count * cores)
        # The maximum price is configured per core, so scale it by the
        # core count of the instance type.
        requests = manager.create_spot_requests(config.ec2_max_price * cores,
                                                delete_on_termination=True,
                                                timeout=10 * 60)
        return list(requests)
    except (boto.exception.EC2ResponseError, boto.exception.BotoServerError) as err:
        text = str(err)
        if "MaxSpotInstanceCountExceeded" in text:
            self.logger.warning(
                "start_instances: Maximum instance count exceeded for region %s",
                region)
            raise CloudProviderInstanceCountError(
                "Auto-selected region exceeded its maximum spot instance count."
            )
        if "Service Unavailable" in text:
            raise CloudProviderTemporaryFailure(
                "start_instances in region %s: %s" % (region, err))
        # Any other boto error is passed on unchanged.
        raise
def check_instances_state(self, pool_id, region):
    """Return the state of every live instance tagged with *pool_id*.

    Instances are looked up through the ``<SPOTMGR_TAG>-PoolId`` tag.
    Instances that are shutting down or terminated are left out.

    :param pool_id: Pool identifier; compared as ``str(pool_id)`` against
        the tag value.
    :param region: Region handed to ``EC2Manager.connect``.
    :returns: Dict keyed by instance ID; each value is a dict with
        ``status`` (the low byte of the state code) and ``tags``.
    """
    cluster = EC2Manager(None)
    cluster.connect(region=region,
                    aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                    aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY)
    boto_instances = cluster.find(filters={"tag:" + SPOTMGR_TAG + "-PoolId": str(pool_id)})

    gone_states = (INSTANCE_STATE['shutting-down'], INSTANCE_STATE['terminated'])

    instance_states = {}
    for instance in boto_instances:
        # state_code is a 16-bit value where the high byte is an opaque
        # internal value and should be ignored. Mask it *before* comparing,
        # otherwise a terminated instance with non-zero high bits would
        # slip through the filter and be reported as live.
        state_code = instance.state_code & 255
        if state_code in gone_states:
            continue
        instance_states[instance.id] = {
            'status': state_code,
            'tags': instance.tags,
        }
    return instance_states
def terminate_instances(self, instances_ids_by_region):
    """Terminate the given instances, one region at a time.

    Every instance returned by ``EC2Manager.find`` is handed to
    ``EC2Manager.terminate``. Before that, an error is logged for any
    returned instance that is neither in the requested ID list nor already
    shutting down or terminated.

    :param instances_ids_by_region: Mapping of region to the collection of
        instance IDs to terminate in that region.
    """
    for region, instance_ids in instances_ids_by_region.items():
        manager = EC2Manager(None)
        manager.connect(region=region,
                        aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY)
        self.logger.info("Terminating %s instances in region %s", len(instance_ids), region)
        found_instances = manager.find(instance_ids=instance_ids)

        # Data consistency checks
        for found in found_instances:
            # state_code is a 16-bit value where the high byte is an opaque
            # internal value and should be ignored.
            state_code = found.state_code & 255
            if found.id in instance_ids:
                continue
            if (state_code == INSTANCE_STATE['shutting-down']
                    or state_code == INSTANCE_STATE['terminated']):
                continue
            self.logger.error("Instance with EC2 ID %s (status %d) is not in region list for region %s",
                              found.id, state_code, region)

        manager.terminate(found_instances)