Example #1
0
 def get_image(self, region, config):
     """Resolve the configured EC2 image name within ``region``.

     A new ``EC2Manager`` is created and connected on every call, using the
     AWS credentials found in ``settings``.

     :param region: region passed to ``EC2Manager.connect``.
     :param config: object providing the ``ec2_image_name`` attribute.
     :return: the result of ``EC2Manager.resolve_image_name`` for that name
         (presumably the AMI id -- confirm against EC2Manager).
     """
     manager = EC2Manager(None)
     manager.connect(
         region=region,
         aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
         aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
     )
     return manager.resolve_image_name(config.ec2_image_name)
 def _connect(self, region):
     """Return an ``EC2Manager`` connected to ``region``.

     The manager stored on ``self.cluster`` is reused as long as
     ``self.connected_region`` equals ``region``; otherwise it is replaced
     by a newly connected one.

     :param region: region the returned manager must be connected to.
     :return: the manager stored on ``self.cluster``.
     """
     region_changed = self.connected_region != region
     if region_changed:
         # A brand-new manager is used on purpose: it invalidates cached
         # image names, etc. held by the previous one.
         self.cluster = EC2Manager(None)
         self.cluster.connect(
             region=region,
             aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
             aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
         )
         # Only remember the region once the connection attempt returned.
         self.connected_region = region
     return self.cluster
Example #3
0
    def check_instances_requests(self, region, instances, tags):
        """Check the outcome of spot requests in ``region``.

        :param region: region passed to ``EC2Manager.connect``.
        :param instances: spot request ids; passed to
            ``EC2Manager.check_spot_requests`` and paired one-to-one with
            its results.
        :param tags: forwarded unchanged to ``check_spot_requests``.
        :return: tuple ``(successful_requests, failed_requests)``, both
            dicts keyed by request id. Successful entries carry
            ``hostname``, ``instance_id`` and ``status_code``; failed
            entries carry ``action`` (``'blacklist'`` or
            ``'disable_pool'``) and, for blacklisting, ``instance_type``.
        """
        fulfilled = {}
        failed = {}

        manager = EC2Manager(None)
        manager.connect(
            region=region,
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
        )

        outcomes = manager.check_spot_requests(instances, tags)

        for req_id, outcome in zip(instances, outcomes):
            if isinstance(outcome, boto.ec2.instance.Instance):
                # The spot request has been fulfilled by this instance.
                self.logger.info("Spot request fulfilled %s -> %s", req_id,
                                 outcome.id)
                fulfilled[req_id] = {
                    'hostname': outcome.public_dns_name,
                    'instance_id': outcome.id,
                    # state_code is a 16-bit value where the high byte is
                    # an opaque internal value and should be ignored.
                    'status_code': outcome.state_code & 255,
                }
                # Mark the instance as updatable so the update code can
                # pick it up and track its later state changes.
                outcome.add_tag(SPOTMGR_TAG + "-Updatable", "1")
                continue

            if isinstance(outcome,
                          boto.ec2.spotinstancerequest.SpotInstanceRequest):
                # A request object comes back when the request is
                # closed/cancelled/failed.
                state = outcome.state
                if state in {"cancelled", "closed"}:
                    # Not fulfilled for some reason: ask the caller to
                    # blacklist this instance type for a while.
                    self.logger.info("Spot request %s is %s", req_id, state)
                    failed[req_id] = {
                        'action': 'blacklist',
                        'instance_type':
                            outcome.launch_specification.instance_type,
                    }
                elif state in {"open", "active"}:
                    # Should not happen; warn and report nothing so the
                    # request can still be fulfilled later.
                    self.logger.warning("Request %s is %s and %s.", req_id,
                                        outcome.status.code, state)
                else:
                    # Any remaining state is treated as a failure; the
                    # remaining requests are not examined.
                    self.logger.error(
                        "Request %s is %s and %s." %
                        (req_id, outcome.status.code, state))
                    failed[req_id] = {'action': 'disable_pool'}
                    break
                continue

            if outcome is None:
                self.logger.info("spot request %s is still open", req_id)
                continue

            self.logger.warning("Spot request %s returned %s", req_id,
                                type(outcome).__name__)

        return (fulfilled, failed)
Example #4
0
    def cancel_requests(self, requested_instances_by_region):
        """Cancel spot requests, one region at a time.

        :param requested_instances_by_region: mapping of region to the
            collection of spot request ids to cancel in that region. An
            empty mapping is a no-op.
        :return: None.
        """
        for region, instance_ids in requested_instances_by_region.items():
            cluster = EC2Manager(None)
            cluster.connect(
                region=region,
                aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY)
            cluster.cancel_spot_requests(instance_ids)

            # Logged per region, inside the loop: every region gets its own
            # entry, and an empty mapping never touches the (then unbound)
            # loop variables.
            self.logger.info("Canceling %s requests in region %s",
                             len(instance_ids), region)
Example #5
0
    def start_instances(self, config, region, zone, userdata, image,
                        instance_type, count=1):
        """Create spot requests for ``count`` instances in ``region``.

        :param config: pool configuration; provides ``ec2_max_price`` and is
            passed to ``self._create_laniakea_images``.
        :param region: region passed to ``EC2Manager.connect``.
        :param zone: stored as the ``placement`` of the default image.
        :param userdata: text stored UTF-8 encoded as ``user_data``.
        :param image: stored as ``image_id`` of the default image.
        :param instance_type: EC2 instance type; must be a key of
            ``CORES_PER_INSTANCE``.
        :param count: number of instances to request.
        :return: list of the items yielded by
            ``EC2Manager.create_spot_requests``.
        :raises CloudProviderInstanceCountError: the boto error mentions
            ``MaxSpotInstanceCountExceeded``.
        :raises CloudProviderTemporaryFailure: the boto error mentions
            ``Service Unavailable``.
        """
        images = self._create_laniakea_images(config)

        self.logger.info(
            "Using instance type %s in region %s with availability zone %s.",
            instance_type, region, zone)

        default_image = images["default"]
        default_image['user_data'] = userdata.encode("utf-8")
        default_image['placement'] = zone
        default_image['count'] = count
        default_image['instance_type'] = instance_type

        manager = EC2Manager(None)
        manager.connect(
            region=region,
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
        )

        # The image is addressed by id, so the name entry is dropped.
        default_image['image_id'] = image
        default_image.pop('image_name')
        manager.images = images

        try:
            cores_each = CORES_PER_INSTANCE[instance_type]
            self.logger.info("Creating %dx %s instances... (%d cores total)",
                             count, instance_type, count * cores_each)
            # The maximum price is configured per core, so scale it by the
            # core count of the chosen instance type.
            spot_requests = manager.create_spot_requests(
                config.ec2_max_price * cores_each,
                delete_on_termination=True,
                timeout=10 * 60)
            return list(spot_requests)

        except (boto.exception.EC2ResponseError,
                boto.exception.BotoServerError) as msg:
            error_text = str(msg)
            if "MaxSpotInstanceCountExceeded" in error_text:
                self.logger.warning(
                    "start_instances: Maximum instance count exceeded for region %s",
                    region)
                raise CloudProviderInstanceCountError(
                    "Auto-selected region exceeded its maximum spot instance count."
                )
            if "Service Unavailable" in error_text:
                raise CloudProviderTemporaryFailure(
                    "start_instances in region %s: %s" % (region, msg))
            # Anything else is unexpected: propagate the boto error as-is.
            raise
Example #6
0
    def check_instances_state(self, pool_id, region):
        """Report the state of the instances tagged with ``pool_id``.

        Instances that are shutting down or terminated are left out.

        :param pool_id: pool identifier; matched as a string against the
            ``SPOTMGR_TAG + "-PoolId"`` tag.
        :param region: region passed to ``EC2Manager.connect``.
        :return: dict keyed by instance id; each value is a dict with
            ``status`` (the low byte of the state code) and ``tags``.
        """
        instance_states = {}
        cluster = EC2Manager(None)
        cluster.connect(region=region, aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY)

        boto_instances = cluster.find(filters={"tag:" + SPOTMGR_TAG + "-PoolId": str(pool_id)})

        for instance in boto_instances:
            # state_code is a 16-bit value where the high byte is an opaque
            # internal value and should be ignored. Mask it once so that the
            # filter below and the reported status agree with each other.
            state_code = instance.state_code & 255
            if state_code in (INSTANCE_STATE['shutting-down'], INSTANCE_STATE['terminated']):
                continue
            instance_states[instance.id] = {
                'status': state_code,
                'tags': instance.tags,
            }

        return instance_states
Example #7
0
    def terminate_instances(self, instances_ids_by_region):
        """Terminate the given instances, one region at a time.

        :param instances_ids_by_region: mapping of region to the collection
            of EC2 instance ids to terminate in that region. An empty
            mapping is a no-op.
        :return: None.
        """
        for region, instance_ids in instances_ids_by_region.items():
            cluster = EC2Manager(None)
            cluster.connect(region=region, aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY)

            self.logger.info("Terminating %s instances in region %s", len(instance_ids), region)
            # Materialized so the result can be both inspected and handed
            # to terminate() afterwards.
            boto_instances = list(cluster.find(instance_ids=instance_ids))

            # Data consistency checks
            for boto_instance in boto_instances:
                # state_code is a 16-bit value where the high byte is
                # an opaque internal value and should be ignored.
                state_code = boto_instance.state_code & 255
                already_going_away = state_code in (INSTANCE_STATE['shutting-down'],
                                                    INSTANCE_STATE['terminated'])
                if boto_instance.id not in instance_ids and not already_going_away:
                    self.logger.error("Instance with EC2 ID %s (status %d) is not in region list for region %s",
                                      boto_instance.id, state_code, region)

            # Terminate the whole batch once per region, after the checks,
            # rather than once per inspected instance. Nothing is sent when
            # no instance was found.
            if boto_instances:
                cluster.terminate(boto_instances)