Example #1
0
def create_spot_instances(ec2, price, image_id, spec, clusterName,
                          num_instances=1, timeout=None, tentative=False):
    """
    Request spot instances, tag every spot request with the cluster name and yield the
    instances belonging to requests that became active.

    Adapted from cgcloud.lib.ec2.create_spot_instances to tag spot requests with the cluster name
    so they can be discovered and cleaned up at a later time.

    This is a generator function, so no request is made until the result is iterated.

    :param ec2: connection object on which request_spot_instances(), create_tags() and
           get_only_instances() are invoked
    :param price: passed to request_spot_instances() as its first positional argument
    :param image_id: passed to request_spot_instances() as its second positional argument
    :param spec: dictionary of additional keyword arguments for request_spot_instances()
    :param clusterName: value of the 'clusterName' tag placed on every spot request
    :param num_instances: passed to request_spot_instances() as ``count``
    :param timeout: forwarded to wait_spot_requests_active()
    :param tentative: forwarded to wait_spot_requests_active(); if true, the absence of any
           active request is only logged as a warning instead of raising an exception
    :raises RuntimeError: if no request entered the active state and ``tentative`` is false
    :rtype: Iterator[list[Instance]]
    """
    def spotRequestNotFound(e):
        return e.error_code == "InvalidSpotInstanceRequestID.NotFound"

    for attempt in retry_ec2(retry_for=a_long_time,
                             retry_while=inconsistencies_detected):
        with attempt:
            requests = ec2.request_spot_instances(price, image_id, count=num_instances, **spec)

    # A freshly created request may not be visible to the tagging call yet, so tagging is
    # retried for as long as EC2 reports the request ID as not found.
    for spot_request in requests:
        for attempt in retry_ec2(retry_while=spotRequestNotFound):
            with attempt:
                ec2.create_tags([spot_request.id], {'clusterName': clusterName})

    num_active, num_other = 0, 0
    # noinspection PyUnboundLocalVariable,PyTypeChecker
    # request_spot_instances's type annotation is wrong
    for batch in wait_spot_requests_active(ec2,
                                           requests,
                                           timeout=timeout,
                                           tentative=tentative):
        instance_ids = []
        for request in batch:
            if request.state == 'active':
                instance_ids.append(request.instance_id)
                num_active += 1
            else:
                logger.info('Request %s in unexpected state %s.', request.id, request.state)
                num_other += 1
        if instance_ids:
            # This next line is the reason we batch. It's so we can get multiple instances in
            # a single request.
            yield ec2.get_only_instances(instance_ids)
    if not num_active:
        message = 'None of the spot requests entered the active state'
        if tentative:
            # Logger.warn() is a deprecated alias of Logger.warning()
            logger.warning(message + '.')
        else:
            raise RuntimeError(message)
    if num_other:
        logger.warning('%i request(s) entered a state other than active.', num_other)
Example #2
0
    def _tag_object_persistently( self, tagged_ec2_object, tags_dict ):
        """
        Add the given tags to an EC2 object, running the call under retry_ec2() so that a
        failed attempt can be repeated. Tagging occasionally fails with "NotFound" types of
        errors, which is why a single attempt is not sufficient.

        :type tagged_ec2_object: boto.ec2.TaggedEC2Object
        :param tags_dict: the tags to add, handed to add_tags() unchanged
        """
        for tagging_attempt in retry_ec2( ):
            with tagging_attempt:
                tagged_ec2_object.add_tags( tags_dict )
Example #3
0
 def __delete_image_snapshot( self, image, wait=True ):
     """
     Delete the snapshot referenced by the root device mapping of the given image.

     The candidate device names in self.possible_root_devices are tried in order and the
     first one with a non-empty entry in the image's block device mapping wins.

     :param image: the image whose block_device_mapping is searched for a root device
     :param wait: if true, keep retrying the deletion for a_long_time while EC2 reports the
            snapshot as being in use; otherwise the retry period is 0
     :raises RuntimeError: if none of the candidate root devices is mapped in the image
     """

     def snapshot_in_use( e ):
         return e.error_code == 'InvalidSnapshot.InUse'

     for device_name in self.possible_root_devices:
         root_mapping = image.block_device_mapping.get( device_name )
         if not root_mapping:
             continue
         snapshot_id = root_mapping.snapshot_id
         log.info( "Deleting snapshot %s.", snapshot_id )
         # Retrying indefinitely is safe here since a snapshot can be referenced by at most
         # one AMI. See also https://github.com/boto/boto/issues/3019.
         retry_period = a_long_time if wait else 0
         for attempt in retry_ec2( retry_for=retry_period, retry_while=snapshot_in_use ):
             with attempt:
                 self.ctx.ec2.delete_snapshot( snapshot_id )
         return
     raise RuntimeError( 'Could not determine root device in AMI' )
Example #4
0
    def destroyCluster(cls, clusterName, zone='us-west-2a'):
        """
        Tear down the cluster of the given name: cancel its spot requests, delete the IAM
        profiles of its instances, terminate the instances and finally delete the security
        group named after the cluster.

        :param clusterName: name of the cluster; also used as the name of the security group
               to delete and, via cls._toNameSpace(), to derive the context's namespace
        :param zone: availability zone the context is created for. Defaults to 'us-west-2a',
               the zone that was previously hard-coded, so existing callers are unaffected.
        """
        def expectedShutdownErrors(e):
            # Status 400 with 'dependent object' in the body is the error treated as
            # retryable; presumably the group is still referenced by instances that have not
            # finished terminating -- TODO confirm.
            return e.status == 400 and 'dependent object' in e.body

        ctx = Context(availability_zone=zone, namespace=cls._toNameSpace(clusterName))
        instances = cls.__getNodesInCluster(ctx, clusterName, both=True)
        spotIDs = cls._getSpotRequestIDs(ctx, clusterName)
        if spotIDs:
            ctx.ec2.cancel_spot_instance_requests(request_ids=spotIDs)
        if instances:
            cls._deleteIAMProfiles(instances=instances, ctx=ctx)
            cls._terminateInstance(instances=instances, ctx=ctx)
        logger.info('Deleting security group...')
        for attempt in retry_ec2(retry_after=30, retry_for=300, retry_while=expectedShutdownErrors):
            with attempt:
                ctx.ec2.delete_security_group(name=clusterName)
        logger.info('... Successfully deleted security group')
Example #5
0
    def _create( self ):
        """
        Issue the RunInstances EC2 API call, retrying while EC2 has not caught up with
        recently created security groups, instance profiles or IAM roles that the new
        instance refers to.

        :return: the result of run_instances() from the first attempt that succeeds
        :rtype: boto.ec2.instance.Reservation
        """
        log.info( 'Creating %s instance(s) ... ', self.instance_creation_args[ 'instance_type' ] )

        # Fragments of error messages that indicate a not yet visible IAM entity
        iam_race_phrases = ( 'invalid iam instance profile', 'no associated iam roles' )

        def inconsistencies_detected( e ):
            if e.code == 'InvalidGroup.NotFound':
                return True
            message = e.error_message.lower( )
            return any( phrase in message for phrase in iam_race_phrases )

        for attempt in retry_ec2( retry_for=a_long_time,
                                  retry_while=inconsistencies_detected ):
            with attempt:
                return self.ctx.ec2.run_instances( self.image_id, **self.instance_creation_args )