예제 #1
0
 def getProvisionedWorkers(self, nodeType, preemptable):
     """Return a Node for every healthy worker of the given node type.

     Fetches all instances in the cluster, drops the leader, keeps only
     workers whose preemptability matches ``preemptable``, filters out
     impaired instances, and wraps the survivors in Node objects.
     """
     allInstances = self._getNodesInCluster(ctx=self.ctx,
                                            clusterName=self.clusterName,
                                            both=True,
                                            nodeType=nodeType)
     logger.debug('All nodes in cluster: %s', allInstances)
     # The leader is identified by its private IP; everything else is a worker.
     workers = []
     for inst in allInstances:
         if inst.private_ip_address == self.leaderIP:
             continue
         workers.append(inst)
     logger.debug('All workers found in cluster: %s', workers)
     # A spot-instance request ID is present exactly on preemptable instances.
     workers = [w for w in workers
                if preemptable != (w.spot_instance_request_id is None)]
     logger.debug('%spreemptable workers found in cluster: %s',
                  'non-' if not preemptable else '', workers)
     healthy = awsFilterImpairedNodes(workers, self.ctx.ec2)
     nodes = []
     for w in healthy:
         nodes.append(Node(publicIP=w.ip_address,
                           privateIP=w.private_ip_address,
                           name=w.id,
                           launchTime=w.launch_time,
                           nodeType=w.instance_type,
                           preemptable=preemptable))
     return nodes
예제 #2
0
    def destroyCluster(cls, clusterName, zone=None):
        """Tear down the named cluster and clean up its AWS resources.

        Cancels outstanding spot requests, deletes IAM profiles and
        terminates every non-impaired instance, then deletes the cluster's
        security group — but only when every instance could be terminated,
        because AWS refuses to delete a security group that still has
        dependent instances.

        :param clusterName: name of the cluster (also the security group name)
        :param zone: availability zone to operate in; resolution is
            delegated to _buildContext
        """
        def expectedShutdownErrors(e):
            # While instances are still shutting down AWS answers with a
            # 400 "dependent object" error; retry() treats these as transient.
            return e.status == 400 and 'dependent object' in e.body

        ctx = cls._buildContext(clusterName=clusterName, zone=zone)
        instances = cls.__getNodesInCluster(ctx, clusterName, both=True)
        spotIDs = cls._getSpotRequestIDs(ctx, clusterName)
        if spotIDs:
            ctx.ec2.cancel_spot_instance_requests(request_ids=spotIDs)
        # Impaired nodes are deliberately kept alive for debugging (see the
        # TOIL_AWS_NODE_DEBUG warning below).
        instancesToTerminate = awsFilterImpairedNodes(instances, ctx.ec2)
        if instancesToTerminate:
            cls._deleteIAMProfiles(instances=instancesToTerminate, ctx=ctx)
            cls._terminateInstances(instances=instancesToTerminate, ctx=ctx)
        if len(instances) == len(instancesToTerminate):
            logger.info('Deleting security group...')
            for attempt in retry(timeout=300, predicate=expectedShutdownErrors):
                with attempt:
                    try:
                        ctx.ec2.delete_security_group(name=clusterName)
                    except BotoServerError as e:
                        if e.error_code == 'InvalidGroup.NotFound':
                            # Group already gone — nothing left to delete.
                            pass
                        else:
                            raise
            # Bug fix: log message previously misspelled 'Succesfully'.
            logger.info('... Successfully deleted security group')
        else:
            assert len(instances) > len(instancesToTerminate)
            # the security group can't be deleted until all nodes are terminated
            logger.warning('The TOIL_AWS_NODE_DEBUG environment variable is set and some nodes '
                           'have failed health checks. As a result, the security group & IAM '
                           'roles will not be deleted.')
예제 #3
0
    def destroyCluster(cls, clusterName, zone=None):
        """Destroy every node of the named cluster and its AWS resources.

        Spot requests are cancelled, IAM profiles deleted and all
        non-impaired instances terminated. The security group is removed
        only if all instances were terminated, since AWS rejects deleting
        a security group that still has dependent instances.

        :param clusterName: cluster name (doubles as the security group name)
        :param zone: availability zone; resolution delegated to _buildContext
        """
        def expectedShutdownErrors(e):
            # AWS reports a 400 "dependent object" error while instances are
            # still shutting down; retry() retries on these transient errors.
            return e.status == 400 and 'dependent object' in e.body

        ctx = cls._buildContext(clusterName=clusterName, zone=zone)
        instances = cls.__getNodesInCluster(ctx, clusterName, both=True)
        spotIDs = cls._getSpotRequestIDs(ctx, clusterName)
        if spotIDs:
            ctx.ec2.cancel_spot_instance_requests(request_ids=spotIDs)
        # Impaired nodes are intentionally left running for debugging
        # (TOIL_AWS_NODE_DEBUG — see warning below).
        instancesToTerminate = awsFilterImpairedNodes(instances, ctx.ec2)
        if instancesToTerminate:
            cls._deleteIAMProfiles(instances=instancesToTerminate, ctx=ctx)
            cls._terminateInstances(instances=instancesToTerminate, ctx=ctx)
        if len(instances) == len(instancesToTerminate):
            logger.info('Deleting security group...')
            for attempt in retry(timeout=300, predicate=expectedShutdownErrors):
                with attempt:
                    try:
                        ctx.ec2.delete_security_group(name=clusterName)
                    except BotoServerError as e:
                        if e.error_code == 'InvalidGroup.NotFound':
                            # Group already deleted — treat as success.
                            pass
                        else:
                            raise
            # Bug fix: corrected misspelling 'Succesfully' in the log message.
            logger.info('... Successfully deleted security group')
        else:
            assert len(instances) > len(instancesToTerminate)
            # the security group can't be deleted until all nodes are terminated
            logger.warning('The TOIL_AWS_NODE_DEBUG environment variable is set and some nodes '
                           'have failed health checks. As a result, the security group & IAM '
                           'roles will not be deleted.')
예제 #4
0
 def _getWorkersInCluster(self, preemptable):
     """Return non-impaired worker instances matching ``preemptable``."""
     allNodes = self._getNodesInCluster(both=True)
     logger.debug('All nodes in cluster %s', allNodes)
     workers = []
     for node in allNodes:
         # Skip the leader; a spot request ID marks a preemptable instance.
         isWorker = node.private_ip_address != self.leaderIP
         matchesKind = preemptable != (node.spot_instance_request_id is None)
         if isWorker and matchesKind:
             workers.append(node)
     logger.debug('Workers found in cluster %s', workers)
     return awsFilterImpairedNodes(workers, self.ctx.ec2)
예제 #5
0
 def _getWorkersInCluster(self, preemptable):
     """Find healthy workers of the requested preemptability in the cluster."""
     def isMatchingWorker(instance):
         # The leader is excluded; preemptable instances carry a spot
         # request ID, on-demand ones do not.
         if instance.private_ip_address == self.leaderIP:
             return False
         return preemptable != (instance.spot_instance_request_id is None)

     cluster = self._getNodesInCluster(both=True)
     logger.debug('All nodes in cluster %s', cluster)
     candidates = [node for node in cluster if isMatchingWorker(node)]
     logger.debug('Workers found in cluster %s', candidates)
     return awsFilterImpairedNodes(candidates, self.ctx.ec2)
예제 #6
0
 def _getWorkersInCluster(self, preemptable):
     """Return running, non-impaired workers matching ``preemptable``."""
     running = list(self._getAllRunningInstances())
     workers = []
     for inst in running:
         if inst.id == self._instanceId:
             # exclude leader
             continue
         # A spot-request ID is present exactly on preemptable instances.
         if preemptable != (inst.spot_instance_request_id is None):
             workers.append(inst)
     return awsFilterImpairedNodes(workers, self._ec2)
예제 #7
0
 def getProvisionedWorkers(self, preemptable):
     """Return a Node for each healthy worker matching ``preemptable``."""
     cluster = self._getNodesInCluster(ctx=self.ctx, clusterName=self.clusterName, both=True)
     logger.debug('All nodes in cluster: %s', cluster)
     # The leader is identified by its private IP address.
     workers = [inst for inst in cluster
                if inst.private_ip_address != self.leaderIP]
     logger.debug('All workers found in cluster: %s', workers)
     # Preemptable instances are exactly those with a spot request ID.
     workers = [inst for inst in workers
                if preemptable != (inst.spot_instance_request_id is None)]
     logger.debug('%spreemptable workers found in cluster: %s',
                  'non-' if not preemptable else '', workers)
     healthy = awsFilterImpairedNodes(workers, self.ctx.ec2)
     result = []
     for inst in healthy:
         result.append(Node(publicIP=inst.ip_address,
                            privateIP=inst.private_ip_address,
                            name=inst.id,
                            launchTime=inst.launch_time))
     return result