def getProvisionedWorkers(self, nodeType, preemptable):
    """
    Describe the worker instances of this cluster as Node objects.

    The leader (matched by private IP against ``self.leaderIP``) is excluded,
    and only instances whose spot/on-demand status matches ``preemptable``
    are kept. The survivors are passed through ``awsFilterImpairedNodes``
    before being converted.

    :param nodeType: forwarded unchanged to ``self._getNodesInCluster`` as
           its ``nodeType`` keyword. Note that the ``nodeType`` stored on each
           returned Node comes from the instance's ``instance_type``, not from
           this argument.
    :param preemptable: when true keep only instances that have a spot
           instance request ID; when false keep only those without one.
    :return: list of Node objects, one per remaining instance.
    """
    clusterNodes = self._getNodesInCluster(ctx=self.ctx,
                                           clusterName=self.clusterName,
                                           both=True,
                                           nodeType=nodeType)
    logger.debug('All nodes in cluster: %s', clusterNodes)

    # Everything that is not the leader counts as a worker.
    workers = []
    for instance in clusterNodes:
        if instance.private_ip_address != self.leaderIP:
            workers.append(instance)
    logger.debug('All workers found in cluster: %s', workers)

    # An instance without a spot request ID is an on-demand instance, so it
    # matches exactly when ``preemptable`` disagrees with "is on-demand".
    matching = []
    for instance in workers:
        isOnDemand = instance.spot_instance_request_id is None
        if preemptable != isOnDemand:
            matching.append(instance)
    prefix = '' if preemptable else 'non-'
    logger.debug('%spreemptable workers found in cluster: %s', prefix, matching)

    # NOTE(review): awsFilterImpairedNodes presumably drops instances that
    # fail EC2 health checks -- confirm against its definition.
    usable = awsFilterImpairedNodes(matching, self.ctx.ec2)

    nodes = []
    for instance in usable:
        nodes.append(Node(publicIP=instance.ip_address,
                          privateIP=instance.private_ip_address,
                          name=instance.id,
                          launchTime=instance.launch_time,
                          nodeType=instance.instance_type,
                          preemptable=preemptable))
    return nodes
def destroyCluster(cls, clusterName, zone=None):
    """
    Tear down the named cluster: cancel its spot requests, terminate its
    instances and, when possible, delete its security group.

    Only the instances returned by ``awsFilterImpairedNodes`` have their IAM
    profiles deleted and are terminated. The security group is deleted only
    if that filter kept every instance; otherwise a warning is logged and the
    security group is left in place.

    :param clusterName: name of the cluster; also used as the name of the
           security group to delete.
    :param zone: forwarded to ``cls._buildContext``; defaults to None.
    """
    def expectedShutdownErrors(e):
        # Errors worth retrying: a 400 whose body mentions a dependent object.
        return e.status == 400 and 'dependent object' in e.body

    ctx = cls._buildContext(clusterName=clusterName, zone=zone)
    clusterInstances = cls.__getNodesInCluster(ctx, clusterName, both=True)

    spotRequestIDs = cls._getSpotRequestIDs(ctx, clusterName)
    if spotRequestIDs:
        ctx.ec2.cancel_spot_instance_requests(request_ids=spotRequestIDs)

    terminable = awsFilterImpairedNodes(clusterInstances, ctx.ec2)
    if terminable:
        cls._deleteIAMProfiles(instances=terminable, ctx=ctx)
        cls._terminateInstances(instances=terminable, ctx=ctx)

    if len(clusterInstances) != len(terminable):
        # The filter can only remove instances, never add them.
        assert len(clusterInstances) > len(terminable)
        # the security group can't be deleted until all nodes are terminated
        logger.warning('The TOIL_AWS_NODE_DEBUG environment variable is set and some nodes '
                       'have failed health checks. As a result, the security group & IAM '
                       'roles will not be deleted.')
        return

    logger.info('Deleting security group...')
    # NOTE(review): retry() presumably keeps yielding attempts for up to 300
    # seconds while the predicate accepts the raised error -- confirm.
    for attempt in retry(timeout=300, predicate=expectedShutdownErrors):
        with attempt:
            try:
                ctx.ec2.delete_security_group(name=clusterName)
            except BotoServerError as e:
                # A group that no longer exists is as good as deleted.
                if e.error_code != 'InvalidGroup.NotFound':
                    raise
    logger.info('... Succesfully deleted security group')
def _getWorkersInCluster(self, preemptable):
    """
    Return the cluster's worker instances of the requested kind.

    The leader (matched by private IP against ``self.leaderIP``) is skipped,
    as is any instance whose spot/on-demand status does not match
    ``preemptable``. The remainder is passed through
    ``awsFilterImpairedNodes``.

    :param preemptable: when true select instances that have a spot instance
           request ID; when false select those without one.
    :return: whatever ``awsFilterImpairedNodes`` returns for the selection.
    """
    allNodes = self._getNodesInCluster(both=True)
    logger.debug('All nodes in cluster %s', allNodes)

    selected = []
    for instance in allNodes:
        isWorker = instance.private_ip_address != self.leaderIP
        # No spot request ID means on-demand; keep the instance when that
        # disagrees with ``preemptable``.
        if isWorker and preemptable != (instance.spot_instance_request_id is None):
            selected.append(instance)
    logger.debug('Workers found in cluster %s', selected)

    # NOTE(review): awsFilterImpairedNodes presumably drops instances that
    # fail EC2 health checks -- confirm against its definition.
    return awsFilterImpairedNodes(selected, self.ctx.ec2)
def _getWorkersInCluster(self, preemptable):
    """
    Return the running worker instances of the requested kind.

    Instances come from ``self._getAllRunningInstances()``. The leader
    (the instance whose id equals ``self._instanceId``) is excluded, as is
    any instance whose spot/on-demand status does not match ``preemptable``.
    The remainder is passed through ``awsFilterImpairedNodes``.

    :param preemptable: when true select instances that have a spot instance
           request ID; when false select those without one.
    :return: whatever ``awsFilterImpairedNodes`` returns for the selection.
    """
    running = list(self._getAllRunningInstances())

    workers = []
    for instance in running:
        notLeader = instance.id != self._instanceId  # exclude leader
        # No spot request ID means on-demand; keep the instance when that
        # disagrees with ``preemptable``.
        if notLeader and preemptable != (instance.spot_instance_request_id is None):
            workers.append(instance)

    # NOTE(review): awsFilterImpairedNodes presumably drops instances that
    # fail EC2 health checks -- confirm against its definition.
    return awsFilterImpairedNodes(workers, self._ec2)
def getProvisionedWorkers(self, preemptable):
    """
    Describe the worker instances of this cluster as Node objects.

    The leader (matched by private IP against ``self.leaderIP``) is excluded,
    and only instances whose spot/on-demand status matches ``preemptable``
    are kept. The survivors are passed through ``awsFilterImpairedNodes``
    before being converted.

    :param preemptable: when true keep only instances that have a spot
           instance request ID; when false keep only those without one.
    :return: list of Node objects carrying the public IP, private IP,
             instance id and launch time of each remaining instance.
    """
    clusterNodes = self._getNodesInCluster(ctx=self.ctx,
                                           clusterName=self.clusterName,
                                           both=True)
    logger.debug('All nodes in cluster: %s', clusterNodes)

    # Everything that is not the leader counts as a worker.
    workers = []
    for instance in clusterNodes:
        if instance.private_ip_address != self.leaderIP:
            workers.append(instance)
    logger.debug('All workers found in cluster: %s', workers)

    # An instance without a spot request ID is an on-demand instance, so it
    # matches exactly when ``preemptable`` disagrees with "is on-demand".
    matching = []
    for instance in workers:
        isOnDemand = instance.spot_instance_request_id is None
        if preemptable != isOnDemand:
            matching.append(instance)
    prefix = '' if preemptable else 'non-'
    logger.debug('%spreemptable workers found in cluster: %s', prefix, matching)

    # NOTE(review): awsFilterImpairedNodes presumably drops instances that
    # fail EC2 health checks -- confirm against its definition.
    usable = awsFilterImpairedNodes(matching, self.ctx.ec2)

    nodes = []
    for instance in usable:
        nodes.append(Node(publicIP=instance.ip_address,
                          privateIP=instance.private_ip_address,
                          name=instance.id,
                          launchTime=instance.launch_time))
    return nodes