def _addNodes(self, instances, numNodes, preemptable=False):
    """
    Launch numNodes worker instances (on-demand or spot, depending on preemptable)
    and wait until they are running.
    """
    bdm = self._getBlockDeviceMapping(self.instanceType)
    arn = self._getProfileARN(self.ctx)
    workerData = dict(role='worker',
                      image=applianceSelf(),
                      entrypoint='mesos-slave',
                      args=workerArgs.format(ip=self.leaderIP, preemptable=preemptable))
    userData = awsUserData.format(**workerData)
    kwargs = {'key_name': self.keyName,
              'security_groups': [self.clusterName],
              'instance_type': self.instanceType.name,
              'user_data': userData,
              'block_device_map': bdm,
              'instance_profile_arn': arn}
    instancesLaunched = []
    if not preemptable:
        logger.info('Launching %s non-preemptable nodes', numNodes)
        instancesLaunched = create_ondemand_instances(self.ctx.ec2,
                                                      image_id=self._discoverAMI(self.ctx),
                                                      spec=kwargs,
                                                      num_instances=numNodes)
    else:
        logger.info('Launching %s preemptable nodes', numNodes)
        # force generator to evaluate
        instancesLaunched = list(create_spot_instances(ec2=self.ctx.ec2,
                                                       price=self.spotBid,
                                                       image_id=self._discoverAMI(self.ctx),
                                                       tags={'clusterName': self.clusterName},
                                                       spec=kwargs,
                                                       num_instances=numNodes))
    wait_instances_running(self.ctx.ec2, instancesLaunched)
    logger.info('Launched %s new instance(s)', numNodes)
    return len(instancesLaunched)
def addNodes(self, nodeType, numNodes, preemptable):
    """
    Launch numNodes worker instances of the given nodeType, retrying on AWS request
    throttling, then tag the new instances and propagate the cluster key to them.
    """
    instanceType = ec2_instance_types[nodeType]
    bdm = self._getBlockDeviceMapping(instanceType, rootVolSize=self.nodeStorage)
    arn = self._getProfileARN(self.ctx)
    keyPath = '' if not self.config or not self.config.sseKey else self.config.sseKey
    entryPoint = 'mesos-slave' if not self.config or not self.config.sseKey else "waitForKey.sh"
    workerData = dict(role='worker',
                      image=applianceSelf(),
                      entrypoint=entryPoint,
                      sshKey=self.masterPublicKey,
                      args=workerArgs.format(ip=self.leaderIP, preemptable=preemptable, keyPath=keyPath))
    userData = awsUserData.format(**workerData)
    sgs = [sg for sg in self.ctx.ec2.get_all_security_groups() if sg.name == self.clusterName]
    kwargs = {'key_name': self.keyName,
              'security_group_ids': [sg.id for sg in sgs],
              'instance_type': instanceType.name,
              'user_data': userData,
              'block_device_map': bdm,
              'instance_profile_arn': arn,
              'placement': getCurrentAWSZone()}
    kwargs["subnet_id"] = self.subnetID if self.subnetID else self._getClusterInstance(self.instanceMetaData).subnet_id

    instancesLaunched = []
    for attempt in retry(predicate=AWSProvisioner._throttlePredicate):
        with attempt:
            # After we start launching instances we want to ensure the full setup is done.
            # The biggest obstacle is AWS request throttling, so we retry on those errors
            # at every request in this method.
            if not preemptable:
                logger.info('Launching %s non-preemptable nodes', numNodes)
                instancesLaunched = create_ondemand_instances(self.ctx.ec2,
                                                              image_id=self._discoverAMI(self.ctx),
                                                              spec=kwargs,
                                                              num_instances=numNodes)
            else:
                logger.info('Launching %s preemptable nodes', numNodes)
                kwargs['placement'] = getSpotZone(self.spotBids[nodeType], instanceType.name, self.ctx)
                # force generator to evaluate
                instancesLaunched = list(create_spot_instances(ec2=self.ctx.ec2,
                                                               price=self.spotBids[nodeType],
                                                               image_id=self._discoverAMI(self.ctx),
                                                               tags={'clusterName': self.clusterName},
                                                               spec=kwargs,
                                                               num_instances=numNodes,
                                                               tentative=True))
                # flatten the list
                instancesLaunched = [item for sublist in instancesLaunched for item in sublist]

    for attempt in retry(predicate=AWSProvisioner._throttlePredicate):
        with attempt:
            wait_instances_running(self.ctx.ec2, instancesLaunched)

    # Request throttling retries happen internally to these two methods to ensure proper granularity.
    AWSProvisioner._addTags(instancesLaunched, self.tags)
    self._propagateKey(instancesLaunched)

    logger.info('Launched %s new instance(s)', numNodes)
    return len(instancesLaunched)
def addNodes(self, numNodes, preemptable):
    """
    Launch numNodes worker instances, retrying on AWS request throttling, then tag
    the new instances and propagate the cluster key to them.
    """
    instanceType = self._getInstanceType(preemptable)
    bdm = self._getBlockDeviceMapping(instanceType, rootVolSize=self.nodeStorage)
    arn = self._getProfileARN(self.ctx)
    keyPath = '' if not self.config or not self.config.sseKey else self.config.sseKey
    entryPoint = 'mesos-slave' if not self.config or not self.config.sseKey else "waitForKey.sh"
    workerData = dict(role='worker',
                      image=applianceSelf(),
                      entrypoint=entryPoint,
                      sshKey=self.masterPublicKey,
                      args=workerArgs.format(ip=self.leaderIP, preemptable=preemptable, keyPath=keyPath))
    userData = awsUserData.format(**workerData)
    sgs = [sg for sg in self.ctx.ec2.get_all_security_groups() if sg.name == self.clusterName]
    kwargs = {'key_name': self.keyName,
              'security_group_ids': [sg.id for sg in sgs],
              'instance_type': instanceType.name,
              'user_data': userData,
              'block_device_map': bdm,
              'instance_profile_arn': arn,
              'placement': getCurrentAWSZone()}
    kwargs["subnet_id"] = self.subnetID if self.subnetID else self._getClusterInstance(self.instanceMetaData).subnet_id

    instancesLaunched = []
    for attempt in retry(predicate=AWSProvisioner._throttlePredicate):
        with attempt:
            # After we start launching instances we want to ensure the full setup is done.
            # The biggest obstacle is AWS request throttling, so we retry on those errors
            # at every request in this method.
            if not preemptable:
                logger.info('Launching %s non-preemptable nodes', numNodes)
                instancesLaunched = create_ondemand_instances(self.ctx.ec2,
                                                              image_id=self._discoverAMI(self.ctx),
                                                              spec=kwargs,
                                                              num_instances=numNodes)
            else:
                logger.info('Launching %s preemptable nodes', numNodes)
                kwargs['placement'] = getSpotZone(self.spotBid, instanceType.name, self.ctx)
                # force generator to evaluate
                instancesLaunched = list(create_spot_instances(ec2=self.ctx.ec2,
                                                               price=self.spotBid,
                                                               image_id=self._discoverAMI(self.ctx),
                                                               tags={'clusterName': self.clusterName},
                                                               spec=kwargs,
                                                               num_instances=numNodes,
                                                               tentative=True))
                # flatten the list
                instancesLaunched = [item for sublist in instancesLaunched for item in sublist]

    for attempt in retry(predicate=AWSProvisioner._throttlePredicate):
        with attempt:
            wait_instances_running(self.ctx.ec2, instancesLaunched)

    # Request throttling retries happen internally to these two methods to ensure proper granularity.
    AWSProvisioner._addTags(instancesLaunched, self.tags)
    self._propagateKey(instancesLaunched)

    logger.info('Launched %s new instance(s)', numNodes)
    return len(instancesLaunched)
def _getLeader(cls, clusterName, wait=False, zone=None):
    ctx = cls._buildContext(clusterName=clusterName, zone=zone)
    instances = cls.__getNodesInCluster(ctx, clusterName, both=True)
    instances.sort(key=lambda x: x.launch_time)
    leader = instances[0]  # assume leader was launched first
    if wait:
        logger.info("Waiting for toil_leader to enter 'running' state...")
        wait_instances_running(ctx.ec2, [leader])
        logger.info('... toil_leader is running')
        cls._waitForNode(leader, 'toil_leader')
    return leader
def _getLeader(cls, clusterName, wait=False, zone=None):
    ctx = cls._buildContext(clusterName=clusterName, zone=zone)
    instances = cls._getNodesInCluster(ctx, clusterName, both=True)
    instances.sort(key=lambda x: x.launch_time)
    try:
        leader = instances[0]  # assume leader was launched first
    except IndexError:
        raise NoSuchClusterException(clusterName)
    if wait:
        logger.info("Waiting for toil_leader to enter 'running' state...")
        wait_instances_running(ctx.ec2, [leader])
        logger.info('... toil_leader is running')
        cls._waitForNode(leader, 'toil_leader')
    return leader
def launchCluster(self):
    self.createClusterUtil(args=['--leaderStorage', str(self.requestedLeaderStorage),
                                 '-w', '2',
                                 '--nodeStorage', str(self.requestedLeaderStorage)])
    ctx = AWSProvisioner._buildContext(self.clusterName)
    nodes = AWSProvisioner._getNodesInCluster(ctx, self.clusterName, both=True)
    nodes.sort(key=lambda x: x.launch_time)  # assuming that leader is first
    workers = nodes[1:]
    # test that two worker nodes were created
    self.assertEqual(2, len(workers))
    # test that workers have expected storage size; just use the first worker
    worker = workers[0]
    wait_instances_running(ctx.ec2, [worker])
    rootBlockDevice = worker.block_device_mapping["/dev/xvda"]
    self.assertTrue(isinstance(rootBlockDevice, BlockDeviceType))
    rootVolume = ctx.ec2.get_all_volumes(volume_ids=[rootBlockDevice.volume_id])[0]
    self.assertGreaterEqual(rootVolume.size, self.requestedNodeStorage)
def _addNodes(self, instances, numNodes, preemptable=False):
    """
    Launch numNodes worker instances (on-demand, or spot in the best spot zone for the
    bid) and wait until they are running.
    """
    bdm = self._getBlockDeviceMapping(self.instanceType)
    arn = self._getProfileARN(self.ctx)
    workerData = dict(role='worker',
                      image=applianceSelf(),
                      entrypoint='mesos-slave',
                      args=workerArgs.format(ip=self.leaderIP, preemptable=preemptable))
    userData = awsUserData.format(**workerData)
    kwargs = {'key_name': self.keyName,
              'security_groups': [self.clusterName],
              'instance_type': self.instanceType.name,
              'user_data': userData,
              'block_device_map': bdm,
              'instance_profile_arn': arn}
    instancesLaunched = []
    if not preemptable:
        logger.info('Launching %s non-preemptable nodes', numNodes)
        instancesLaunched = create_ondemand_instances(self.ctx.ec2,
                                                      image_id=self._discoverAMI(self.ctx),
                                                      spec=kwargs,
                                                      num_instances=numNodes)
    else:
        logger.info('Launching %s preemptable nodes', numNodes)
        kwargs['placement'] = getSpotZone(self.spotBid, self.instanceType.name, self.ctx)
        # force generator to evaluate
        instancesLaunched = list(create_spot_instances(ec2=self.ctx.ec2,
                                                       price=self.spotBid,
                                                       image_id=self._discoverAMI(self.ctx),
                                                       tags={'clusterName': self.clusterName},
                                                       spec=kwargs,
                                                       num_instances=numNodes,
                                                       tentative=True))
    wait_instances_running(self.ctx.ec2, instancesLaunched)
    logger.info('Launched %s new instance(s)', numNodes)
    return len(instancesLaunched)
def launchCluster(self):
    self.createClusterUtil(args=['--leaderStorage', str(self.requestedLeaderStorage),
                                 '-w', '2',
                                 '--nodeStorage', str(self.requestedLeaderStorage)])
    ctx = AWSProvisioner._buildContext(self.clusterName)
    nodes = AWSProvisioner._getNodesInCluster(ctx, self.clusterName, both=True)
    nodes.sort(key=lambda x: x.launch_time)  # assuming that leader is first
    workers = nodes[1:]
    # test that two worker nodes were created
    self.assertEqual(2, len(workers))
    # test that workers have expected storage size; just use the first worker
    worker = workers[0]
    worker = next(wait_instances_running(ctx.ec2, [worker]))
    rootBlockDevice = worker.block_device_mapping["/dev/xvda"]
    self.assertTrue(isinstance(rootBlockDevice, BlockDeviceType))
    rootVolume = ctx.ec2.get_all_volumes(volume_ids=[rootBlockDevice.volume_id])[0]
    self.assertGreaterEqual(rootVolume.size, self.requestedNodeStorage)