Exemple #1
0
    def launchCluster(self, leaderNodeType, leaderStorage, owner, **kwargs):
        """
        Launch the leader instance for a new cluster and record its details on self.

        In addition to the parameters inherited from the abstractProvisioner,
        the AWS launchCluster takes the following keyword parameters:

        :param leaderNodeType: instance type name of the leader; also used as the
               key into E2Instances when building the block device mapping.
        :param leaderStorage: root volume size passed to the block device mapping.
        :param owner: value stored under the 'Owner' tag of the leader.
        :param keyName: The key used to communicate with instances (required).
        :param vpcSubnet: A subnet (optional).
        :param userTags: dict of extra tags merged over the default 'Name' and
               'Owner' tags of the leader (optional).
        :raises RuntimeError: if keyName is not supplied.
        """
        if 'keyName' not in kwargs:
            raise RuntimeError("A keyPairName is required for the AWS provisioner.")
        self._keyName = kwargs['keyName']
        self._vpcSubnet = kwargs.get('vpcSubnet')

        profileARN = self._getProfileARN()
        # the security group name is used as the cluster identifier
        sgs = self._createSecurityGroup()
        bdm = self._getBlockDeviceMapping(E2Instances[leaderNodeType], rootVolSize=leaderStorage)

        self._masterPublicKey = 'AAAAB3NzaC1yc2Enoauthorizedkeyneeded'  # dummy key
        userData = self._getCloudConfigUserData('leader', self._masterPublicKey)
        specKwargs = {'key_name': self._keyName,
                      'security_group_ids': [sg.id for sg in sgs],
                      'instance_type': leaderNodeType,
                      'user_data': userData,
                      'block_device_map': bdm,
                      'instance_profile_arn': profileARN,
                      'placement': self._zone}
        # only pin the subnet when the caller asked for one
        if self._vpcSubnet:
            specKwargs["subnet_id"] = self._vpcSubnet
        instances = create_ondemand_instances(self._ctx.ec2, image_id=self._discoverAMI(),
                                              spec=specKwargs, num_instances=1)

        # wait for the leader to finish setting up
        leader = instances[0]
        wait_instances_running(self._ctx.ec2, [leader])
        self._waitForIP(leader)
        leaderNode = Node(publicIP=leader.ip_address, privateIP=leader.private_ip_address,
                          name=leader.id, launchTime=leader.launch_time, nodeType=leaderNodeType,
                          preemptable=False, tags=leader.tags)
        leaderNode.waitForNode('toil_leader')

        defaultTags = {'Name': self.clusterName, 'Owner': owner}
        # userTags is optional: a missing key must not raise, and an empty or
        # None value leaves the default tags untouched.
        userTags = kwargs.get('userTags')
        if userTags:
            defaultTags.update(userTags)

        # if we are running launch cluster we need to save this data as it won't be
        # generated from the metadata. This data is needed to launch worker nodes.
        self._leaderPrivateIP = leader.private_ip_address
        self._addTags([leader], defaultTags)
        self._tags = leader.tags
        self._subnetID = leader.subnet_id
Exemple #2
0
    def addNodes(self, nodeType, numNodes, preemptable, spotBid=None):
        """
        Launch worker instances of the given type and tag them as workers.

        :param nodeType: key into E2Instances selecting the instance type.
        :param numNodes: number of instances to request.
        :param preemptable: if true, request spot instances; otherwise on-demand.
        :param spotBid: price for the spot request. When omitted for a preemptable
               request, the bid is looked up in self._spotBidsMap by nodeType.
        :return: the number of instances that were actually launched.
        :raises RuntimeError: if a preemptable request has no spot bid available.
        """
        assert self._leaderPrivateIP
        if preemptable and not spotBid:
            if self._spotBidsMap and nodeType in self._spotBidsMap:
                spotBid = self._spotBidsMap[nodeType]
            else:
                raise RuntimeError(
                    "No spot bid given for a preemptable node request.")
        instanceType = E2Instances[nodeType]
        bdm = self._getBlockDeviceMapping(instanceType,
                                          rootVolSize=self._nodeStorage)

        # normalise any falsy key setting to None before building the user data
        keyPath = self._sseKey or None
        userData = self._getCloudConfigUserData('worker',
                                                self._masterPublicKey, keyPath,
                                                preemptable)
        if isinstance(userData, text_type):
            # Spot-market provisioning requires bytes for user data.
            userData = userData.encode('utf-8')
        # workers join the same security groups as the leader
        sgs = [
            sg for sg in self._ctx.ec2.get_all_security_groups()
            if sg.name in self._leaderSecurityGroupNames
        ]
        kwargs = {
            'key_name': self._keyName,
            'security_group_ids': [sg.id for sg in sgs],
            'instance_type': instanceType.name,
            'user_data': userData,
            'block_device_map': bdm,
            'instance_profile_arn': self._leaderProfileArn,
            'placement': self._zone,
            'subnet_id': self._subnetID
        }

        instancesLaunched = []

        for attempt in retry(predicate=awsRetryPredicate):
            with attempt:
                # after we start launching instances we want to ensure the full setup is done
                # the biggest obstacle is AWS request throttling, so we retry on these errors at
                # every request in this method
                if not preemptable:
                    logger.debug('Launching %s non-preemptable nodes',
                                 numNodes)
                    instancesLaunched = create_ondemand_instances(
                        self._ctx.ec2,
                        image_id=self._discoverAMI(),
                        spec=kwargs,
                        num_instances=numNodes)
                else:
                    logger.debug('Launching %s preemptable nodes', numNodes)
                    kwargs['placement'] = getSpotZone(spotBid,
                                                      instanceType.name,
                                                      self._ctx)
                    # force generator to evaluate
                    instancesLaunched = list(
                        create_spot_instances(
                            ec2=self._ctx.ec2,
                            price=spotBid,
                            image_id=self._discoverAMI(),
                            tags={'clusterName': self.clusterName},
                            spec=kwargs,
                            num_instances=numNodes,
                            tentative=True))
                    # flatten the list
                    instancesLaunched = [
                        item for sublist in instancesLaunched
                        for item in sublist
                    ]

        for attempt in retry(predicate=awsRetryPredicate):
            with attempt:
                wait_instances_running(self._ctx.ec2, instancesLaunched)

        self._tags[_TOIL_NODE_TYPE_TAG_KEY] = 'worker'
        AWSProvisioner._addTags(instancesLaunched, self._tags)
        if self._sseKey:
            # each worker needs the key file: wait until the node's
            # 'toil_worker' appliance is reachable, then rsync the key to the
            # same path on the node
            for i in instancesLaunched:
                self._waitForIP(i)
                node = Node(publicIP=i.ip_address,
                            privateIP=i.private_ip_address,
                            name=i.id,
                            launchTime=i.launch_time,
                            nodeType=i.instance_type,
                            preemptable=preemptable,
                            tags=i.tags)
                node.waitForNode('toil_worker')
                node.coreRsync([self._sseKey, ':' + self._sseKey],
                               applianceName='toil_worker')
        # report what was actually launched; for spot requests this may differ
        # from the number requested
        numLaunched = len(instancesLaunched)
        logger.debug('Launched %s new instance(s)', numLaunched)
        return numLaunched
Exemple #3
0
    def addNodes(self, nodeType, numNodes, preemptable):
        """
        Launch worker instances of the given type in the cluster's security group.

        :param nodeType: key into ec2_instance_types (and, for preemptable
               requests, into self.spotBids) selecting the instance type.
        :param numNodes: number of instances to request.
        :param preemptable: if true, request spot instances at the bid stored in
               self.spotBids[nodeType]; otherwise launch on-demand instances.
        :return: the number of instances that were actually launched.
        """
        instanceType = ec2_instance_types[nodeType]
        bdm = self._getBlockDeviceMapping(instanceType,
                                          rootVolSize=self.nodeStorage)
        arn = self._getProfileARN(self.ctx)
        # When an SSE key is configured the worker starts with waitForKey.sh
        # instead of mesos-slave directly and is told the key path.
        hasSSEKey = bool(self.config and self.config.sseKey)
        keyPath = self.config.sseKey if hasSSEKey else ''
        entryPoint = "waitForKey.sh" if hasSSEKey else 'mesos-slave'
        workerData = dict(role='worker',
                          image=applianceSelf(),
                          entrypoint=entryPoint,
                          sshKey=self.masterPublicKey,
                          args=workerArgs.format(ip=self.leaderIP,
                                                 preemptable=preemptable,
                                                 keyPath=keyPath))
        userData = awsUserData.format(**workerData)
        # the security group name is the cluster name
        sgs = [
            sg for sg in self.ctx.ec2.get_all_security_groups()
            if sg.name == self.clusterName
        ]
        kwargs = {
            'key_name': self.keyName,
            'security_group_ids': [sg.id for sg in sgs],
            'instance_type': instanceType.name,
            'user_data': userData,
            'block_device_map': bdm,
            'instance_profile_arn': arn,
            'placement': getCurrentAWSZone()
        }
        # prefer the subnet saved on self; otherwise look it up from the
        # cluster instance
        kwargs["subnet_id"] = (self.subnetID if self.subnetID else
                               self._getClusterInstance(self.instanceMetaData).subnet_id)

        instancesLaunched = []

        for attempt in retry(predicate=AWSProvisioner._throttlePredicate):
            with attempt:
                # after we start launching instances we want to ensure the full setup is done
                # the biggest obstacle is AWS request throttling, so we retry on these errors at
                # every request in this method
                if not preemptable:
                    logger.info('Launching %s non-preemptable nodes', numNodes)
                    instancesLaunched = create_ondemand_instances(
                        self.ctx.ec2,
                        image_id=self._discoverAMI(self.ctx),
                        spec=kwargs,
                        num_instances=numNodes)
                else:
                    logger.info('Launching %s preemptable nodes', numNodes)
                    kwargs['placement'] = getSpotZone(self.spotBids[nodeType],
                                                      instanceType.name,
                                                      self.ctx)
                    # force generator to evaluate
                    instancesLaunched = list(
                        create_spot_instances(
                            ec2=self.ctx.ec2,
                            price=self.spotBids[nodeType],
                            image_id=self._discoverAMI(self.ctx),
                            tags={'clusterName': self.clusterName},
                            spec=kwargs,
                            num_instances=numNodes,
                            tentative=True))
                    # flatten the list
                    instancesLaunched = [
                        item for sublist in instancesLaunched
                        for item in sublist
                    ]

        for attempt in retry(predicate=AWSProvisioner._throttlePredicate):
            with attempt:
                wait_instances_running(self.ctx.ec2, instancesLaunched)

        # request throttling retry happens internally to these two methods to ensure proper granularity
        AWSProvisioner._addTags(instancesLaunched, self.tags)
        self._propagateKey(instancesLaunched)

        # report what was actually launched; for spot requests this may differ
        # from the number requested
        numLaunched = len(instancesLaunched)
        logger.info('Launched %s new instance(s)', numLaunched)
        return numLaunched
Exemple #4
0
    def launchCluster(self,
                      leaderNodeType,
                      leaderSpotBid,
                      nodeTypes,
                      preemptableNodeTypes,
                      keyName,
                      clusterName,
                      numWorkers=0,
                      numPreemptableWorkers=0,
                      spotBids=None,
                      userTags=None,
                      zone=None,
                      vpcSubnet=None,
                      leaderStorage=50,
                      nodeStorage=50):
        """
        Launch a leader instance and, optionally, an initial set of workers.

        :param leaderNodeType: instance type name of the leader (key into
               ec2_instance_types).
        :param leaderSpotBid: if truthy, the leader is requested on the spot market
               at this price; otherwise an on-demand leader is launched.
        :param nodeTypes: non-preemptable worker node types, paired element-wise
               with numWorkers.
        :param preemptableNodeTypes: preemptable worker node types, paired
               element-wise with numPreemptableWorkers and spotBids.
        :param keyName: key pair name for the instances; also used as the value of
               the leader's 'Owner' tag.
        :param clusterName: name of the cluster; used for the 'Name' tag.
        :param numWorkers: sequence of worker counts, one per entry of nodeTypes.
               A falsy value (including the default 0) launches no such workers.
        :param numPreemptableWorkers: sequence of worker counts, one per entry of
               preemptableNodeTypes. A falsy value launches no such workers.
        :param spotBids: sequence of bids, one per entry of preemptableNodeTypes.
        :param userTags: dict of extra tags merged over the default leader tags.
        :param zone: placement passed to the instance request and context.
        :param vpcSubnet: subnet id to launch the leader into (optional).
        :param leaderStorage: root volume size for the leader.
        :param nodeStorage: stored as self.nodeStorage when self.config is None.
        :return: the leader as returned by self._getLeader.
        """
        if self.config is None:
            self.nodeStorage = nodeStorage
        if userTags is None:
            userTags = {}
        # The count parameters default to the integer 0, which cannot be zipped
        # against the node type lists; treat any falsy value as "no workers".
        numWorkers = numWorkers or []
        numPreemptableWorkers = numPreemptableWorkers or []
        ctx = self._buildContext(clusterName=clusterName, zone=zone)
        profileARN = self._getProfileARN(ctx)
        leaderInstanceType = ec2_instance_types[leaderNodeType]
        # the security group name is used as the cluster identifier
        sgs = self._createSecurityGroup(ctx, clusterName, vpcSubnet)
        bdm = self._getBlockDeviceMapping(leaderInstanceType,
                                          rootVolSize=leaderStorage)
        self.masterPublicKey = 'AAAAB3NzaC1yc2Enoauthorizedkeyneeded'
        leaderData = dict(role='leader',
                          image=applianceSelf(),
                          entrypoint='mesos-master',
                          sshKey=self.masterPublicKey,
                          args=leaderArgs.format(name=clusterName))
        userData = awsUserData.format(**leaderData)
        kwargs = {
            'key_name': keyName,
            'security_group_ids': [sg.id for sg in sgs],
            'instance_type': leaderNodeType,
            'user_data': userData,
            'block_device_map': bdm,
            'instance_profile_arn': profileARN,
            'placement': zone
        }
        if vpcSubnet:
            kwargs["subnet_id"] = vpcSubnet
        if not leaderSpotBid:
            logger.info('Launching non-preemptable leader')
            create_ondemand_instances(ctx.ec2,
                                      image_id=self._discoverAMI(ctx),
                                      spec=kwargs,
                                      num_instances=1)
        else:
            logger.info('Launching preemptable leader')
            # force generator to evaluate
            list(
                create_spot_instances(ec2=ctx.ec2,
                                      price=leaderSpotBid,
                                      image_id=self._discoverAMI(ctx),
                                      tags={'clusterName': clusterName},
                                      spec=kwargs,
                                      num_instances=1))
        # the launch results are discarded above; the leader is looked up by
        # cluster name instead
        leader = self._getLeader(clusterName=clusterName, wait=True, zone=zone)

        defaultTags = {'Name': clusterName, 'Owner': keyName}
        defaultTags.update(userTags)

        # if we are running launch cluster we need to save this data as it won't be
        # generated from the metadata. This data is needed to launch worker nodes.
        self.leaderIP = leader.private_ip_address
        self._addTags([leader], defaultTags)
        self.ctx = ctx
        if spotBids:
            self.spotBids = dict(zip(preemptableNodeTypes, spotBids))
        self.clusterName = clusterName
        self.keyName = keyName
        self.tags = leader.tags
        self.subnetID = leader.subnet_id
        # launch the requested non-preemptable workers first, then the
        # preemptable ones, one addNodes call per node type
        workersCreated = 0
        for nodeType, workers in zip(nodeTypes, numWorkers):
            workersCreated += self.addNodes(nodeType=nodeType,
                                            numNodes=workers,
                                            preemptable=False)
        for nodeType, workers in zip(preemptableNodeTypes,
                                     numPreemptableWorkers):
            workersCreated += self.addNodes(nodeType=nodeType,
                                            numNodes=workers,
                                            preemptable=True)
        logger.info('Added %d workers', workersCreated)

        return leader