Example #1
    def testAMIFinding(self):
        for zone in ['us-west-2a', 'eu-central-1a', 'sa-east-1b']:
            provisioner = AWSProvisioner('fakename', 'mesos', zone, 10000,
                                         None, None)
            ami = provisioner._discoverAMI()
            # Make sure we got an AMI and it looks plausible
            assert ami.startswith('ami-')
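The constructor arguments here are positional; judging from the cluster_factory signature in Example #10, they appear to map as follows (an inference from this page, not documented API):

# Inferred mapping, assuming the cluster_factory signature in Example #10:
# AWSProvisioner(clusterName, clusterType, zone, nodeStorage,
#                nodeStorageOverrides, sseKey)
provisioner = AWSProvisioner('fakename', 'mesos', 'us-west-2a', 10000, None, None)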
Example #2
    def _test(self, spotInstances=False):
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        self.createClusterUtil()
        # get the leader so we know the IP address - we don't need to wait since create cluster
        # already ensures the leader is running
        leader = AWSProvisioner._getLeader(wait=False, clusterName=self.clusterName)

        assert len(self.getMatchingRoles(self.clusterName)) == 1
        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = ['virtualenv', '--system-site-packages', '--never-download',
                        '/home/venv']
        self.sshUtil(venv_command)

        upgrade_command = ['/home/venv/bin/pip', 'install', 'setuptools==28.7.1']
        self.sshUtil(upgrade_command)

        yaml_command = ['/home/venv/bin/pip', 'install', 'pyyaml==3.12']
        self.sshUtil(yaml_command)

        # install toil scripts
        install_command = ['/home/venv/bin/pip', 'install', 'toil-scripts==%s' % self.toilScripts]
        self.sshUtil(install_command)

        toilOptions = ['--batchSystem=mesos',
                       '--workDir=/var/lib/toil',
                       '--mesosMaster=%s:5050' % leader.private_ip_address,
                       '--clean=always',
                       '--retryCount=2']

        toilOptions.extend(['--provisioner=aws',
                            '--nodeType=' + self.instanceType,
                            '--maxNodes=%s' % self.numWorkers,
                            '--logDebug'])
        if spotInstances:
            toilOptions.extend([
                '--preemptableNodeType=%s:%s' % (self.instanceType, self.spotBid),
                # The RNASeq pipeline does not specify a preemptability requirement so we
                # need to specify a default, otherwise jobs would never get scheduled.
                '--defaultPreemptable',
                '--maxPreemptableNodes=%s' % self.numWorkers])

        toilOptions = ' '.join(toilOptions)

        # TOIL_AWS_NODE_DEBUG prevents the provisioner from killing nodes that
        # fail a status check. This allows for easier debugging of
        # https://github.com/BD2KGenomics/toil/issues/1141
        runCommand = ['bash', '-c',
                      'PATH=/home/venv/bin/:$PATH '
                      'TOIL_AWS_NODE_DEBUG=True '
                      'TOIL_SCRIPTS_TEST_NUM_SAMPLES='+str(self.numSamples)+
                      ' TOIL_SCRIPTS_TEST_TOIL_OPTIONS=' + pipes.quote(toilOptions) +
                      ' TOIL_SCRIPTS_TEST_JOBSTORE=' + self.jobStore +
                      ' /home/venv/bin/python -m unittest -v' +
                      ' toil_scripts.rnaseq_cgl.test.test_rnaseq_cgl.RNASeqCGLTest.test_manifest']

        self.sshUtil(runCommand)
        assert len(self.getMatchingRoles(self.clusterName)) == 1

        AWSProvisioner.destroyCluster(self.clusterName)
        assert len(self.getMatchingRoles(self.clusterName)) == 0
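The run command above wraps the whole toilOptions string with pipes.quote so the remote shell receives it as a single word. A quick local check of the quoting behavior (pipes.quote is shlex.quote on Python 3):

from pipes import quote  # shlex.quote on Python 3

options = '--batchSystem=mesos --retryCount=2'
print(quote(options))
# prints '--batchSystem=mesos --retryCount=2' as one single-quoted shell word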
Example #3
    def launchCluster(self):
        from boto.ec2.blockdevicemapping import BlockDeviceType
        self.createClusterUtil(args=['--leaderStorage', str(self.requestedLeaderStorage),
                                     '--nodeTypes', ",".join(self.instanceTypes),
                                     '-w', ",".join(self.numWorkers),
                                     '--nodeStorage', str(self.requestedNodeStorage)])

        ctx = AWSProvisioner._buildContext(self.clusterName)
        nodes = AWSProvisioner._getNodesInCluster(ctx,
                                                  self.clusterName,
                                                  both=True)
        nodes.sort(key=lambda x: x.launch_time)
        # assuming that leader is first
        workers = nodes[1:]
        # test that two worker nodes were created
        self.assertEqual(2, len(workers))
        # test that workers have expected storage size
        # just use the first worker
        worker = workers[0]
        worker = next(wait_instances_running(ctx.ec2, [worker]))
        rootBlockDevice = worker.block_device_mapping["/dev/xvda"]
        self.assertTrue(isinstance(rootBlockDevice, BlockDeviceType))
        rootVolume = ctx.ec2.get_all_volumes(
            volume_ids=[rootBlockDevice.volume_id])[0]
        self.assertGreaterEqual(rootVolume.size, self.requestedNodeStorage)
Example #4
    def _getScript(self):
        def restartScript():
            from toil.job import Job
            import argparse
            import os

            def f0(job):
                if 'FAIL' in os.environ:
                    raise RuntimeError('failed on purpose')

            if __name__ == '__main__':
                parser = argparse.ArgumentParser()
                Job.Runner.addToilOptions(parser)
                options = parser.parse_args()
                rootJob = Job.wrapJobFn(f0,
                                        cores=0.5,
                                        memory='50 M',
                                        disk='50 M')
                Job.Runner.startToil(rootJob, options)

        script = dedent('\n'.join(getsource(restartScript).split('\n')[1:]))
        # use appliance ssh method instead of sshutil so we can specify input param
        AWSProvisioner._sshAppliance(self.leader.ip_address,
                                     'tee',
                                     self.scriptName,
                                     input=script)
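The getsource/dedent idiom above turns the nested function into a standalone script: drop the first line (the def statement), then strip the common indentation from what remains. A minimal self-contained illustration of the same idiom:

from inspect import getsource
from textwrap import dedent

def nested():
    x = 1
    print(x)

# Drop the 'def nested():' line, then dedent the body to module level.
script = dedent('\n'.join(getsource(nested).split('\n')[1:]))
print(script)  # "x = 1\nprint(x)\n"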
Example #5
    def _getScript(self):
        def userScript():
            from toil.job import Job
            from toil.common import Toil

            # Because this is the only job in the pipeline and because it is preemptable,
            # there will be no non-preemptable jobs. The non-preemptable scaler will therefore
            # not request any nodes initially. And since we made it impossible for the
            # preemptable scaler to allocate any nodes (using an abnormally low spot bid),
            # we will observe a deficit of preemptable nodes that the non-preemptable scaler will
            # compensate for by spinning up non-preemptable nodes instead.
            #
            def job(job, disk='10M', cores=1, memory='10M', preemptable=True):
                pass

            if __name__ == '__main__':
                options = Job.Runner.getDefaultArgumentParser().parse_args()
                with Toil(options) as toil:
                    if toil.config.restart:
                        toil.restart()
                    else:
                        toil.start(Job.wrapJobFn(job))

        script = dedent('\n'.join(getsource(userScript).split('\n')[1:]))
        # use appliance ssh method instead of sshutil so we can specify input param
        AWSProvisioner._sshAppliance(self.leader.ip_address,
                                     'tee',
                                     '/home/userScript.py',
                                     input=script)
Example #6
    def _getScript(self):
        def userScript():
            from toil.job import Job
            from toil.common import Toil

            # Because this is the only job in the pipeline and because it is preemptable,
            # there will be no non-preemptable jobs. The non-preemptable scaler will therefore
            # not request any nodes initially. And since we made it impossible for the
            # preemptable scaler to allocate any nodes (using an abnormally low spot bid),
            # we will observe a deficit of preemptable nodes that the non-preemptable scaler will
            # compensate for by spinning up non-preemptable nodes instead.
            #
            def job(job, disk='10M', cores=1, memory='10M', preemptable=True):
                pass

            if __name__ == '__main__':
                options = Job.Runner.getDefaultArgumentParser().parse_args()
                with Toil(options) as toil:
                    if toil.config.restart:
                        toil.restart()
                    else:
                        toil.start(Job.wrapJobFn(job))

        script = dedent('\n'.join(getsource(userScript).split('\n')[1:]))
        # use appliance ssh method instead of sshutil so we can specify input param
        AWSProvisioner._sshAppliance(self.leader.ip_address, 'tee', '/home/userScript.py', input=script)
Example #7
    def launchCluster(self):
        self.createClusterUtil(args=['-w', '2'])
        ctx = AWSProvisioner._buildContext(self.clusterName)
        # test that two worker nodes were created, plus one for the leader
        nodes = AWSProvisioner._getNodesInCluster(ctx, self.clusterName, both=True)
        self.assertEqual(2 + 1, len(nodes))
Example #8
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument("--nodeType", dest='nodeType', required=True,
                        help="Node type for {non-|}preemptable nodes. The syntax depends on the "
                             "provisioner used. For the aws provisioner this is the name of an "
                             "EC2 instance type followed by a colon and the price in dollar to "
                             "bid for a spot instance, for example 'c3.8xlarge:0.42'.")
    parser.add_argument("--keyPairName", dest='keyPairName', required=True,
                        help="The name of the AWS key pair to include on the instance")
    parser.add_argument("-t", "--tag", metavar='NAME=VALUE', dest='tags', default=[], action='append',
                        help="Tags are added to the AWS cluster for this node and all of its"
                             "children. Tags are of the form: "
                             " -t key1=value1 --tag key2=value2 "
                             "Multiple tags are allowed and each tag needs its own flag. By "
                             "default the cluster is tagged with "
                             " {"
                             "      \"Name\": clusterName,"
                             "      \"Owner\": IAM username"
                             " }. ")
    parser.add_argument("--vpcSubnet",
                        help="VPC subnet ID to launch cluster in. Uses default subnet if not specified."
                        "This subnet needs to have auto assign IPs turned on.")
    parser.add_argument("-w", "--workers", dest='workers', default=0, type=int,
                        help="Specify a number of workers to launch alongside the leader when the "
                             "cluster is created. This can be useful if running toil without "
                             "auto-scaling but with need of more hardware support")
    config = parseBasicOptions(parser)
    setLoggingFromOptions(config)
    tagsDict = None if config.tags is None else createTagsDict(config.tags)

    spotBid = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            raise RuntimeError('The aws extra must be installed to use this provisioner')
        provisioner = AWSProvisioner()
        parsedBid = config.nodeType.split(':', 1)
        if len(config.nodeType) != len(parsedBid[0]):
            # there is a bid
            spotBid = float(parsedBid[1])
            config.nodeType = parsedBid[0]
    else:
        assert False

    provisioner.launchCluster(instanceType=config.nodeType,
                              keyName=config.keyPairName,
                              clusterName=config.clusterName,
                              workers=config.workers,
                              spotBid=spotBid,
                              userTags=tagsDict,
                              zone=config.zone,
                              vpcSubnet=config.vpcSubnet)
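The --nodeType value encodes an optional spot bid after a colon, e.g. 'c3.8xlarge:0.42'; the length comparison above is how the code detects that a bid was given. A hypothetical standalone helper (not part of Toil) expressing the same convention:

def parse_node_type(node_type):
    # Split 'c3.8xlarge:0.42' into ('c3.8xlarge', 0.42); bid is None if absent.
    instance_type, _, bid = node_type.partition(':')
    return instance_type, (float(bid) if bid else None)

assert parse_node_type('c3.8xlarge:0.42') == ('c3.8xlarge', 0.42)
assert parse_node_type('t2.micro') == ('t2.micro', None)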
Example #9
    def testAWSProvisionerUtils(self):
        clusterName = 'cluster-utils-test' + str(uuid.uuid4())
        try:
            system([
                self.toilMain, 'launch-cluster', '--nodeType=t2.micro',
                '--keyPairName=jenkins@jenkins-master', clusterName,
                '--provisioner=aws'
            ])
        finally:
            system([
                self.toilMain, 'destroy-cluster', '--provisioner=aws',
                clusterName
            ])
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
            # launch preemptable master with same name
            system([
                self.toilMain, 'launch-cluster', '--nodeType=m3.medium:0.2',
                '--keyPairName=jenkins@jenkins-master', clusterName,
                '--provisioner=aws', '--logLevel=DEBUG'
            ])
            system([
                self.toilMain, 'ssh-cluster', '--provisioner=aws', clusterName
            ])

            testStrings = [
                "'foo'", '"foo"', '  foo', '$PATH', '"', "'", '\\', '| cat',
                '&& cat', '; cat'
            ]
            for test in testStrings:
                logger.info('Testing SSH with special string: %s', test)
                compareTo = "import sys; assert sys.argv[1]==%r" % test
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['python', '-', test],
                                         input=compareTo)

            try:
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['nonsenseShouldFail'])
            except RuntimeError:
                pass
            else:
                self.fail(
                    'The remote command failed silently where it should have '
                    'raised an error')

        finally:
            system([
                self.toilMain, 'destroy-cluster', '--provisioner=aws',
                clusterName
            ])
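The loop above pipes a one-line Python program to the leader's stdin while passing each special string as a literal argv element; the %r formatting embeds a shell-proof repr of the expected value in the comparison script. The same round trip can be mimicked locally with only the standard library, minus the SSH hop:

import subprocess
import sys

test = "'foo'"
compareTo = "import sys; assert sys.argv[1]==%r" % test
# Mirrors the remote 'python - <arg>' invocation used by sshLeader.
subprocess.run([sys.executable, '-', test], input=compareTo, text=True, check=True)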
Example #10
def cluster_factory(provisioner, clusterName=None, clusterType='mesos', zone=None, nodeStorage=50, nodeStorageOverrides=None, sseKey=None):
    """
    Find and instantiate the appropriate provisioner instance to make clusters
    in the given cloud.

    Raises ClusterTypeNotSupportedException if the given provisioner does not
    implement clusters of the given type.

    :param provisioner: The cloud type of the cluster.
    :param clusterName: The name of the cluster.
    :param clusterType: The type of cluster: 'mesos' or 'kubernetes'.
    :param zone: The cloud zone.
    :return: A cluster object for the given cloud type.
    """
    if provisioner == 'aws':
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            logger.error('The aws extra must be installed to use this provisioner')
            raise
        return AWSProvisioner(clusterName, clusterType, zone, nodeStorage, nodeStorageOverrides, sseKey)
    elif provisioner == 'gce':
        try:
            from toil.provisioners.gceProvisioner import GCEProvisioner
        except ImportError:
            logger.error('The google extra must be installed to use this provisioner')
            raise
        return GCEProvisioner(clusterName, clusterType, zone, nodeStorage, nodeStorageOverrides, sseKey)
    else:
        raise RuntimeError("Invalid provisioner '%s'" % provisioner)
Example #11
def cluster_factory(provisioner,
                    clusterName=None,
                    zone=None,
                    nodeStorage=50,
                    nodeStorageOverrides=None,
                    sseKey=None):
    """
    :param clusterName: The name of the cluster.
    :param provisioner: The cloud type of the cluster.
    :param zone: The cloud zone.
    :return: A cluster object for the given cloud type.
    """
    if provisioner == 'aws':
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            logger.error(
                'The aws extra must be installed to use this provisioner')
            raise
        return AWSProvisioner(clusterName, zone, nodeStorage,
                              nodeStorageOverrides, sseKey)
    elif provisioner == 'gce':
        try:
            from toil.provisioners.gceProvisioner import GCEProvisioner
        except ImportError:
            logger.error(
                'The google extra must be installed to use this provisioner')
            raise
        return GCEProvisioner(clusterName, zone, nodeStorage,
                              nodeStorageOverrides, sseKey)
    else:
        raise RuntimeError("Invalid provisioner '%s'" % provisioner)
Example #12
    def launchCluster(self):
        from boto.ec2.blockdevicemapping import BlockDeviceType
        self.createClusterUtil(args=['--leaderStorage', str(self.requestedLeaderStorage),
                                     '-w', '2', '--nodeStorage', str(self.requestedNodeStorage)])
        ctx = AWSProvisioner._buildContext(self.clusterName)
        nodes = AWSProvisioner._getNodesInCluster(ctx, self.clusterName, both=True)
        nodes.sort(key=lambda x: x.launch_time)
        # assuming that leader is first
        workers = nodes[1:]
        # test that two worker nodes were created
        self.assertEqual(2, len(workers))
        # test that workers have expected storage size
        # just use the first worker
        worker = workers[0]
        worker = next(wait_instances_running(ctx.ec2, [worker]))
        rootBlockDevice = worker.block_device_mapping["/dev/xvda"]
        self.assertTrue(isinstance(rootBlockDevice, BlockDeviceType))
        rootVolume = ctx.ec2.get_all_volumes(volume_ids=[rootBlockDevice.volume_id])[0]
        self.assertGreaterEqual(rootVolume.size, self.requestedNodeStorage)
Example #13
    def testAWSProvisionerUtils(self):
        clusterName = 'cluster-utils-test' + str(uuid.uuid4())
        try:
            system([self.toilMain, 'launch-cluster', '--nodeType=t2.micro', '--keyPairName=jenkins@jenkins-master',
                    clusterName, '--provisioner=aws'])
        finally:
            system([self.toilMain, 'destroy-cluster', '--provisioner=aws', clusterName])
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
            # launch preemptable master with same name
            system([self.toilMain, 'launch-cluster', '--nodeType=m3.medium:0.2', '--keyPairName=jenkins@jenkins-master',
                    clusterName, '--provisioner=aws', '--logLevel=DEBUG'])
            system([self.toilMain, 'ssh-cluster', '--provisioner=aws', clusterName])

            testStrings = ["'foo'",
                           '"foo"',
                           '  foo',
                           '$PATH',
                           '"',
                           "'",
                           '\\',
                           '| cat',
                           '&& cat',
                           '; cat'
                           ]
            for test in testStrings:
                logger.info('Testing SSH with special string: %s', test)
                compareTo = "import sys; assert sys.argv[1]==%r" % test
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['python', '-', test],
                                         input=compareTo)

            try:
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['nonsenseShouldFail'])
            except RuntimeError:
                pass
            else:
                self.fail('The remote command failed silently where it should have '
                          'raised an error')

        finally:
            system([self.toilMain, 'destroy-cluster', '--provisioner=aws', clusterName])
Example #14
    def _getScript(self):
        def restartScript():
            from toil.job import Job
            import argparse
            import os

            def f0(job):
                if 'FAIL' in os.environ:
                    raise RuntimeError('failed on purpose')

            if __name__ == '__main__':
                parser = argparse.ArgumentParser()
                Job.Runner.addToilOptions(parser)
                options = parser.parse_args()
                rootJob = Job.wrapJobFn(f0, cores=0.5, memory='50 M', disk='50 M')
                Job.Runner.startToil(rootJob, options)

        script = dedent('\n'.join(getsource(restartScript).split('\n')[1:]))
        # use appliance ssh method instead of sshutil so we can specify input param
        AWSProvisioner._sshAppliance(self.leader.ip_address, 'tee', self.scriptName, input=script)
Example #15
    def getRootVolID(self):
        """
        Adds a check that the EBS volume is built with adequate size.
        Otherwise functionally equivalent to the parent method.
        :return: volumeID
        """
        volumeID = super(AWSAutoscaleTest, self).getRootVolID()
        ctx = AWSProvisioner._buildContext(self.clusterName)
        rootVolume = ctx.ec2.get_all_volumes(volume_ids=[volumeID])[0]
        # test that the leader is given adequate storage
        self.assertGreaterEqual(rootVolume.size, self.requestedLeaderStorage)
        return volumeID
Example #16
    def _setProvisioner(self):
        if self.config.provisioner is None:
            self._provisioner = None
        elif self.config.provisioner == 'cgcloud':
            logger.info('Using cgcloud provisioner.')
            from toil.provisioners.cgcloud.provisioner import CGCloudProvisioner
            self._provisioner = CGCloudProvisioner(self.config, self._batchSystem)
        elif self.config.provisioner == 'aws':
            logger.info('Using AWS provisioner.')
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
            self._provisioner = AWSProvisioner(self.config, self._batchSystem)
        else:
            # Command line parser should have checked argument validity already
            assert False, self.config.provisioner
Example #17
def clusterFactory(provisioner,
                   clusterName=None,
                   zone=None,
                   nodeStorage=50,
                   sseKey=None):
    """
    :param clusterName: The name of the cluster.
    :param provisioner: The cloud type of the cluster.
    :param zone: The cloud zone.
    :return: A cluster object for the given cloud type.
    """
    if provisioner == 'aws':
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
            from toil.lib.ec2Credentials import enable_metadata_credential_caching
        except ImportError:
            logger.error(
                'The aws extra must be installed to use this provisioner')
            raise
        enable_metadata_credential_caching()  # monkey patch for AWS
        return AWSProvisioner(clusterName, zone, nodeStorage, sseKey)
    elif provisioner == 'gce':
        try:
            from toil.provisioners.gceProvisioner import GCEProvisioner
        except ImportError:
            logger.error(
                'The google extra must be installed to use this provisioner')
            raise
        return GCEProvisioner(clusterName, zone, nodeStorage, sseKey)
    elif provisioner == 'azure':
        try:
            from toil.provisioners.azure.azureProvisioner import AzureProvisioner
        except ImportError:
            logger.error(
                'The azure extra must be installed to use this provisioner')
            raise
        return AzureProvisioner(clusterName, zone, nodeStorage)
    else:
        raise RuntimeError("Invalid provisioner '%s'" % provisioner)
Example #18
    def test_read_write_global_files(self):
        """
        Make sure the `_write_file_to_cloud()` and `_read_file_from_cloud()`
        functions of the AWS provisioner work as intended.
        """
        provisioner = AWSProvisioner(f'aws-provisioner-test-{uuid4()}',
                                     'mesos', 'us-west-2a', 50, None, None)
        key = 'config/test.txt'
        contents = b"Hello, this is a test."

        try:
            url = provisioner._write_file_to_cloud(key, contents=contents)
            self.assertTrue(url.startswith("s3://"))

            self.assertEqual(contents, provisioner._read_file_from_cloud(key))
        finally:
            # the cluster was never launched, but we need to clean up the s3 bucket
            provisioner.destroyCluster()
Example #19
    def testAWSProvisionerUtils(self):
        clusterName = 'cluster-utils-test' + str(uuid.uuid4())
        keyName = os.getenv('TOIL_AWS_KEYNAME')

        try:
            # --provisioner flag should default to aws, so we're not explicitly
            # specifying that here
            system([self.toilMain, 'launch-cluster', '--nodeType=t2.micro',
                    '--keyPairName=' + keyName, clusterName])
        finally:
            system([self.toilMain, 'destroy-cluster', '--provisioner=aws', clusterName])
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner

            userTags = {'key1': 'value1', 'key2': 'value2', 'key3': 'value3'}
            tags = {'Name': clusterName, 'Owner': keyName}
            tags.update(userTags)

            # launch preemptable master with same name
            system([self.toilMain, 'launch-cluster', '-t', 'key1=value1', '-t', 'key2=value2', '--tag', 'key3=value3',
                    '--nodeType=m3.medium:0.2', '--keyPairName=' + keyName, clusterName,
                    '--provisioner=aws', '--logLevel=DEBUG'])

            # test leader tags
            leaderTags = AWSProvisioner._getLeader(clusterName).tags
            self.assertEqual(tags, leaderTags)

            # Test strict host key checking
            # Doesn't work when run locally.
            if keyName == 'jenkins@jenkins-master':
                try:
                    AWSProvisioner.sshLeader(clusterName=clusterName, strict=True)
                except RuntimeError:
                    pass
                else:
                    self.fail("Host key verification passed where it should have failed")

            # Add the host key to known_hosts so that the rest of the tests can
            # pass without choking on the verification prompt.
            AWSProvisioner.sshLeader(clusterName=clusterName, strict=True, sshOptions=['-oStrictHostKeyChecking=no'])

            system([self.toilMain, 'ssh-cluster', '--provisioner=aws', clusterName])

            testStrings = ["'foo'",
                           '"foo"',
                           '  foo',
                           '$PATH',
                           '"',
                           "'",
                           '\\',
                           '| cat',
                           '&& cat',
                           '; cat'
                           ]
            for test in testStrings:
                logger.info('Testing SSH with special string: %s', test)
                compareTo = "import sys; assert sys.argv[1]==%r" % test
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['python', '-', test],
                                         input=compareTo)

            try:
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['nonsenseShouldFail'])
            except RuntimeError:
                pass
            else:
                self.fail('The remote command failed silently where it should have '
                          'raised an error')

            AWSProvisioner.sshLeader(clusterName=clusterName,
                                     args=['python', '-c', "import os; assert os.environ['TOIL_WORKDIR']=='/var/lib/toil'"])

            # `toil rsync-cluster`
            # Testing special characters - string.punctuation
            fname = '!"#$%&\'()*+,-.;<=>:\ ?@[\\\\]^_`{|}~'
            testData = os.urandom(3 * (10**6))
            with tempfile.NamedTemporaryFile(suffix=fname) as tmpFile:
                relpath = os.path.basename(tmpFile.name)
                tmpFile.write(testData)
                tmpFile.flush()
                # Upload file to leader
                AWSProvisioner.rsyncLeader(clusterName=clusterName, args=[tmpFile.name, ":"])
                # Ensure file exists
                AWSProvisioner.sshLeader(clusterName=clusterName, args=["test", "-e", relpath])
            tmpDir = tempfile.mkdtemp()
            # Download the file again and make sure it's the same file
            # `--protect-args` needed because remote bash chokes on special characters
            AWSProvisioner.rsyncLeader(clusterName=clusterName, args=["--protect-args", ":" + relpath, tmpDir])
            with open(os.path.join(tmpDir, relpath), "r") as f:
                self.assertEqual(f.read(), testData, "Downloaded file does not match original file")
        finally:
            system([self.toilMain, 'destroy-cluster', '--provisioner=aws', clusterName])
            try:
                shutil.rmtree(tmpDir)
            except NameError:
                pass
Example #20
    def _test(self, spotInstances=False, fulfillableBid=True):
        """
        Does the work of the testing. Many features' tests are thrown in here
        in no particular order.

        :param spotInstances: Specify if you want to use spotInstances
        :param fulfillableBid: If false, the bid will never succeed. Used to test bid failure
        """
        if not fulfillableBid:
            self.spotBid = '0.01'
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        self.launchCluster()
        # get the leader so we know the IP address - we don't need to wait since create cluster
        # already ensures the leader is running
        self.leader = AWSProvisioner._getLeader(wait=False, clusterName=self.clusterName)
        ctx = AWSProvisioner._buildContext(self.clusterName)

        assert len(self.getMatchingRoles(self.clusterName)) == 1
        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = ['virtualenv', '--system-site-packages', '--never-download',
                        '/home/venv']
        self.sshUtil(venv_command)

        upgrade_command = ['/home/venv/bin/pip', 'install', 'setuptools==28.7.1']
        self.sshUtil(upgrade_command)

        yaml_command = ['/home/venv/bin/pip', 'install', 'pyyaml==3.12']
        self.sshUtil(yaml_command)

        self._getScript()

        toilOptions = [self.jobStore,
                       '--batchSystem=mesos',
                       '--workDir=/var/lib/toil',
                       '--clean=always',
                       '--retryCount=2',
                       '--clusterStats=/home/',
                       '--logDebug',
                       '--logFile=/home/sort.log',
                       '--provisioner=aws']

        if spotInstances:
            toilOptions.extend([
                '--preemptableNodeType=%s:%s' % (self.instanceType, self.spotBid),
                # The RNASeq pipeline does not specify a preemptability requirement so we
                # need to specify a default, otherwise jobs would never get scheduled.
                '--defaultPreemptable',
                '--maxPreemptableNodes=%s' % self.numWorkers])
        else:
            toilOptions.extend(['--nodeType=' + self.instanceType,
                                '--maxNodes=%s' % self.numWorkers])

        self._runScript(toilOptions)

        assert len(self.getMatchingRoles(self.clusterName)) == 1

        checkStatsCommand = ['/home/venv/bin/python', '-c',
                             'import json; import os; '
                             'json.load(open("/home/" + [f for f in os.listdir("/home/") '
                             'if f.endswith(".json")].pop()))']

        self.sshUtil(checkStatsCommand)

        volumeID = self.getRootVolID()
        ctx = AWSProvisioner._buildContext(self.clusterName)
        AWSProvisioner.destroyCluster(self.clusterName)
        self.leader.update()
        for attempt in range(6):
            # https://github.com/BD2KGenomics/toil/issues/1567
            # retry this for up to 1 minute until the volume disappears
            try:
                ctx.ec2.get_all_volumes(volume_ids=[volumeID])
                time.sleep(10)
            except EC2ResponseError as e:
                if e.status == 400 and 'InvalidVolume.NotFound' in e.code:
                    break
                else:
                    raise
        else:
            self.fail('Volume with ID %s was not cleaned up properly' % volumeID)

        assert len(self.getMatchingRoles(self.clusterName)) == 0
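The volume-cleanup check above leans on Python's for/else: the else branch (the self.fail call) runs only if the loop finished all attempts without a break, i.e. the volume never disappeared. The bare pattern, independent of EC2:

import time

def wait_until_gone(still_exists, attempts=6, delay=10):
    # Hypothetical helper showing the same for/else retry shape.
    for _ in range(attempts):
        if not still_exists():
            break  # resource is gone; the else clause is skipped
        time.sleep(delay)
    else:
        raise RuntimeError('resource was not cleaned up in time')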
Example #21
    def getMatchingRoles(self, clusterName):
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        ctx = AWSProvisioner._buildContext(clusterName)
        roles = list(ctx.local_roles())
        return roles
Example #22
    def _test(self, spotInstances=False):
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner

        leader = AWSProvisioner.launchCluster(instanceType=self.instanceType,
                                              keyName=self.keyName,
                                              clusterName=self.clusterName)

        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = 'virtualenv --system-site-packages --never-download /home/venv'
        AWSProvisioner._sshAppliance(leader.ip_address, command=venv_command)

        upgrade_command = '/home/venv/bin/pip install setuptools==28.7.1'
        AWSProvisioner._sshAppliance(leader.ip_address,
                                     command=upgrade_command)

        yaml_command = '/home/venv/bin/pip install pyyaml==3.12'
        AWSProvisioner._sshAppliance(leader.ip_address, command=yaml_command)

        # install toil scripts
        install_command = ('/home/venv/bin/pip install toil-scripts==%s' %
                           self.toilScripts)
        AWSProvisioner._sshAppliance(leader.ip_address,
                                     command=install_command)

        # install curl
        install_command = 'sudo apt-get -y install curl'
        AWSProvisioner._sshAppliance(leader.ip_address,
                                     command=install_command)

        toilOptions = [
            '--batchSystem=mesos', '--workDir=/var/lib/toil',
            '--mesosMaster=%s:5050' % leader.private_ip_address,
            '--clean=always', '--retryCount=0'
        ]

        toilOptions.extend([
            '--provisioner=aws', '--nodeType=' + self.instanceType,
            '--maxNodes=%s' % self.numWorkers, '--logDebug'
        ])
        if spotInstances:
            toilOptions.extend([
                '--preemptableNodeType=%s:%s' %
                (self.instanceType, self.spotBid),
                # The RNASeq pipeline does not specify a preemptability requirement so we
                # need to specify a default, otherwise jobs would never get scheduled.
                '--defaultPreemptable',
                '--maxPreemptableNodes=%s' % self.numWorkers
            ])

        toilOptions = ' '.join(toilOptions)

        runCommand = 'bash -c \\"export PATH=/home/venv/bin/:$PATH;export TOIL_SCRIPTS_TEST_NUM_SAMPLES=%i; export TOIL_SCRIPTS_TEST_TOIL_OPTIONS=' + pipes.quote(toilOptions) + \
                     '; export TOIL_SCRIPTS_TEST_JOBSTORE=' + self.jobStore + \
                     '; /home/venv/bin/python -m unittest -v' + \
                     ' toil_scripts.rnaseq_cgl.test.test_rnaseq_cgl.RNASeqCGLTest.test_manifest\\"'

        runCommand %= self.numSamples

        AWSProvisioner._sshAppliance(leader.ip_address, runCommand)
Example #23
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument(
        "--leaderNodeType",
        dest="leaderNodeType",
        required=True,
        help="Non-preemptable node type to use for the cluster leader.")
    parser.add_argument(
        "--keyPairName",
        dest='keyPairName',
        required=True,
        help="On AWS, the name of the AWS key pair to include on the instance."
        " On Google/GCE, this is the ssh key pair."
        " On Azure, this will be used as the owner tag.")
    parser.add_argument(
        "--publicKeyFile",
        dest='publicKeyFile',
        default="~/.ssh/id_rsa.pub",
        help="On Azure, the file"
        " containing the key pairs (the first key pair will be used).")
    parser.add_argument(
        "--boto",
        dest='botoPath',
        help="The path to the boto credentials directory. This is transferred "
        "to all nodes in order to access the AWS jobStore from non-AWS instances."
    )
    parser.add_argument(
        "-t",
        "--tag",
        metavar='NAME=VALUE',
        dest='tags',
        default=[],
        action='append',
        help="Tags are added to the AWS cluster for this node and all of its "
        "children. Tags are of the form:\n"
        " -t key1=value1 --tag key2=value2\n"
        "Multiple tags are allowed and each tag needs its own flag. By "
        "default the cluster is tagged with "
        " {\n"
        "      \"Name\": clusterName,\n"
        "      \"Owner\": IAM username\n"
        " }. ")
    parser.add_argument(
        "--vpcSubnet",
        help="VPC subnet ID to launch cluster in. Uses default subnet if not "
        "specified. This subnet needs to have auto assign IPs turned on.")
    parser.add_argument(
        "--nodeTypes",
        dest='nodeTypes',
        default=None,
        type=str,
        help="Comma-separated list of node types to create while launching the "
        "leader. The syntax for each node type depends on the provisioner "
        "used. For the aws provisioner this is the name of an EC2 instance "
        "type followed by a colon and the price in dollar to bid for a spot "
        "instance, for example 'c3.8xlarge:0.42'. Must also provide the "
        "--workers argument to specify how many workers of each node type "
        "to create.")
    parser.add_argument(
        "-w",
        "--workers",
        dest='workers',
        default=None,
        type=str,
        help=
        "Comma-separated list of the number of workers of each node type to "
        "launch alongside the leader when the cluster is created. This can be "
        "useful if running toil without auto-scaling but with need of more "
        "hardware support")
    parser.add_argument(
        "--leaderStorage",
        dest='leaderStorage',
        type=int,
        default=50,
        help="Specify the size (in gigabytes) of the root volume for the leader "
        "instance.  This is an EBS volume.")
    parser.add_argument(
        "--nodeStorage",
        dest='nodeStorage',
        type=int,
        default=50,
        help="Specify the size (in gigabytes) of the root volume for any worker "
        "instances created when using the -w flag. This is an EBS volume.")
    parser.add_argument(
        '--forceDockerAppliance',
        dest='forceDockerAppliance',
        action='store_true',
        default=False,
        help=
        "Disables sanity checking the existence of the docker image specified "
        "by TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for "
        "autoscaling.")
    parser.add_argument(
        "--azureStorageCredentials",
        dest='azureStorageCredentials',
        type=str,
        default=credential_file_path,
        help=
        "The location of the file containing the Azure storage credentials. If not specified,"
        " the default file is used with Azure provisioning. Use 'None' to disable"
        " the transfer of credentials.")
    config = parseBasicOptions(parser)
    tagsDict = None if config.tags is None else createTagsDict(config.tags)

    # checks the validity of TOIL_APPLIANCE_SELF before proceeding
    checkToilApplianceSelf = applianceSelf(
        forceDockerAppliance=config.forceDockerAppliance)

    spotBids = []
    nodeTypes = []
    preemptableNodeTypes = []
    numNodes = []
    numPreemptableNodes = []
    leaderSpotBid = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            logger.error(
                'The aws extra must be installed to use this provisioner')
            raise
        provisioner = AWSProvisioner()
    elif config.provisioner == 'azure':
        try:
            from toil.provisioners.azure.azureProvisioner import AzureProvisioner
        except ImportError:
            raise RuntimeError(
                'The azure extra must be installed to use this provisioner')
        provisioner = AzureProvisioner()
    elif config.provisioner == 'gce':
        logger.info('Using a gce provisioner.')
        try:
            from toil.provisioners.gceProvisioner import GCEProvisioner
        except ImportError:
            logger.error(
                'The google extra must be installed to use this provisioner')
            raise
        provisioner = GCEProvisioner()
    else:
        assert False

    #Parse leader node type and spot bid
    parsedBid = config.leaderNodeType.split(':', 1)
    if len(config.leaderNodeType) != len(parsedBid[0]):
        leaderSpotBid = float(parsedBid[1])
        config.leaderNodeType = parsedBid[0]

    if (config.nodeTypes
            or config.workers) and not (config.nodeTypes and config.workers):
        raise RuntimeError(
            "The --nodeTypes and --workers options must be specified together.")
    if config.nodeTypes:
        nodeTypesList = config.nodeTypes.split(",")
        numWorkersList = config.workers.split(",")
        if not len(nodeTypesList) == len(numWorkersList):
            raise RuntimeError(
                "List of node types must be the same length as the list of workers."
            )
        for nodeTypeStr, num in zip(nodeTypesList, numWorkersList):
            parsedBid = nodeTypeStr.split(':', 1)
            if len(nodeTypeStr) != len(parsedBid[0]):
                #Is a preemptable node
                preemptableNodeTypes.append(parsedBid[0])
                spotBids.append(float(parsedBid[1]))
                numPreemptableNodes.append(int(num))
            else:
                nodeTypes.append(nodeTypeStr)
                numNodes.append(int(num))

    provisioner.launchCluster(
        leaderNodeType=config.leaderNodeType,
        leaderSpotBid=leaderSpotBid,
        nodeTypes=nodeTypes,
        preemptableNodeTypes=preemptableNodeTypes,
        numWorkers=numNodes,
        numPreemptableWorkers=numPreemptableNodes,
        keyName=config.keyPairName,
        botoPath=config.botoPath,
        clusterName=config.clusterName,
        spotBids=spotBids,
        userTags=tagsDict,
        zone=config.zone,
        leaderStorage=config.leaderStorage,
        nodeStorage=config.nodeStorage,
        vpcSubnet=config.vpcSubnet,
        publicKeyFile=config.publicKeyFile,
        azureStorageCredentials=config.azureStorageCredentials)
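The --nodeTypes/--workers handling above splits two comma-separated lists and classifies each node type by whether it carries a spot bid. A compact sketch of that classification (parse_node_type is the hypothetical helper from the sketch after Example #8, inlined here so the snippet stands alone):

def parse_node_type(node_type):
    instance_type, _, bid = node_type.partition(':')
    return instance_type, (float(bid) if bid else None)

onDemand, preemptable = [], []
for typeStr, num in zip('c3.8xlarge:0.42,t2.micro'.split(','), '2,1'.split(',')):
    instanceType, bid = parse_node_type(typeStr)
    (preemptable if bid is not None else onDemand).append((instanceType, bid, int(num)))

assert onDemand == [('t2.micro', None, 1)]
assert preemptable == [('c3.8xlarge', 0.42, 2)]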
Example #24
    def _test(self, preemptableJobs=False):
        """
        Does the work of the testing. Many features' tests are thrown in here
        in no particular order.
        """
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        self.launchCluster()
        # get the leader so we know the IP address - we don't need to wait since create cluster
        # already ensures the leader is running
        self.leader = AWSProvisioner._getLeader(wait=False,
                                                clusterName=self.clusterName)
        ctx = AWSProvisioner._buildContext(self.clusterName)

        assert len(self.getMatchingRoles(self.clusterName)) == 1
        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = [
            'virtualenv', '--system-site-packages', '--never-download',
            '/home/venv'
        ]
        self.sshUtil(venv_command)

        upgrade_command = [
            '/home/venv/bin/pip', 'install', 'setuptools==28.7.1'
        ]
        self.sshUtil(upgrade_command)

        yaml_command = ['/home/venv/bin/pip', 'install', 'pyyaml==3.12']
        self.sshUtil(yaml_command)

        self._getScript()

        toilOptions = [
            self.jobStore, '--batchSystem=mesos', '--workDir=/var/lib/toil',
            '--clean=always', '--retryCount=2', '--clusterStats=/home/',
            '--logDebug', '--logFile=/home/sort.log', '--provisioner=aws'
        ]

        toilOptions.extend([
            '--nodeTypes=' + ",".join(self.instanceTypes),
            '--maxNodes=%s' % ",".join(self.numWorkers)
        ])
        if preemptableJobs:
            toilOptions.extend(['--defaultPreemptable'])

        self._runScript(toilOptions)

        assert len(self.getMatchingRoles(self.clusterName)) == 1

        checkStatsCommand = [
            '/home/venv/bin/python', '-c', 'import json; import os; '
            'json.load(open("/home/" + [f for f in os.listdir("/home/") '
            'if f.endswith(".json")].pop()))'
        ]

        self.sshUtil(checkStatsCommand)

        from boto.exception import EC2ResponseError
        volumeID = self.getRootVolID()
        ctx = AWSProvisioner._buildContext(self.clusterName)
        AWSProvisioner.destroyCluster(self.clusterName)
        self.leader.update()
        for attempt in range(6):
            # https://github.com/BD2KGenomics/toil/issues/1567
            # retry this for up to 1 minute until the volume disappears
            try:
                ctx.ec2.get_all_volumes(volume_ids=[volumeID])
                time.sleep(10)
            except EC2ResponseError as e:
                if e.status == 400 and 'InvalidVolume.NotFound' in e.code:
                    break
                else:
                    raise
        else:
            self.fail('Volume with ID %s was not cleaned up properly' %
                      volumeID)

        assert len(self.getMatchingRoles(self.clusterName)) == 0
Example #25
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument("--nodeType", dest='nodeType', required=True,
                        help="Node type for {non-|}preemptable nodes. The syntax depends on the "
                             "provisioner used. For the aws provisioner this is the name of an "
                             "EC2 instance type followed by a colon and the price in dollar to "
                             "bid for a spot instance, for example 'c3.8xlarge:0.42'.")
    parser.add_argument("--keyPairName", dest='keyPairName', required=True,
                        help="The name of the AWS key pair to include on the instance")
    parser.add_argument("-t", "--tag", metavar='NAME=VALUE', dest='tags', default=[], action='append',
                        help="Tags are added to the AWS cluster for this node and all of its "
                             "children. Tags are of the form:\n"
                             " --t key1=value1 --tag key2=value2\n"
                             "Multiple tags are allowed and each tag needs its own flag. By "
                             "default the cluster is tagged with "
                             " {\n"
                             "      \"Name\": clusterName,\n"
                             "      \"Owner\": IAM username\n"
                             " }. ")
    parser.add_argument("--vpcSubnet",
                        help="VPC subnet ID to launch cluster in. Uses default subnet if not specified. "
                        "This subnet needs to have auto assign IPs turned on.")
    parser.add_argument("-w", "--workers", dest='workers', default=0, type=int,
                        help="Specify a number of workers to launch alongside the leader when the "
                             "cluster is created. This can be useful if running toil without "
                             "auto-scaling but with need of more hardware support")
    parser.add_argument("--leaderStorage", dest='leaderStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for the leader instance. "
                             "This is an EBS volume.")
    parser.add_argument("--nodeStorage", dest='nodeStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for any worker instances "
                             "created when using the -w flag. This is an EBS volume.")
    config = parseBasicOptions(parser)
    tagsDict = None if config.tags is None else createTagsDict(config.tags)

    spotBid = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            raise RuntimeError('The aws extra must be installed to use this provisioner')
        provisioner = AWSProvisioner()
        parsedBid = config.nodeType.split(':', 1)
        if len(config.nodeType) != len(parsedBid[0]):
            # there is a bid
            spotBid = float(parsedBid[1])
            config.nodeType = parsedBid[0]
    else:
        assert False

    provisioner.launchCluster(instanceType=config.nodeType,
                              keyName=config.keyPairName,
                              clusterName=config.clusterName,
                              workers=config.workers,
                              spotBid=spotBid,
                              userTags=tagsDict,
                              zone=config.zone,
                              leaderStorage=config.leaderStorage,
                              nodeStorage=config.nodeStorage,
                              vpcSubnet=config.vpcSubnet)
Example #26
    def tearDown(self):
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        AWSProvisioner.destroyCluster(self.clusterName)
Example #27
    def _test(self, spotInstances=False, fulfillableBid=True):
        """
        Does the work of the testing. Many features' tests are thrown in here
        in no particular order.

        :param spotInstances: Specify if you want to use spotInstances
        :param fulfillableBid: If false, the bid will never succeed. Used to test bid failure
        """
        if not fulfillableBid:
            self.spotBid = '0.01'
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        self.launchCluster()
        # get the leader so we know the IP address - we don't need to wait since create cluster
        # already ensures the leader is running
        self.leader = AWSProvisioner._getLeader(wait=False,
                                                clusterName=self.clusterName)
        ctx = AWSProvisioner._buildContext(self.clusterName)

        assert len(self.getMatchingRoles(self.clusterName)) == 1
        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = [
            'virtualenv', '--system-site-packages', '--never-download',
            '/home/venv'
        ]
        self.sshUtil(venv_command)

        upgrade_command = [
            '/home/venv/bin/pip', 'install', 'setuptools==28.7.1'
        ]
        self.sshUtil(upgrade_command)

        yaml_command = ['/home/venv/bin/pip', 'install', 'pyyaml==3.12']
        self.sshUtil(yaml_command)

        self._getScript()

        toilOptions = [
            self.jobStore, '--batchSystem=mesos', '--workDir=/var/lib/toil',
            '--clean=always', '--retryCount=2', '--clusterStats=/home/',
            '--logDebug', '--logFile=/home/sort.log', '--provisioner=aws'
        ]

        if spotInstances:
            toilOptions.extend([
                '--preemptableNodeType=%s:%s' %
                (self.instanceType, self.spotBid),
                # The RNASeq pipeline does not specify a preemptability requirement so we
                # need to specify a default, otherwise jobs would never get scheduled.
                '--defaultPreemptable',
                '--maxPreemptableNodes=%s' % self.numWorkers
            ])
        else:
            toilOptions.extend([
                '--nodeType=' + self.instanceType,
                '--maxNodes=%s' % self.numWorkers
            ])

        self._runScript(toilOptions)

        assert len(self.getMatchingRoles(self.clusterName)) == 1

        checkStatsCommand = [
            '/home/venv/bin/python', '-c', 'import json; import os; '
            'json.load(open("/home/" + [f for f in os.listdir("/home/") '
            'if f.endswith(".json")].pop()))'
        ]

        self.sshUtil(checkStatsCommand)

        volumeID = self.getRootVolID()
        ctx = AWSProvisioner._buildContext(self.clusterName)
        AWSProvisioner.destroyCluster(self.clusterName)
        self.leader.update()
        for attempt in range(6):
            # https://github.com/BD2KGenomics/toil/issues/1567
            # retry this for up to 1 minute until the volume disappears
            try:
                ctx.ec2.get_all_volumes(volume_ids=[volumeID])
                time.sleep(10)
            except EC2ResponseError as e:
                if e.status == 400 and 'InvalidVolume.NotFound' in e.code:
                    break
                else:
                    raise
        else:
            self.fail('Volume with ID %s was not cleaned up properly' %
                      volumeID)

        assert len(self.getMatchingRoles(self.clusterName)) == 0
Example #28
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument("--leaderNodeType", dest="leaderNodeType", required=True,
                        help="Non-preemptable node type to use for the cluster leader.")
    parser.add_argument("--keyPairName", dest='keyPairName', required=True,
                        help="The name of the AWS or ssh key pair to include on the instance")
    parser.add_argument("--boto", dest='botoPath',
                        help="The path to the boto credentials directory. This is transferred to all "
                             " nodes in order to access the AWS jobStore from non-AWS instances.")
    parser.add_argument("-t", "--tag", metavar='NAME=VALUE', dest='tags', default=[], action='append',
                        help="Tags are added to the AWS cluster for this node and all of its "
                             "children. Tags are of the form:\n"
                             " -t key1=value1 --tag key2=value2\n"
                             "Multiple tags are allowed and each tag needs its own flag. By "
                             "default the cluster is tagged with "
                             " {\n"
                             "      \"Name\": clusterName,\n"
                             "      \"Owner\": IAM username\n"
                             " }. ")
    parser.add_argument("--vpcSubnet",
                        help="VPC subnet ID to launch cluster in. Uses default subnet if not specified. "
                        "This subnet needs to have auto assign IPs turned on.")
    parser.add_argument("--nodeTypes", dest='nodeTypes', default=None, type=str,
                        help="Comma-separated list of node types to create while launching the leader. The "
                             "syntax for each node type depends on the "
                             "provisioner used. For the aws provisioner this is the name of an "
                             "EC2 instance type followed by a colon and the price in dollar to "
                             "bid for a spot instance, for example 'c3.8xlarge:0.42'. Must also provide "
                             "the --workers argument to specify how many workers of each node type to create")
    parser.add_argument("-w", "--workers", dest='workers', default=None, type=str,
                        help="Comma-separated list of the number of workers of each node type to launch "
                             "alongside the leader when the "
                             "cluster is created. This can be useful if running toil without "
                             "auto-scaling but with need of more hardware support")
    parser.add_argument("--leaderStorage", dest='leaderStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for the leader instance. "
                             "This is an EBS volume.")
    parser.add_argument("--nodeStorage", dest='nodeStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for any worker instances "
                             "created when using the -w flag. This is an EBS volume.")
    config = parseBasicOptions(parser)
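    # Note: config.tags defaults to [], so the None check below is effectively
    # dead code; createTagsDict([]) presumably just yields an empty dict.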
    tagsDict = None if config.tags is None else createTagsDict(config.tags)

    spotBids = []
    nodeTypes = []
    preemptableNodeTypes = []
    numNodes = []
    numPreemptableNodes = []
    leaderSpotBid = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            logger.error('The aws extra must be installed to use this provisioner')
            raise
        provisioner = AWSProvisioner()
    elif config.provisioner == 'gce':
        logger.info('Using a gce provisioner.')
        try:
            from toil.provisioners.gceProvisioner import GCEProvisioner
        except ImportError:
            logger.error('The google extra must be installed to use this provisioner')
            raise
        provisioner = GCEProvisioner()
    else:
        assert False, 'unknown provisioner: %s' % config.provisioner


    # Parse the leader node type and spot bid. A ':' suffix, as in
    # 'm3.medium:0.2', marks a spot request; comparing lengths before and
    # after the split detects whether a bid suffix was removed.
    parsedBid = config.leaderNodeType.split(':', 1)
    if len(config.leaderNodeType) != len(parsedBid[0]):
        leaderSpotBid = float(parsedBid[1])
        config.leaderNodeType = parsedBid[0]

    if (config.nodeTypes or config.workers) and not (config.nodeTypes and config.workers):
        raise RuntimeError("The --nodeTypes and --workers options must be specified together.")
    if config.nodeTypes:
        nodeTypesList = config.nodeTypes.split(",")
        numWorkersList = config.workers.split(",")
        if len(nodeTypesList) != len(numWorkersList):
            raise RuntimeError("List of node types must be the same length as list of numbers of workers.")
        for nodeTypeStr, num in zip(nodeTypesList, numWorkersList):
            parsedBid = nodeTypeStr.split(':', 1)
            if len(nodeTypeStr) != len(parsedBid[0]):
                # A bid suffix marks a preemptable (spot) node type; see the
                # standalone sketch after this example.
                preemptableNodeTypes.append(parsedBid[0])
                spotBids.append(float(parsedBid[1]))
                numPreemptableNodes.append(int(num))
            else:
                nodeTypes.append(nodeTypeStr)
                numNodes.append(int(num))


    provisioner.launchCluster(leaderNodeType=config.leaderNodeType,
                              leaderSpotBid=leaderSpotBid,
                              nodeTypes=nodeTypes,
                              preemptableNodeTypes=preemptableNodeTypes,
                              numWorkers=numNodes,
                              numPreemptableWorkers=numPreemptableNodes,
                              keyName=config.keyPairName,
                              botoPath=config.botoPath,
                              clusterName=config.clusterName,
                              spotBids=spotBids,
                              userTags=tagsDict,
                              zone=config.zone,
                              leaderStorage=config.leaderStorage,
                              nodeStorage=config.nodeStorage,
                              vpcSubnet=config.vpcSubnet)
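
For reference, the node-type syntax handled above reduces to a small standalone parser. This is a sketch only; parseNodeType is an illustrative name, not a toil function:

def parseNodeType(nodeTypeStr):
    # 'c3.8xlarge:0.42' -> ('c3.8xlarge', 0.42); 'r3.xlarge' -> ('r3.xlarge', None)
    instanceType, _, bid = nodeTypeStr.partition(':')
    return instanceType, float(bid) if bid else None

assert parseNodeType('c3.8xlarge:0.42') == ('c3.8xlarge', 0.42)
assert parseNodeType('r3.xlarge') == ('r3.xlarge', None)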
Example #31
    def testAWSProvisionerUtils(self):
        clusterName = 'cluster-utils-test' + str(uuid.uuid4())
        keyName = os.getenv('TOIL_AWS_KEYNAME')

        try:
            # --provisioner flag should default to aws, so we're not explicitly
            # specifying that here
            system([
                self.toilMain, 'launch-cluster', '--leaderNodeType=t2.micro',
                '--keyPairName=' + keyName, clusterName
            ])
        finally:
            system([
                self.toilMain, 'destroy-cluster', '--provisioner=aws',
                clusterName
            ])
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner

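            # The leader should carry the user-supplied tags plus the default
            # Name and Owner tags.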
            userTags = {'key1': 'value1', 'key2': 'value2', 'key3': 'value3'}
            tags = {'Name': clusterName, 'Owner': keyName}
            tags.update(userTags)

            # launch a preemptable leader with the same cluster name
            system([
                self.toilMain, 'launch-cluster', '-t', 'key1=value1', '-t',
                'key2=value2', '--tag', 'key3=value3',
                '--leaderNodeType=m3.medium:0.2', '--keyPairName=' + keyName,
                clusterName, '--provisioner=aws', '--logLevel=DEBUG'
            ])

            # test leader tags
            leaderTags = AWSProvisioner._getLeader(clusterName).tags
            self.assertEqual(tags, leaderTags)

            # Test strict host key checking
            # Doesn't work when run locally.
            if keyName == 'jenkins@jenkins-master':
                try:
                    AWSProvisioner.sshLeader(clusterName=clusterName,
                                             strict=True)
                except RuntimeError:
                    pass
                else:
                    self.fail(
                        "Host key verification passed where it should have failed"
                    )

            # Add the host key to known_hosts so that the rest of the tests can
            # pass without choking on the verification prompt.
            AWSProvisioner.sshLeader(clusterName=clusterName,
                                     strict=True,
                                     sshOptions=['-oStrictHostKeyChecking=no'])

            system([
                self.toilMain, 'ssh-cluster', '--provisioner=aws', clusterName
            ])

            # Each of these strings must survive shell quoting on its way to
            # the remote interpreter, which asserts that it arrived unchanged.
            testStrings = [
                "'foo'", '"foo"', '  foo', '$PATH', '"', "'", '\\', '| cat',
                '&& cat', '; cat'
            ]
            for test in testStrings:
                logger.info('Testing SSH with special string: %s', test)
                compareTo = "import sys; assert sys.argv[1]==%r" % test
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['python', '-', test],
                                         input=compareTo)

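            # A bogus remote command must surface as a RuntimeError rather
            # than be swallowed silently.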
            try:
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['nonsenseShouldFail'])
            except RuntimeError:
                pass
            else:
                self.fail(
                    'The remote command failed silently where it should have '
                    'raised an error')

            AWSProvisioner.sshLeader(
                clusterName=clusterName,
                args=[
                    'python', '-c',
                    "import os; assert os.environ['TOIL_WORKDIR']=='/var/lib/toil'"
                ])

            # `toil rsync-cluster`
            # Testing special characters - string.punctuation
            fname = '!"#$%&\'()*+,-.;<=>:\\ ?@[\\\\]^_`{|}~'
            testData = os.urandom(3 * (10**6))
            with tempfile.NamedTemporaryFile(suffix=fname) as tmpFile:
                relpath = os.path.basename(tmpFile.name)
                tmpFile.write(testData)
                tmpFile.flush()
                # Upload file to leader
                AWSProvisioner.rsyncLeader(clusterName=clusterName,
                                           args=[tmpFile.name, ":"])
                # Ensure file exists
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=["test", "-e", relpath])
            tmpDir = tempfile.mkdtemp()
            # Download the file again and make sure it's the same file
            # `--protect-args` needed because remote bash chokes on special characters
            AWSProvisioner.rsyncLeader(
                clusterName=clusterName,
                args=["--protect-args", ":" + relpath, tmpDir])
            # testData is raw bytes, so read back in binary mode for a
            # byte-exact comparison
            with open(os.path.join(tmpDir, relpath), "rb") as f:
                self.assertEqual(
                    f.read(), testData,
                    "Downloaded file does not match original file")
        finally:
            system([
                self.toilMain, 'destroy-cluster', '--provisioner=aws',
                clusterName
            ])
            try:
                shutil.rmtree(tmpDir)
            except NameError:
                pass