Example #1
    def testClusterScalingWithPreemptableJobs(self):
        """
        Test scaling simultaneously for a batch of preemptable and non-preemptable jobs.
        """
        config = Config()

        # Make defaults dummy values
        config.defaultMemory = 1
        config.defaultCores = 1
        config.defaultDisk = 1

        # Non-preemptable node parameters
        config.nodeType = Shape(20, 10, 10, 10)
        config.minNodes = 0
        config.maxNodes = 10

        # Preemptable node parameters
        config.preemptableNodeType = Shape(20, 10, 10, 10)
        config.minPreemptableNodes = 0
        config.maxPreemptableNodes = 10

        # Algorithm parameters
        config.alphaPacking = 0.8
        config.betaInertia = 1.2
        config.scaleInterval = 3

        self._testClusterScaling(config, numJobs=100, numPreemptableJobs=100)
Example #2
 def testBinPacking(self):
     """
     Tests the bin-packing method used by the cluster scaler.
     """
     for test in range(50):
         nodeShapes = [
             Shape(wallTime=random.choice(list(range(1, 100))),
                   memory=random.choice(list(range(1, 10))),
                   cores=random.choice(list(range(1, 10))),
                   disk=random.choice(list(range(1, 10))),
                   preemptable=False) for i in range(5)
         ]
         randomJobShape = lambda x: Shape(
             wallTime=random.choice(list(range(1, (3 * x.wallTime) + 1))),
             memory=random.choice(list(range(1, x.memory + 1))),
             cores=random.choice(list(range(1, x.cores + 1))),
             disk=random.choice(list(range(1, x.disk + 1))),
             preemptable=False)
         randomJobShapes = []
         for nodeShape in nodeShapes:
             numberOfJobs = random.choice(list(range(1, 1000)))
             randomJobShapes.extend(
                 [randomJobShape(nodeShape) for i in range(numberOfJobs)])
         startTime = time.time()
         numberOfBins = binPacking(jobShapes=randomJobShapes,
                                   nodeShapes=nodeShapes)
         logger.info("Made the following node reservations: %s" %
                     numberOfBins)
Example #3
    def testClusterScalingWithPreemptableJobs(self):
        """
        Test scaling simultaneously for a batch of preemptable and non-preemptable jobs.
        """
        config = Config()

        jobShape = Shape(20, 10, 10, 10, False)
        preemptableJobShape = Shape(20, 10, 10, 10, True)

        # Make defaults dummy values
        config.defaultMemory = 1
        config.defaultCores = 1
        config.defaultDisk = 1

        # Node parameters (one non-preemptable and one preemptable shape)
        config.nodeTypes = [jobShape, preemptableJobShape]
        config.minNodes = [0, 0]
        config.maxNodes = [10, 10]

        # Algorithm parameters
        config.targetTime = defaultTargetTime
        config.betaInertia = 0.9
        config.scaleInterval = 3

        self._testClusterScaling(config,
                                 numJobs=100,
                                 numPreemptableJobs=100,
                                 jobShape=jobShape)
Example #4
 def testMaxNodes(self):
     """
     Set the scaler to be very aggressive, give it a ton of jobs, and
     make sure it doesn't go over maxNodes.
     """
     self.config.targetTime = 1
     self.config.betaInertia = 0.0
     self.config.maxNodes = [2, 3]
     scaler = ClusterScaler(self.provisioner, self.leader, self.config)
     jobShapes = [
         Shape(wallTime=3600,
               cores=2,
               memory=h2b('1G'),
               disk=h2b('2G'),
               preemptable=True)
     ] * 1000
     jobShapes.extend([
         Shape(wallTime=3600,
               cores=2,
               memory=h2b('1G'),
               disk=h2b('2G'),
               preemptable=False)
     ] * 1000)
     estimatedNodeCounts = scaler.getEstimatedNodeCounts(
         jobShapes, defaultdict(int))
     self.assertEqual(estimatedNodeCounts[r3_8xlarge], 2)
     self.assertEqual(estimatedNodeCounts[c4_8xlarge_preemptable], 3)
Example #5
 def testPackingOneShape(self):
     """Pack one shape and check that the resulting reservations look sane."""
     self.bpf.nodeReservations[c4_8xlarge_preemptable] = [
         NodeReservation(c4_8xlarge_preemptable)
     ]
     self.bpf.addJobShape(
         Shape(wallTime=1000,
               cores=2,
               memory=h2b('1G'),
               disk=h2b('2G'),
               preemptable=True))
     self.assertEqual(self.bpf.nodeReservations[r3_8xlarge], [])
     self.assertEqual([
         x.shapes()
         for x in self.bpf.nodeReservations[c4_8xlarge_preemptable]
     ], [[
         Shape(wallTime=1000,
               memory=h2b('59G'),
               cores=34,
               disk=h2b('98G'),
               preemptable=True),
         Shape(wallTime=2600,
               memory=h2b('60G'),
               cores=36,
               disk=h2b('100G'),
               preemptable=True)
     ]])
Example #6
    def testPathologicalCase(self):
        """Test a pathological case where only one node can be requested to fit months' worth of jobs.

        If the reservation is extended to fit a long job, and the
        bin-packer naively searches through all the reservation slices
        to find the first slice that fits, it will happily assign the
        first slot that fits the job, even if that slot occurs days in
        the future.
        """
        # Add one job that partially fills an r3.8xlarge for 1000 hours
        self.bpf.addJobShape(
            Shape(wallTime=3600000,
                  memory=h2b('10G'),
                  cores=0,
                  disk=h2b('10G'),
                  preemptable=False))
        for _ in range(500):
            # Add 500 CPU-hours worth of jobs that fill an r3.8xlarge
            self.bpf.addJobShape(
                Shape(wallTime=3600,
                      memory=h2b('26G'),
                      cores=32,
                      disk=h2b('60G'),
                      preemptable=False))
        # Hopefully we didn't assign just one node to cover all those jobs.
        self.assertNotEqual(self.bpf.getRequiredNodes(), {
            r3_8xlarge: 1,
            c4_8xlarge_preemptable: 0
        })
Example #7
 def split(x, y, t):
     """
     Partition a node allocation into two
     """
     return (Shape(t, x.memory - y.memory, x.cores - y.cores,
                   x.disk - y.disk),
             NodeReservation(
                 Shape(x.wallTime - t, x.memory, x.cores, x.disk)))
Example #8
def split(nodeShape, jobShape, wallTime):
    """
    Partition a node allocation into two to fit the job, returning the
    modified shape of the node and a new node reservation for
    the extra time that the job didn't fill.
    """
    return (Shape(wallTime, nodeShape.memory - jobShape.memory,
                  nodeShape.cores - jobShape.cores,
                  nodeShape.disk - jobShape.disk, nodeShape.preemptable),
            NodeReservation(
                Shape(nodeShape.wallTime - wallTime, nodeShape.memory,
                      nodeShape.cores, nodeShape.disk, nodeShape.preemptable)))
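
For orientation, here is a rough sketch of what a call to split is assumed to return. The numbers are illustrative (chosen to mirror the c4.8xlarge reservation in Example #5, not taken from a real run), and Shape, NodeReservation, split, and h2b are assumed to be in scope as in the snippets above:

    # Hypothetical values; Shape arguments are (wallTime, memory, cores, disk, preemptable)
    # and h2b is assumed to convert a human-readable size to bytes.
    node = Shape(3600, h2b('60G'), 36, h2b('100G'), True)  # one hour of a full node
    job = Shape(1000, h2b('1G'), 2, h2b('2G'), True)       # a job that runs for 1000 seconds
    duringJob, afterJob = split(node, job, job.wallTime)
    # duringJob == Shape(1000, h2b('59G'), 34, h2b('98G'), True): resources still free while the job runs
    # afterJob  == NodeReservation(Shape(2600, h2b('60G'), 36, h2b('100G'), True)): the untouched rest of the hour
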
Example #9
 def split(nodeShape, jobShape, t):
     """
     Partition a node allocation into two
     """
     return (Shape(t, nodeShape.memory - jobShape.memory,
                   nodeShape.cores - jobShape.cores,
                   nodeShape.disk - jobShape.disk,
                   nodeShape.preemptable),
             NodeReservation(
                 Shape(nodeShape.wallTime - t, nodeShape.memory,
                       nodeShape.cores, nodeShape.disk,
                       nodeShape.preemptable)))
Example #10
 def tryRun(self):
     while not self.stop:
         with throttle(self.scaler.config.scaleInterval):
             try:
                 queuedJobs = self.scaler.leader.getJobs()
                 queuedJobShapes = [
                     Shape(wallTime=self.scaler.getAverageRuntime(
                         jobName=job.jobName,
                         service=isinstance(job, ServiceJobNode)),
                           memory=job.memory,
                           cores=job.cores,
                           disk=job.disk,
                           preemptable=job.preemptable)
                     for job in queuedJobs
                 ]
                 currentNodeCounts = {}
                 for nodeShape in self.scaler.nodeShapes:
                     nodeType = self.scaler.nodeShapeToType[nodeShape]
                     currentNodeCounts[nodeShape] = len(
                         self.scaler.leader.provisioner.
                         getProvisionedWorkers(
                             nodeType=nodeType,
                             preemptable=nodeShape.preemptable))
                 estimatedNodeCounts = self.scaler.getEstimatedNodeCounts(
                     queuedJobShapes, currentNodeCounts)
                 self.scaler.updateClusterSize(estimatedNodeCounts)
                 if self.stats:
                     self.stats.checkStats()
             except:
                 logger.exception(
                     "Exception encountered in scaler thread. Making a best-effort "
                     "attempt to keep going, but things may go wrong from now on."
                 )
     self.scaler.shutDown()
Example #11
    def testClusterScaling(self):
        """
        Test scaling for a batch of non-preemptable jobs and no preemptable jobs (makes debugging
        easier).
        """
        config = Config()

        # Make defaults dummy values
        config.defaultMemory = 1
        config.defaultCores = 1
        config.defaultDisk = 1

        # No preemptable nodes/jobs
        config.maxPreemptableNodes = 0  # No preemptable nodes

        # Non-preemptable parameters
        config.nodeType = Shape(20, 10, 10, 10)
        config.minNodes = 0
        config.maxNodes = 10

        # Algorithm parameters
        config.alphaPacking = 0.8
        config.betaInertia = 1.2
        config.scaleInterval = 3

        self._testClusterScaling(config, numJobs=100, numPreemptableJobs=0)
Example #12
 def getNodeShape(self, preemptable=False):
     instanceType = self._getInstanceType(preemptable)
     return Shape(wallTime=60 * 60,
                  memory=instanceType.memory * 2**30,
                  cores=instanceType.cores,
                  disk=(instanceType.disks * instanceType.disk_capacity *
                        2**30))
Example #13
    def addCompletedJob(self, job, wallTime):
        """
        Adds the shape of a completed job to the queue, allowing the scaler to use the last N
        completed jobs when estimating how many nodes are required in the cluster.
        :param toil.job.JobNode job: The memory, core and disk requirements of the completed job
        :param int wallTime: The wall-time taken to complete the job in seconds.
        """

        # Adjust average runtimes to include this job.
        if job.jobName in self.jobNameToAvgRuntime:
            prevAvg = self.jobNameToAvgRuntime[job.jobName]
            prevNum = self.jobNameToNumCompleted[job.jobName]
            self.jobNameToAvgRuntime[job.jobName] = float(
                prevAvg * prevNum + wallTime) / (prevNum + 1)
            self.jobNameToNumCompleted[job.jobName] += 1
        else:
            self.jobNameToAvgRuntime[job.jobName] = wallTime
            self.jobNameToNumCompleted[job.jobName] = 1

        self.totalJobsCompleted += 1
        self.totalAvgRuntime = float(self.totalAvgRuntime *
                                     (self.totalJobsCompleted - 1) +
                                     wallTime) / self.totalJobsCompleted

        s = Shape(wallTime=wallTime,
                  memory=job.memory,
                  cores=job.cores,
                  disk=job.disk,
                  preemptable=job.preemptable)
        self.scaler.addRecentJobShape(s)
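
The average-runtime bookkeeping above is a standard incremental mean. A tiny worked example with assumed values:

    # Assumed values for illustration: four earlier completions averaging 100 s,
    # plus a newly completed job that took 200 s.
    prevAvg, prevNum, wallTime = 100.0, 4, 200
    newAvg = float(prevAvg * prevNum + wallTime) / (prevNum + 1)
    assert newAvg == 120.0  # (100 * 4 + 200) / 5
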
Example #14
    def testClusterScaling(self):
        """
        Test scaling for a batch of non-preemptable jobs and no preemptable jobs (makes debugging
        easier).
        """
        config = Config()

        # Make defaults dummy values
        config.defaultMemory = 1
        config.defaultCores = 1
        config.defaultDisk = 1

        # No preemptable nodes/jobs
        config.maxPreemptableNodes = []  # No preemptable nodes

        # Non-preemptable parameters
        config.nodeTypes = [Shape(20, 10, 10, 10, False)]
        config.minNodes = [0]
        config.maxNodes = [10]

        # Algorithm parameters
        config.targetTime = defaultTargetTime
        config.betaInertia = 0.1
        config.scaleInterval = 3

        self._testClusterScaling(config,
                                 numJobs=100,
                                 numPreemptableJobs=0,
                                 jobShape=config.nodeTypes[0])
Example #15
 def subtract(self, jobShape):
     """
     Subtracts the resources necessary to run a jobShape from the reservation.
     """
     self.shape = Shape(self.shape.wallTime,
                        self.shape.memory - jobShape.memory,
                        self.shape.cores - jobShape.cores,
                        self.shape.disk - jobShape.disk,
                        self.shape.preemptable)
Example #16
 def testAddingInitialNode(self):
     """Pack one shape when no nodes are available and confirm that we fit one node properly."""
     self.bpf.addJobShape(Shape(wallTime=1000,
                                cores=2,
                                memory=h2b('1G'),
                                disk=h2b('2G'),
                                preemptable=True))
     self.assertEqual([x.shapes() for x in self.bpf.nodeReservations[c4_8xlarge_preemptable]],
                      [[Shape(wallTime=1000,
                              memory=h2b('59G'),
                              cores=34,
                              disk=h2b('98G'),
                              preemptable=True),
                        Shape(wallTime=2600,
                              memory=h2b('60G'),
                              cores=36,
                              disk=h2b('100G'),
                              preemptable=True)]])
Example #17
 def subtract(nodeShape, jobShape):
     """
     Adjust available resources of a node allocation as a job is scheduled within it.
     """
     return Shape(nodeShape.wallTime,
                  nodeShape.memory - jobShape.memory,
                  nodeShape.cores - jobShape.cores,
                  nodeShape.disk - jobShape.disk,
                  nodeShape.preemptable)
Example #18
 def testBinPacking(self):
     """
     Tests the bin-packing method used by the cluster scaler.
     """
     for test in range(50):
         nodeShape = Shape(wallTime=random.choice(range(1, 100)),
                           memory=random.choice(range(1, 10)),
                           cores=random.choice(range(1, 10)),
                           disk=random.choice(range(1, 10)))
         randomJobShape = lambda x: Shape(wallTime=random.choice(range(1, (3 * x.wallTime) + 1)),
                                          memory=random.choice(range(1, x.memory + 1)),
                                          cores=random.choice(range(1, x.cores + 1)),
                                          disk=random.choice(range(1, x.disk + 1)))
         numberOfJobs = random.choice(range(1, 1000))
         randomJobShapes = [randomJobShape(nodeShape) for _ in range(numberOfJobs)]
         startTime = time.time()
         numberOfBins = binPacking(randomJobShapes, nodeShape)
         logger.info("For node shape %s and %s job-shapes got %s bins in %s seconds, %s jobs/bin" % 
                     (nodeShape, numberOfJobs, numberOfBins, time.time() - startTime, float(numberOfJobs)/numberOfBins))
Example #19
 def addCompletedJob(self, job, wallTime):
     """
     Adds the shape of a completed job to the queue, allowing the scaler to use the last N
     completed jobs when estimating how many nodes are required in the cluster.
     :param toil.job.JobNode job: The memory, core and disk requirements of the completed job
     :param int wallTime: The wall-time taken to complete the job in seconds.
     """
     s = Shape(wallTime=wallTime, memory=job.memory, cores=job.cores, disk=job.disk)
     if job.preemptable and self.preemptableScaler is not None:
         self.preemptableScaler.jobShapes.add(s)
     else:
         self.scaler.jobShapes.add(s)
Example #20
 def testJobTooLargeForAllNodes(self):
     """
     If a job is too large for all node types, the scaler should print a
     warning, but definitely not crash.
     """
     # Takes more RAM than an r3.8xlarge
     largerThanR3 = Shape(wallTime=3600,
                          memory=h2b('360G'),
                          cores=32,
                          disk=h2b('600G'),
                          preemptable=False)
     self.bpf.addJobShape(largerThanR3)
Example #21
 def __init__(self, config, nodeShape, N=1000):
     # As a prior we start off with 10 jobs each with the default memory, cores, and disk. To
     # estimate the running time we use the default wall time of each node allocation,
     # so that one job will fill the time per node.
     self.jobShapes = [Shape(wallTime=nodeShape.wallTime,
                             memory=config.defaultMemory,
                             cores=config.defaultCores,
                             disk=config.defaultDisk)] * 10
     # Calls to add and getLastNJobShapes may be concurrent
     self.lock = Lock()
     # Number of jobs to average over
     self.N = N
Example #22
    def run1000JobsOnMicros(self, jobCores, jobMem, jobDisk, jobTime, globalTargetTime):
        """Test packing 1000 jobs on t2.micros.  Depending on the targetTime and resources,
        these should pack differently.
        """
        nodeShapes = [t2_micro]
        bpf = BinPackedFit(nodeShapes, targetTime=globalTargetTime)

        for _ in range(1000):
            bpf.addJobShape(Shape(wallTime=jobTime,
                                   memory=jobMem,
                                   cores=jobCores,
                                   disk=jobDisk,
                                   preemptable=False))
        return bpf.getRequiredNodes()
Example #23
    def addCompletedJob(self, issuedJob, wallTime):
        """
        Adds the shape of a completed job to the queue, allowing the scaler to use the last N
        completed jobs when estimating how many nodes are required in the cluster.
        
        :param IssuedJob issuedJob: The memory, core and disk requirements of the completed job

        :param int wallTime: The wall-time taken to complete the job in seconds.
        """
        s = Shape(wallTime=wallTime, memory=issuedJob.memory,
                  cores=issuedJob.cores, disk=issuedJob.disk)
        if issuedJob.preemptable:
            self.preemptableRunningJobShape.add(s)
        else:
            self.runningJobShape.add(s)
Example #24
    def getNodeShape(self, nodeType=None, preemptable=False):
        # FIXME: this should only need to be called once, but failed
        self._instanceTypes = self._azureComputeClient.virtual_machine_sizes.list(self._zone)

        instanceType = next(vmType for vmType in self._instanceTypes if vmType.name == nodeType)
        disk = instanceType.max_data_disk_count * instanceType.os_disk_size_in_mb * 2 ** 30

        # Underestimate memory by 100M to prevent autoscaler from disagreeing with
        # mesos about whether a job can run on a particular node type
        memory = (instanceType.memory_in_mb - 0.1) * 2 ** 30

        return Shape(wallTime=60 * 60,
                     memory=memory,
                     cores=instanceType.number_of_cores,
                     disk=disk,
                     preemptable=False)
Example #25
    def getNodeShape(self, nodeType, preemptable=False):
        instanceType = ec2_instance_types[nodeType]

        disk = instanceType.disks * instanceType.disk_capacity * 2 ** 30
        if disk == 0:
            # This is an EBS-backed instance. We will use the root
            # volume, so add the amount of EBS storage requested for
            # the root volume
            disk = self.nodeStorage * 2 ** 30

        # Underestimate memory by 100M to prevent autoscaler from disagreeing with
        # mesos about whether a job can run on a particular node type
        memory = (instanceType.memory - 0.1) * 2 ** 30
        return Shape(wallTime=60 * 60,
                     memory=memory,
                     cores=instanceType.cores,
                     disk=disk,
                     preemptable=preemptable)
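
As a rough illustration of the disk fallback and memory headroom above, here is a small sketch with assumed figures (not a real EC2 instance type); the names mirror the fields used in the snippet, and Shape is assumed to be importable as in the examples above:

    # Hypothetical figures: 4 cores, 16 GiB memory, no ephemeral disks,
    # and 50 GiB of EBS root storage requested at provisioning time.
    memoryGiB, cores, disks, disk_capacity, nodeStorage = 16, 4, 0, 0, 50
    disk = disks * disk_capacity * 2 ** 30
    if disk == 0:
        disk = nodeStorage * 2 ** 30          # fall back to the EBS root volume
    memory = (memoryGiB - 0.1) * 2 ** 30      # leave ~100M of headroom for Mesos
    shape = Shape(wallTime=60 * 60, memory=memory, cores=cores, disk=disk, preemptable=False)
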
Example #26
    def getNodeShape(self, nodeType=None, preemptable=False):
        instanceTypes = self._azureComputeClient.virtual_machine_sizes.list(
            self.region)

        # Data model: https://docs.microsoft.com/en-us/python/api/azure.mgmt.compute.v2017_12_01.models.virtualmachinesize?view=azure-python
        instanceType = next(vmType for vmType in instanceTypes
                            if vmType.name == nodeType)

        disk = instanceType.max_data_disk_count * instanceType.os_disk_size_in_mb * 2**30

        # Underestimate memory by 100M to prevent autoscaler from disagreeing with
        # mesos about whether a job can run on a particular node type
        memory = (instanceType.memory_in_mb - 0.1) * 2**30

        return Shape(wallTime=60 * 60,
                     memory=memory,
                     cores=instanceType.number_of_cores,
                     disk=disk,
                     preemptable=False)
Example #27
    def testPreemptableDeficitResponse(self):
        """
        When a preemptable deficit was detected by a previous run of the
        loop, the scaler should add non-preemptable nodes to
        compensate in proportion to preemptableCompensation.
        """
        self.config.targetTime = 1
        self.config.betaInertia = 0.0
        self.config.maxNodes = [10, 10]
        # This should mean that one non-preemptable node is launched
        # for every two preemptable nodes "missing".
        self.config.preemptableCompensation = 0.5
        # In this case, we want to explicitly set up the config so
        # that we can have preemptable and non-preemptable nodes of
        # the same type. That is the only situation where
        # preemptableCompensation applies.
        self.config.nodeTypes = [c4_8xlarge_preemptable, c4_8xlarge]
        self.provisioner.setAutoscaledNodeTypes([
            ({t}, None) for t in self.config.nodeTypes
        ])

        scaler = ClusterScaler(self.provisioner, self.leader, self.config)
        # Simulate a situation where a previous run caused a
        # "deficit" of 5 preemptable nodes (e.g. a spot bid was lost)
        scaler.preemptableNodeDeficit[c4_8xlarge] = 5
        # Add a bunch of preemptable jobs (so the bin-packing
        # estimate for the non-preemptable node should still be 0)
        jobShapes = [
            Shape(wallTime=3600,
                  cores=2,
                  memory=h2b('1G'),
                  disk=h2b('2G'),
                  preemptable=True)
        ] * 1000
        estimatedNodeCounts = scaler.getEstimatedNodeCounts(
            jobShapes, defaultdict(int))
        # We don't care about the estimated size of the preemptable
        # nodes. All we want to know is if we responded to the deficit
        # properly: 0.5 * 5 (preemptableCompensation * the deficit) = 3 (rounded up).
        self.assertEqual(
            estimatedNodeCounts[self.provisioner.node_shapes_for_testing[1]],
            3)
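
A minimal sketch of the compensation arithmetic this test relies on, assuming (as the comments above describe) that the scaler adds ceil(preemptableCompensation * deficit) non-preemptable nodes for a detected preemptable deficit:

    import math

    preemptableCompensation = 0.5  # from the test's config
    deficit = 5                    # simulated lost preemptable nodes
    compensation = int(math.ceil(preemptableCompensation * deficit))
    assert compensation == 3       # 2.5 rounded up, matching the assertion above
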
Example #28
    def getNodeShape(self, nodeType, preemptable=False):
        # TODO: read this value only once
        sizes = self._gceDriver.list_sizes(location=self._zone)
        sizes = [x for x in sizes if x.name == nodeType]
        assert len(sizes) == 1
        instanceType = sizes[0]

        disk = 0  # instanceType.disks * instanceType.disk_capacity * 2 ** 30
        if disk == 0:
            # This is an EBS-backed instance. We will use the root
            # volume, so add the amount of EBS storage requested for the root volume
            disk = self._nodeStorage * 2**30

        # RAM is in MB.
        # Underestimate memory by 100M to prevent autoscaler from disagreeing with
        # mesos about whether a job can run on a particular node type
        memory = (instanceType.ram / 1000 - 0.1) * 2**30
        return Shape(wallTime=60 * 60,
                     memory=memory,
                     cores=instanceType.extra['guestCpus'],
                     disk=disk,
                     preemptable=preemptable)
Example #29
 def split(x, y, t):
     """
     Partition a node allocation into two.
     """
     return (Shape(t, x.memory - y.memory, x.cores - y.cores,
                   x.disk - y.disk),
             NodeReservation(
                 Shape(x.wallTime - t, x.memory, x.cores,
                       x.disk)))
Example #30
 def subtract(x, y):
     """
     Adjust available resources of a node allocation as a job is scheduled within it.
     """
     return Shape(x.wallTime, x.memory - y.memory,
                  x.cores - y.cores, x.disk - y.disk)