Example #1
    def run1000JobsOnMicros(self, jobCores, jobMem, jobDisk, jobTime, globalTargetTime):
        """Test packing 1000 jobs on t2.micros.  Depending on the targetTime and resources,
        these should pack differently.
        """
        nodeShapes = [t2_micro]
        bpf = BinPackedFit(nodeShapes, targetTime=globalTargetTime)

        for _ in range(1000):
            bpf.addJobShape(Shape(wallTime=jobTime,
                                  memory=jobMem,
                                  cores=jobCores,
                                  disk=jobDisk,
                                  preemptable=False))
        return bpf.getRequiredNodes()
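
The helper returns the result of BinPackedFit.getRequiredNodes(), which, per the assertions in Example #3 below, is a dict mapping each node Shape to the number of nodes of that shape to provision. A minimal hedged usage sketch, with parameter values taken from testHighTargetTime in Example #3:

    # Hedged sketch; parameters mirror testHighTargetTime below.
    allocation = self.run1000JobsOnMicros(jobCores=1,
                                          jobMem=h2b('1G'),
                                          jobDisk=h2b('1G'),
                                          jobTime=300,
                                          globalTargetTime=3600)
    # Twelve 300-second jobs fit in each 3600-second window; 1000/12 -> 84 nodes.
    assert allocation == {t2_micro: 84}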
Example #2
def setUp(self):
    self.nodeShapes = [c4_8xlarge_preemptable, r3_8xlarge]
    self.bpf = BinPackedFit(self.nodeShapes)
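
This setUp references module-level Shape fixtures. A plausible reconstruction of them, inferred from the reservation assertions and docstrings in Example #3 (the 3600-second wallTime window and the r3_8xlarge memory/disk figures are assumptions; exact values may differ across Toil versions):

# Hypothetical fixture definitions, reconstructed from the tests below.
t2_micro = Shape(wallTime=3600, memory=h2b('1G'), cores=1,
                 disk=h2b('8G'), preemptable=False)
c4_8xlarge = Shape(wallTime=3600, memory=h2b('60G'), cores=36,
                   disk=h2b('100G'), preemptable=False)
c4_8xlarge_preemptable = Shape(wallTime=3600, memory=h2b('60G'), cores=36,
                               disk=h2b('100G'), preemptable=True)
r3_8xlarge = Shape(wallTime=3600, memory=h2b('260G'), cores=32,
                   disk=h2b('600G'), preemptable=False)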
Example #3
class BinPackingTest(ToilTest):
    def setUp(self):
        self.nodeShapes = [c4_8xlarge_preemptable, r3_8xlarge]
        self.bpf = BinPackedFit(self.nodeShapes)

    def testPackingOneShape(self):
        """Pack one shape and check that the resulting reservations look sane."""
        self.bpf.nodeReservations[c4_8xlarge_preemptable] = [
            NodeReservation(c4_8xlarge_preemptable)
        ]
        self.bpf.addJobShape(
            Shape(wallTime=1000,
                  cores=2,
                  memory=h2b('1G'),
                  disk=h2b('2G'),
                  preemptable=True))
        self.assertEqual(self.bpf.nodeReservations[r3_8xlarge], [])
        self.assertEqual([
            x.shapes()
            for x in self.bpf.nodeReservations[c4_8xlarge_preemptable]
        ], [[
            Shape(wallTime=1000,
                  memory=h2b('59G'),
                  cores=34,
                  disk=h2b('98G'),
                  preemptable=True),
            Shape(wallTime=2600,
                  memory=h2b('60G'),
                  cores=36,
                  disk=h2b('100G'),
                  preemptable=True)
        ]])

    def testSorting(self):
        """
        Test that sorting is correct: preemptable, then memory, then cores, then disk,
        then wallTime.
        """
        shapeList = [
            c4_8xlarge_preemptable, r3_8xlarge, c4_8xlarge, c4_8xlarge,
            t2_micro, t2_micro, c4_8xlarge, r3_8xlarge, r3_8xlarge, t2_micro
        ]
        shapeList.sort()
        assert shapeList == [
            c4_8xlarge_preemptable, t2_micro, t2_micro, t2_micro, c4_8xlarge,
            c4_8xlarge, c4_8xlarge, r3_8xlarge, r3_8xlarge, r3_8xlarge
        ]
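
    # The ordering asserted above is consistent with Shape comparing like a
    # tuple. A hypothetical key that reproduces it (not necessarily Toil's
    # actual __lt__ implementation):
    #
    #     def shape_sort_key(shape):
    #         return (not shape.preemptable, shape.memory, shape.cores,
    #                 shape.disk, shape.wallTime)
    #
    # Preemptable shapes sort first, then ascending memory, cores, disk,
    # and wallTime, which yields exactly the expected list above.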

    def testAddingInitialNode(self):
        """Pack one shape when no nodes are available and confirm that we fit one node properly."""
        self.bpf.addJobShape(
            Shape(wallTime=1000,
                  cores=2,
                  memory=h2b('1G'),
                  disk=h2b('2G'),
                  preemptable=True))
        self.assertEqual([
            x.shapes()
            for x in self.bpf.nodeReservations[c4_8xlarge_preemptable]
        ], [[
            Shape(wallTime=1000,
                  memory=h2b('59G'),
                  cores=34,
                  disk=h2b('98G'),
                  preemptable=True),
            Shape(wallTime=2600,
                  memory=h2b('60G'),
                  cores=36,
                  disk=h2b('100G'),
                  preemptable=True)
        ]])

    def testLowTargetTime(self):
        """
        Test that a low targetTime (0) parallelizes jobs aggressively (1000 queued jobs require
        1000 nodes).

        Ideally, low targetTime means: Start quickly and maximize parallelization after the
        cpu/disk/mem have been packed.

        Disk/cpu/mem packing is prioritized first, so we set job resource reqs so that each
        t2.micro (1 cpu/8G disk/1G RAM) can only run one job at a time with its resources.

        Each job is parametrized to take 300 seconds, so at minimum one job fits into each
        node's 0-second window; we therefore expect 1000 nodes.
        """
        allocation = self.run1000JobsOnMicros(jobCores=1,
                                              jobMem=h2b('1G'),
                                              jobDisk=h2b('1G'),
                                              jobTime=300,
                                              globalTargetTime=0)
        self.assertEqual(allocation, {t2_micro: 1000})

    def testHighTargetTime(self):
        """
        Test that a high targetTime (3600 seconds) maximizes packing within the targetTime.

        Ideally, high targetTime means: Maximize packing within the targetTime after the
        cpu/disk/mem have been packed.

        Disk/cpu/mem packing is prioritized first, so we set job resource reqs so that each
        t2.micro (1 cpu/8G disk/1G RAM) can only run one job at a time with its resources.

        Each job is parametrized to take 300 seconds, so 12 of them should fit into each node's
        3600 second window.  1000/12 = 83.33, so we expect 84 nodes.
        """
        allocation = self.run1000JobsOnMicros(jobCores=1,
                                              jobMem=h2b('1G'),
                                              jobDisk=h2b('1G'),
                                              jobTime=300,
                                              globalTargetTime=3600)
        self.assertEqual(allocation, {t2_micro: 84})
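
    # A hedged aside spelling out the arithmetic from the docstring:
    #
    #     import math
    #     jobs_per_node = 3600 // 300          # 12 serial 300s jobs per window
    #     math.ceil(1000 / jobs_per_node)      # ceil(83.33) == 84 nodes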

    def testZeroResourceJobs(self):
        """
        Test that jobs requiring zero cpu/disk/mem pack first, regardless of targetTime.

        Disk/cpu/mem packing is prioritized first, so we set job resource reqs so that each
        t2.micro (1 cpu/8G disk/1G RAM) can run a seemingly infinite number of jobs with its
        resources.

        Since all jobs should pack cpu/disk/mem-wise on a t2.micro, we expect only one t2.micro to
        be provisioned.  If we raise the jobs' resource requirements, as in testLowTargetTime,
        the packer will launch 1000 t2.micros.
        """
        allocation = self.run1000JobsOnMicros(jobCores=0,
                                              jobMem=0,
                                              jobDisk=0,
                                              jobTime=300,
                                              globalTargetTime=0)
        self.assertEqual(allocation, {t2_micro: 1})

    def testLongRunningJobs(self):
        """
        Test that jobs with long run times (especially service jobs) are aggressively parallelized.

        This is important, because services are one case where the degree of parallelization
        really, really matters. If you have multiple services, they may all need to be running
        simultaneously before any real work can be done.

        Despite setting globalTargetTime=3600, this should launch 1000 t2.micros because each job's
        estimated runtime (30000 seconds) extends well beyond 3600 seconds.
        """
        allocation = self.run1000JobsOnMicros(jobCores=1,
                                              jobMem=h2b('1G'),
                                              jobDisk=h2b('1G'),
                                              jobTime=30000,
                                              globalTargetTime=3600)
        self.assertEqual(allocation, {t2_micro: 1000})

    def run1000JobsOnMicros(self, jobCores, jobMem, jobDisk, jobTime,
                            globalTargetTime):
        """Test packing 1000 jobs on t2.micros.  Depending on the targetTime and resources,
        these should pack differently.
        """
        nodeShapes = [t2_micro]
        bpf = BinPackedFit(nodeShapes, targetTime=globalTargetTime)

        for _ in range(1000):
            bpf.addJobShape(
                Shape(wallTime=jobTime,
                      memory=jobMem,
                      cores=jobCores,
                      disk=jobDisk,
                      preemptable=False))
        return bpf.getRequiredNodes()

    def testPathologicalCase(self):
        """Test a pathological case where only one node can be requested to fit months' worth of jobs.

        If the reservation is extended to fit a long job, and the
        bin-packer naively searches through all the reservation slices
        to find the first slice that fits, it will happily assign the
        first slot that fits the job, even if that slot occurs days in
        the future.
        """
        # Add one job that partially fills an r3.8xlarge for 1000 hours
        self.bpf.addJobShape(
            Shape(wallTime=3600000,
                  memory=h2b('10G'),
                  cores=0,
                  disk=h2b('10G'),
                  preemptable=False))
        for _ in range(500):
            # Add 500 CPU-hours worth of jobs that fill an r3.8xlarge
            self.bpf.addJobShape(
                Shape(wallTime=3600,
                      memory=h2b('26G'),
                      cores=32,
                      disk=h2b('60G'),
                      preemptable=False))
        # Hopefully we didn't assign just one node to cover all those jobs.
        self.assertNotEqual(self.bpf.getRequiredNodes(), {
            r3_8xlarge: 1,
            c4_8xlarge_preemptable: 0
        })
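
    # A hedged illustration of the pitfall described in the docstring: the
    # 1000-hour reservation spans 3600000 // 3600 == 1000 one-hour slices, so
    # a naive first-fit scan over all slices would find room for every one of
    # the 500 one-hour jobs on that single node, scheduling them days or
    # months into the future instead of provisioning additional nodes.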

    def testJobTooLargeForAllNodes(self):
        """
        If a job is too large for all node types, the scaler should print a
        warning, but definitely not crash.
        """
        # Takes more RAM than an r3.8xlarge
        largerThanR3 = Shape(wallTime=3600,
                             memory=h2b('360G'),
                             cores=32,
                             disk=h2b('600G'),
                             preemptable=False)
        self.bpf.addJobShape(largerThanR3)
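
    # A possible extension (hypothetical, not part of the original test):
    # assert the warning explicitly with unittest's assertLogs, assuming the
    # scaler emits it via the standard logging module:
    #
    #     with self.assertLogs(level='WARNING'):
    #         self.bpf.addJobShape(largerThanR3)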
Example #4
def setUp(self):
    self.node_shapes_for_testing = [c4_8xlarge_preemptable, r3_8xlarge]
    self.bpf = BinPackedFit(self.node_shapes_for_testing)
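
For completeness, the imports these snippets assume. The module paths follow the Toil source tree but are assumptions and may differ between Toil versions:

from toil.provisioners.abstractProvisioner import Shape
from toil.provisioners.clusterScaler import BinPackedFit, NodeReservation
from toil.lib.conversions import human2bytes as h2b
from toil.test import ToilTest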