Beispiel #1
0
    def test_job_array(self):
        """
        Test that a job array switches from accruing eligible time
        to ineligible time when its last subjob starts running

        The node is limited to 2 ncpus and a 3-subjob array (20 second
        sleep per subjob) is submitted, so subjobs 1 and 2 run while
        subjob 3 stays queued. The test then checks the array's
        accrue_type before and after subjob 3 starts and verifies the
        server log record reporting how long eligible time was accrued.
        """
        logutils = PBSLogUtils()
        a = {'resources_available.ncpus': 2}
        self.server.manager(MGR_CMD_SET, NODE, a, id=self.mom.shortname)

        # NOTE(review): 2047 presumably enables the log event classes that
        # carry the accrue-type records matched below -- confirm.
        a = {'log_events': 2047}
        self.server.manager(MGR_CMD_SET, SERVER, a)

        J1 = Job(TEST_USER, attrs={ATTR_J: '1-3'})
        J1.set_sleep_time(20)
        jid = self.server.submit(J1)
        jid_short = jid.split('[')[0]
        sjid1 = jid_short + '[1]'
        sjid2 = jid_short + '[2]'
        sjid3 = jid_short + '[3]'

        # Capture the time stamp when accrue type changed to
        # eligible time
        msg1 = jid + ";Accrue type has changed to eligible_time"
        m1 = self.server.log_match(msg1)
        # The matched log line is the second element of the result; its
        # first ';'-separated field is the time stamp.
        t1 = logutils.convert_date_time(m1[1].split(';')[0])

        self.server.expect(JOB, {ATTR_state: 'R'}, id=sjid1, extend='t')
        self.server.expect(JOB, {ATTR_state: 'R'}, id=sjid2, extend='t')
        self.server.expect(JOB, {ATTR_state: 'Q'}, id=sjid3, extend='t')

        # accrue_type = 2 is eligible_time
        self.server.expect(JOB, {'accrue_type': 2}, id=jid)

        self.logger.info("subjobs 1 and 2 finished; subjob 3 must run now")
        self.server.expect(JOB, {ATTR_state: 'R'},
                           id=sjid3,
                           extend='t',
                           offset=20)
        # accrue_type = 1 is presumably ineligible_time (see the log
        # message matched below)
        self.server.expect(JOB, {'accrue_type': 1}, id=jid)

        # Capture the time stamp when accrue type changed to
        # ineligible time
        msg2 = jid + ";Accrue type has changed to ineligible_time"
        m2 = self.server.log_match(msg2)
        t2 = logutils.convert_date_time(m2[1].split(';')[0])
        # Work in whole seconds: the log record reports an integer count.
        eligible_time = int(t2 - t1)

        # Build a zero-padded HH:MM:SS stamp. A hard-coded "00:00:%d"
        # is only valid when the interval is between 10 and 59 seconds.
        hours, remainder = divmod(eligible_time, 3600)
        mins, secs = divmod(remainder, 60)

        m = jid + ";Accrue type has changed to ineligible_time, "
        m += "previous accrue type was eligible_time"
        m += " for %d secs, " % eligible_time
        m += "total eligible_time=%02d:%02d:%02d" % (hours, mins, secs)
        self.server.log_match(m)
Beispiel #2
0
    def test_job_array(self):
        """
        Test that a job array switches from accruing eligible time
        to ineligible time when its last subjob starts running

        The node is limited to 2 ncpus and a 3-subjob array (20 second
        sleep per subjob) is submitted, so subjobs 1 and 2 run while
        subjob 3 stays queued. The expected eligible time is measured as
        the interval between the "Job Run" log records of subjob 1 and
        subjob 3 and compared against the value in the server's
        accrue-type log record, allowing a deviation of up to 5 seconds.

        :raises PtlLogMatchError: if the accrue-type record is missing or
                                  reports a value more than 5 seconds
                                  away from the measured interval
        """
        logutils = PBSLogUtils()
        a = {'resources_available.ncpus': 2}
        self.server.manager(MGR_CMD_SET, NODE, a, id=self.mom.shortname)

        # NOTE(review): 2047 presumably enables the log event classes that
        # carry the accrue-type records matched below -- confirm.
        a = {'log_events': 2047}
        self.server.manager(MGR_CMD_SET, SERVER, a)

        J1 = Job(TEST_USER, attrs={ATTR_J: '1-3'})
        J1.set_sleep_time(20)
        jid = self.server.submit(J1)
        jid_short = jid.split('[')[0]
        sjid1 = jid_short + '[1]'
        sjid2 = jid_short + '[2]'
        sjid3 = jid_short + '[3]'

        # Capture the time stamp when subjob 1 starts run. Accrue type changes
        # to eligible time
        run_msg1 = (J1.create_subjob_id(jid, 1) +
                    ";Job Run at request of Scheduler")
        run_rec1 = self.server.log_match(run_msg1)
        # The matched log line is the second element of the result; its
        # first ';'-separated field is the time stamp.
        t1 = logutils.convert_date_time(run_rec1[1].split(';')[0])

        self.server.expect(JOB, {ATTR_state: 'R'}, id=sjid1, extend='t')
        self.server.expect(JOB, {ATTR_state: 'R'}, id=sjid2, extend='t')
        self.server.expect(JOB, {ATTR_state: 'Q'}, id=sjid3, extend='t')

        self.server.expect(JOB, {'accrue_type': self.accrue['eligible']},
                           id=jid)

        self.logger.info("subjobs 1 and 2 finished; subjob 3 must run now")
        self.server.expect(JOB, {ATTR_state: 'R'},
                           id=sjid3,
                           extend='t',
                           offset=20)
        self.server.expect(JOB, {'accrue_type': self.accrue['ineligible']},
                           id=jid)

        # Capture the time stamp when subjob 3 starts run. Accrue type changes
        # to ineligible time. eligible_time calculation is completed.
        run_msg3 = (J1.create_subjob_id(jid, 3) +
                    ";Job Run at request of Scheduler")
        run_rec3 = self.server.log_match(run_msg3)
        t2 = logutils.convert_date_time(run_rec3[1].split(';')[0])
        eligible_time = int(t2) - int(t1)

        # Prefix shared by the exact match and the tolerant fallback.
        accrue_msg = jid + ";Accrue type has changed to ineligible_time, "
        accrue_msg += "previous accrue type was eligible_time"

        exact_msg = accrue_msg + " for %d secs, " % eligible_time
        # Format timedelta object as it does not print a preceding 0 for
        # hours in HH:MM:SS
        exact_msg += "total eligible_time={!s:0>8}".format(
            datetime.timedelta(seconds=eligible_time))
        try:
            self.server.log_match(exact_msg)
        except PtlLogMatchError as e:
            # In some slow machines, there is a delay observed between
            # job run and accrue type change.
            # Checking if log_match failed because eligible_time
            # value was off only by a few seconds(5 seconds).
            # This is done to accommodate differences in the eligible
            # time calculated by the test and the eligible time
            # calculated by PBS.
            # If the eligible_time value was off by > 5 seconds, test fails.
            match = self.server.log_match(accrue_msg)
            e_time = re.search(r'(\d+) secs', match[1])
            if e_time is None:
                raise PtlLogMatchError(rc=1, rv=False, msg=e.msg)
            self.logger.info("Checking if log_match failed because "
                             "the eligible_time value was off by "
                             "a few seconds, but within the allowed "
                             "range (5 secs). Expected %d secs Got: %s" %
                             (eligible_time, e_time.group(1)))
            # Compare in both directions so that a server value far below
            # the expectation is not silently accepted.
            if abs(int(e_time.group(1)) - eligible_time) > 5:
                raise PtlLogMatchError(rc=1, rv=False, msg=e.msg)
Beispiel #3
0
 def test_multi_sched_perf(self):
     """
     Test time taken to schedule and run 5k jobs with
     single scheduler and workload divided among 5 schedulers.

     Phase 1 submits 5000 jobs with scheduling disabled, runs one
     scheduling cycle and records its duration. Phase 2 cleans up,
     sets up the schedulers and queues, submits 1000 jobs to each of
     the queues wq1..wq5, switches scheduling on and off again for
     every scheduler and takes the longest time any non-default
     scheduler needed to log "Leaving Scheduling Cycle". The test
     passes when that longest time is below the phase 1 duration.
     """
     a = {'resources_available.ncpus': 1000}
     self.server.create_vnodes(self.mom.shortname, a, 5, self.mom)
     a = {'scheduling': 'False'}
     self.server.manager(MGR_CMD_SET, SERVER, a)
     self.submit_njobs(5000)
     self.scheduler.run_scheduling_cycle()
     # Duration is taken from the most recent cycle record.
     c = self.scheduler.cycles(lastN=1)[0]
     cyc_dur = c.end - c.start
     self.perf_test_result(cyc_dur, "default_cycle_duration", "secs")
     msg = 'Time taken by default scheduler to run 5k jobs is '
     self.logger.info(msg + str(cyc_dur))
     self.server.cleanup_jobs()
     self.setup_scheds()
     self.setup_queues_nodes()
     for sc in self.scheds:
         a = {'scheduling': 'False'}
         self.server.manager(MGR_CMD_SET, SCHED, a, id=sc)
     # Same workload as phase 1, split evenly over the five queues.
     for qname in ('wq1', 'wq2', 'wq3', 'wq4', 'wq5'):
         self.submit_njobs(1000, {ATTR_q: qname})
     start = time.time()
     for sc in self.scheds:
         a = {'scheduling': 'True'}
         self.server.manager(MGR_CMD_SET, SCHED, a, id=sc)
     for sc in self.scheds:
         a = {'scheduling': 'False'}
         self.server.manager(MGR_CMD_SET, SCHED, a, id=sc)
     sc_dur = []
     for sc in self.scheds:
         if sc != 'default':
             self.logger.info("searching log for scheduler " + str(sc))
             log_msg = self.scheds[sc].log_match("Leaving Scheduling Cycle",
                                                 starttime=int(start),
                                                 max_attempts=30)
             # The first ';'-separated field of the matched line is the
             # time stamp.
             endtime = PBSLogUtils.convert_date_time(
                 log_msg[1].split(';')[0])
             dur = endtime - start
             sc_dur.append(dur)
     # Fail with a clear message instead of a ValueError from max().
     self.assertTrue(sc_dur, "No non-default scheduler was measured")
     max_dur = max(sc_dur)
     self.perf_test_result(max_dur, "max_multisched_cycle_duration", "secs")
     msg = 'Max time taken by one of the multi sched to run 1k jobs is '
     self.logger.info(msg + str(max_dur))
     self.perf_test_result(cyc_dur - max_dur,
                           "multisched_defaultsched_cycle_diff", "secs")
     self.assertLess(max_dur, cyc_dur)
     msg1 = 'Multi scheduler is faster than single scheduler by '
     msg2 = ' secs in scheduling 5000 jobs with 5 schedulers'
     self.logger.info(msg1 + str(cyc_dur - max_dur) + msg2)
Beispiel #4
0
 def get_epoch(msg):
     """
     Convert the time stamp of a log message.

     :param msg: log message whose first ';'-separated field is the
                 time stamp
     :returns: whatever PBSLogUtils.convert_date_time returns for that
               time stamp
     """
     # Everything before the first ';' is the time stamp.
     timestamp = msg.partition(';')[0]
     return PBSLogUtils.convert_date_time(timestamp)
    def test_job_array(self):
        """
        Test that a job array switches from accruing eligible time
        to ineligible time when its last subjob starts running

        The node is limited to 2 ncpus and a 3-subjob array (20 second
        sleep per subjob) is submitted, so subjobs 1 and 2 run while
        subjob 3 stays queued. The expected eligible time is measured as
        the interval between the "Job Run" log records of subjob 1 and
        subjob 3 and compared against the value in the server's
        accrue-type log record, allowing a deviation of up to 5 seconds.

        :raises PtlLogMatchError: if the accrue-type record is missing or
                                  reports a value more than 5 seconds
                                  away from the measured interval
        """
        logutils = PBSLogUtils()
        a = {'resources_available.ncpus': 2}
        self.server.manager(MGR_CMD_SET, NODE, a, id=self.mom.shortname)

        # NOTE(review): 2047 presumably enables the log event classes that
        # carry the accrue-type records matched below -- confirm.
        a = {'log_events': 2047}
        self.server.manager(MGR_CMD_SET, SERVER, a)

        J1 = Job(TEST_USER, attrs={ATTR_J: '1-3'})
        J1.set_sleep_time(20)
        jid = self.server.submit(J1)
        jid_short = jid.split('[')[0]
        sjid1 = jid_short + '[1]'
        sjid2 = jid_short + '[2]'
        sjid3 = jid_short + '[3]'

        # Capture the time stamp when subjob 1 starts run. Accrue type changes
        # to eligible time
        run_msg1 = (J1.create_subjob_id(jid, 1) +
                    ";Job Run at request of Scheduler")
        run_rec1 = self.server.log_match(run_msg1)
        # The matched log line is the second element of the result; its
        # first ';'-separated field is the time stamp.
        t1 = logutils.convert_date_time(run_rec1[1].split(';')[0])

        self.server.expect(JOB, {ATTR_state: 'R'}, id=sjid1, extend='t')
        self.server.expect(JOB, {ATTR_state: 'R'}, id=sjid2, extend='t')
        self.server.expect(JOB, {ATTR_state: 'Q'}, id=sjid3, extend='t')

        # accrue_type = 2 is eligible_time
        self.server.expect(JOB, {'accrue_type': 2}, id=jid)

        self.logger.info("subjobs 1 and 2 finished; subjob 3 must run now")
        self.server.expect(JOB, {ATTR_state: 'R'}, id=sjid3,
                           extend='t', offset=20)
        # accrue_type = 1 is presumably ineligible_time (see the log
        # message matched below)
        self.server.expect(JOB, {'accrue_type': 1}, id=jid)

        # Capture the time stamp when subjob 3 starts run. Accrue type changes
        # to ineligible time. eligible_time calculation is completed.
        run_msg3 = (J1.create_subjob_id(jid, 3) +
                    ";Job Run at request of Scheduler")
        run_rec3 = self.server.log_match(run_msg3)
        t2 = logutils.convert_date_time(run_rec3[1].split(';')[0])
        # Whole seconds only: a fractional value would leak microseconds
        # into the timedelta string built below.
        eligible_time = int(t2) - int(t1)

        # Prefix shared by the exact match and the tolerant fallback.
        accrue_msg = jid + ";Accrue type has changed to ineligible_time, "
        accrue_msg += "previous accrue type was eligible_time"

        exact_msg = accrue_msg + " for %d secs, " % eligible_time
        # Format timedelta object as it does not print a preceding 0 for
        # hours in HH:MM:SS. The explicit !s conversion is required:
        # timedelta itself rejects a non-empty format spec on Python 3.
        exact_msg += "total eligible_time={!s:0>8}".format(
            datetime.timedelta(seconds=eligible_time))
        try:
            self.server.log_match(exact_msg)
        except PtlLogMatchError as e:
            # In some slow machines, there is a delay observed between
            # job run and accrue type change.
            # Checking if log_match failed because eligible_time
            # value was off only by a few seconds(5 seconds).
            # This is done to accommodate differences in the eligible
            # time calculated by the test and the eligible time
            # calculated by PBS.
            # If the eligible_time value was off by > 5 seconds, test fails.
            match = self.server.log_match(accrue_msg)
            e_time = re.search(r'(\d+) secs', match[1])
            if e_time is None:
                raise PtlLogMatchError(rc=1, rv=False, msg=e.msg)
            self.logger.info("Checking if log_match failed because "
                             "the eligible_time value was off by "
                             "a few seconds, but within the allowed "
                             "range (5 secs). Expected %d secs Got: %s"
                             % (eligible_time, e_time.group(1)))
            # Compare in both directions so that a server value far below
            # the expectation is not silently accepted.
            if abs(int(e_time.group(1)) - eligible_time) > 5:
                raise PtlLogMatchError(rc=1, rv=False, msg=e.msg)