Exemple #1
0
    def _performWallClockChecks(self):
        """Watchdog performs the wall clock checks based on MJF.

        Signals are sent to processes if we need to stop, but the function
        always returns S_OK().

        The stop signal (self.stopSigNumber) is sent when both hold:

          * the current time is later than the MJF 'jobstart_secs' value plus
            self.stopSigStartSeconds, and
          * the wall clock seconds left reported by MJF is below
            self.stopSigFinishSeconds + self.wallClockCheckSeconds.

        Only children of self.wrapperPID whose command line (read from
        /proc/<pid>/cmdline) matches self.stopSigRegex are signalled.
        self.stopSigSent is set to True before any signal is attempted.

        :return: S_OK() in every case
        """
        mjf = MJF.MJF()

        try:
            wallClockSecondsLeft = mjf.getWallClockSecondsLeft()
        except Exception:
            # Just stop if we can't get the wall clock seconds left
            return S_OK()

        jobstartSeconds = mjf.getIntJobFeature('jobstart_secs')
        if jobstartSeconds is None:
            # Just stop if we don't know when the job started
            return S_OK()

        if (int(time.time()) > jobstartSeconds + self.stopSigStartSeconds) and \
           (wallClockSecondsLeft < self.stopSigFinishSeconds + self.wallClockCheckSeconds):
            # Need to send the signal! Assume it works to avoid sending the signal more than once
            self.log.info('Sending signal %d to JobWrapper children' %
                          self.stopSigNumber)
            self.stopSigSent = True

            try:
                for childPid in getChildrenPIDs(self.wrapperPID):
                    try:
                        # Context manager so the /proc handle is closed
                        # immediately instead of lingering until collection.
                        with open('/proc/%d/cmdline' % childPid,
                                  'r') as cmdlineFile:
                            # Arguments in cmdline are NUL-separated
                            cmdline = cmdlineFile.read().replace(
                                '\0', ' ').strip()
                    except IOError:
                        # Process gone away? Not running on Linux? Skip anyway
                        continue

                    if re.search(self.stopSigRegex, cmdline) is None:
                        continue

                    self.log.info(
                        'Sending signal %d to process ID %d, cmdline = "%s"'
                        % (self.stopSigNumber, childPid, cmdline))
                    try:
                        os.kill(childPid, self.stopSigNumber)
                    except OSError as e:
                        # The child may have exited between the /proc read and
                        # the kill. Since the signal is only sent once, keep
                        # going so the remaining children are still signalled.
                        self.log.error(
                            'Failed to send signal %d to process ID %d (%s)'
                            % (self.stopSigNumber, childPid, str(e)))

            except Exception as e:
                self.log.error(
                    'Failed to send signals to JobWrapper children! (%s)' %
                    str(e))

        return S_OK()
Exemple #2
0
# No configuration file is targeted unless the "R"/"Reconfig" switch names one.
configFile = None

# Walk the switches reported by Script.getUnprocessedSwitches(); element 0 of
# each entry is the switch name and element 1 its value.
for unprocSw in Script.getUnprocessedSwitches():
    if unprocSw[0] == "U" or unprocSw[0] == "Update":
        update = True
        continue
    if unprocSw[0] == "R" or unprocSw[0] == "Reconfig":
        configFile = unprocSw[1]

# Script entry point: run a single benchmark and derive the normalization
# figures from its 'NORM' result.
if __name__ == "__main__":

    # Imports are done here, inside the entry-point guard, rather than at
    # module top level.
    from DIRAC import gLogger, gConfig
    from DIRAC.WorkloadManagementSystem.Client.DIRACbenchmark import singleDiracBenchmark
    from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
    from DIRAC.Core.Utilities import MJF

    mjf = MJF.MJF()
    mjf.updateConfig()

    # Job features named 'db12' and 'hs06' as reported by MJF.
    # NOTE(review): the value returned when a feature is absent is not
    # visible here -- confirm before relying on it.
    db12JobFeature = mjf.getJobFeature('db12')
    hs06JobFeature = mjf.getJobFeature('hs06')

    result = singleDiracBenchmark(1)

    # A None result is treated as a failed measurement: log and exit with 1.
    if result is None:
        gLogger.error('Cannot make benchmark measurements')
        DIRAC.exit(1)

    # Raw measured value, rounded to one decimal place.
    db12Measured = round(result['NORM'], 1)
    # Correction factor read from the Operations helper; 1. is passed as the
    # second argument (presumably the fallback when the option is unset).
    corr = Operations().getValue('JobScheduling/CPUNormalizationCorrection',
                                 1.)
    # Measured value divided by the correction factor, rounded to one decimal.
    norm = round(result['NORM'] / corr, 1)