Example #1
 def __init__(self, job):
     super(SlurmCgroupMemTimeseries, self).__init__(job)
     self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
     self._hostdata = {}
     self._hostcounts = {}
     self._expectedcgroup = "/slurm/uid_{0}/job_{1}".format(
         job.acct['uid'], job.job_id)
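
All of these examples feed a TimeseriesAccumulator, whose implementation is not shown on this page. As a rough sketch only, inferred from the calls made in the examples (a constructor taking the node count and walltime, adddata() returning the insert index or None, and get() returning per-host (timestamp, value) pairs), a compatible stand-in might look like the following; the real class presumably also downsamples once MAX_DATAPOINTS is reached, which is omitted here:

import numpy

class TimeseriesAccumulatorSketch(object):
    """ Hypothetical stand-in for TimeseriesAccumulator, reconstructed only
        from how it is called in the examples on this page """
    MAX_DATAPOINTS = 100

    def __init__(self, nodecount, walltime):
        self._walltime = walltime
        # one (timestamp, value) row per datapoint per host
        self._data = numpy.zeros((nodecount, self.MAX_DATAPOINTS, 2))
        self._counts = [0] * nodecount

    def adddata(self, hostidx, timestamp, value):
        """ Store a datapoint; return its insert index, or None if full """
        if self._counts[hostidx] >= self.MAX_DATAPOINTS:
            return None  # the real class likely downsamples instead of dropping
        insertat = self._counts[hostidx]
        self._data[hostidx, insertat] = (timestamp, value)
        self._counts[hostidx] += 1
        return insertat

    def get(self):
        """ Return the accumulated (hosts x datapoints x 2) array """
        npoints = min(self._counts) if self._counts else 0
        return self._data[:, :npoints, :]
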
Example #2
 def __init__(self, job):
     super(CgroupMemTimeseries, self).__init__(job)
     self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
     self._hostdata = {}
     self._hostcounts = {}
     if job.acct['resource_manager'] == 'pbs':
         self._expectedcgroup = "/torque/{0}".format(job.job_id)
     elif job.acct['resource_manager'] == 'slurm':
         self._expectedcgroup = "/slurm/uid_{0}/job_{1}".format(
             job.acct['uid'], job.job_id)
     else:
         raise NotApplicableError
Example #3
 def __init__(self, job):
     super(SimdInsTimeseries, self).__init__(job)
     self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
     self._hostdata = {}
     self._hostdevnames = {}
     self._error = None
Example #4
class SimdInsTimeseries(Plugin):
    """ Generate the CPU usage as a timeseries data """

    name = property(lambda x: "simdins")
    mode = property(lambda x: "timeseries")
    requiredMetrics = property(lambda x: [SNB_METRICS, NHM_METRICS])
    optionalMetrics = property(lambda x: [])
    derivedMetrics = property(lambda x: [])

    def __init__(self, job):
        super(SimdInsTimeseries, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}
        self._hostdevnames = {}
        self._error = None

    def process(self, nodemeta, timestamp, data, description):

        if len(data[0]) > 0 and data[0][0] == 0:
            # If active == 0 then the PMDA was switched off due to user request
            self._error = ProcessingError.RAW_COUNTER_UNAVAILABLE
            return False

        if len(data[1]) == 0:
            # Ignore timesteps where data was not available
            return True

        hostidx = nodemeta.nodeindex

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[hostidx] = numpy.empty(
                (TimeseriesAccumulator.MAX_DATAPOINTS, len(data[1])))
            self._hostdevnames[hostidx] = dict(
                (str(k), v)
                for k, v in zip(description[1][0], description[1][1]))

        if len(data) == len(NHM_METRICS):
            flops = numpy.array(data[1])
        else:
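            # weighting assumption: packed SIMD counters scaled by vector
            # width (4-wide, 2-wide, scalar), per the SNB_METRICS ordering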
            flops = 4.0 * data[1] + 2.0 * data[2] + data[3] + data[4]

        insertat = self._data.adddata(hostidx, timestamp, numpy.sum(flops))
        if insertat is not None:
            self._hostdata[hostidx][insertat] = flops

            if insertat > 1:
                if numpy.any(
                        flops - self._hostdata[hostidx][insertat - 1] < 0.0):
                    self._error = ProcessingError.PMDA_RESTARTED_DURING_JOB
                    return False

        return True

    def results(self):

        if self._error is not None:
            return {"error": self._error}

        values = self._data.get()
        rates = numpy.diff(values[:, :, 1]) / numpy.diff(values[:, :, 0])

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            sortarr = numpy.argsort(rates.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], rates),
                "max": self.collatedata(sortarr[:, -1], rates),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2],
                                        rates),
                "times": values[0, 1:, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {"times": values[0, 1:, 0].tolist(), "hosts": {}}
            includelist = self._hostdata.keys()

        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = rates[hostidx, :].tolist()
            retdata['hosts'][str(hostidx)]['dev'] = {}

            for devid in self._hostdevnames[hostidx].iterkeys():
                dpnts = len(values[hostidx, :, 0])
                retdata['hosts'][str(hostidx)]['dev'][devid] = (
                    numpy.diff(self._hostdata[hostidx][:dpnts, int(devid)]) /
                    numpy.diff(values[hostidx, :, 0])).tolist()

            retdata['hosts'][str(hostidx)]['names'] = self._hostdevnames[hostidx]

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result
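
To make the rate computation and collatedata() above concrete, here is a small self-contained run with made-up numbers (two hosts, three timestamps); collatedata is repeated inline so the snippet runs on its own:

import numpy

def collatedata(args, rates):
    """ same logic as the staticmethod above """
    result = []
    for timepoint, hostidx in enumerate(args):
        try:
            result.append([rates[hostidx, timepoint], int(hostidx)])
        except IndexError:
            pass
    return result

# hypothetical accumulator output: 2 hosts x 3 datapoints x (timestamp, counter)
values = numpy.array([
    [[0.0, 0.0], [10.0, 100.0], [20.0, 300.0]],   # host 0
    [[0.0, 0.0], [10.0,  50.0], [20.0, 250.0]],   # host 1
])

# per-interval rates, exactly as in results(): counter delta / time delta
rates = numpy.diff(values[:, :, 1]) / numpy.diff(values[:, :, 0])
# rates == [[10., 20.], [ 5., 20.]]

# for each timestep, argsort orders the hosts by rate;
# column 0 holds the minimum host, column -1 the maximum
sortarr = numpy.argsort(rates.T, axis=1)

minseries = collatedata(sortarr[:, 0], rates)
# minseries == [[5.0, 1], [20.0, 0]] -- each entry is [rate, host index]
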
Example #5
 def __init__(self, job):
     super(RateConvertingTimeseriesPlugin, self).__init__(job)
     self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
     self._hostdata = {}
Example #6
class RateConvertingTimeseriesPlugin(Plugin):
    """ 
    A base abstract class for generating a timeseries summary for values that should
    be converted to rates, one per node.
    The plugin name,  list of required metrics and generator function must be provided by the implementation
    """
    __metaclass__ = ABCMeta

    mode = property(lambda x: "timeseries")

    def __init__(self, job):
        super(RateConvertingTimeseriesPlugin, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}

    @abstractmethod
    def computetimepoint(self, data):
        """ Called with the data for each timepoint on each host """
        pass

    def process(self, nodemeta, timestamp, data, description):

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[nodemeta.nodeindex] = 1

        datum = self.computetimepoint(data)
        if datum is not None:
            self._data.adddata(nodemeta.nodeindex, timestamp, datum)

    def results(self):

        if len(self._hostdata) != self._job.nodecount:
            return {"error": ProcessingError.INSUFFICIENT_HOSTDATA}

        values = self._data.get()

        if len(values[0, :, 0]) < 3:
            return {"error": ProcessingError.JOB_TOO_SHORT}

        rates = numpy.diff(values[:, :, 1]) / numpy.diff(values[:, :, 0])

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            sortarr = numpy.argsort(rates.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], rates),
                "max": self.collatedata(sortarr[:, -1], rates),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2],
                                        rates),
                "times": values[0, 1:, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {"times": values[0, 1:, 0].tolist(), "hosts": {}}
            includelist = self._hostdata.keys()

        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = rates[hostidx, :].tolist()

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result
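
As the docstring says, a concrete plugin only has to supply the name, the required metrics, and computetimepoint(); everything else comes from the base class. A minimal hypothetical subclass (the class and metric names are chosen for illustration, not taken from the project) could look like:

import numpy

class NetworkBytesTimeseries(RateConvertingTimeseriesPlugin):
    """ Hypothetical example: total network traffic per node as a rate """

    name = property(lambda x: "network_bytes")
    requiredMetrics = property(lambda x: ["network.interface.total.bytes"])
    optionalMetrics = property(lambda x: [])
    derivedMetrics = property(lambda x: [])

    def computetimepoint(self, data):
        # sum the cumulative byte counter over all interfaces; the base
        # class's results() turns these values into rates via numpy.diff
        if len(data[0]) == 0:
            return None
        return numpy.sum(data[0])
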
Example #7
 def __init__(self, job):
     super(CpuUserTimeseries, self).__init__(job)
     self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
     self._hostdata = {}
     self._hostdevnames = {}
Example #8
class GpuUsageTimeseries(Plugin):
    """ Generate the CPU usage as a timeseries data """

    name = property(lambda x: "gpu_usage")
    mode = property(lambda x: "timeseries")
    requiredMetrics = property(lambda x: ["nvidia.gpuactive"])
    optionalMetrics = property(lambda x: [])
    derivedMetrics = property(lambda x: [])

    def __init__(self, job):
        super(GpuUsageTimeseries, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}
        self._hostdevnames = {}

    def process(self, nodemeta, timestamp, data, description):

        hostidx = nodemeta.nodeindex

        if len(data[0]) == 0:
            # Skip data point with no data
            return True

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[hostidx] = numpy.empty((TimeseriesAccumulator.MAX_DATAPOINTS, len(data[0])))
            self._hostdevnames[hostidx] = dict((str(k), str(v)) for k, v in zip(description[0][0], description[0][1]))

        avg_usage = numpy.mean(data[0])
        insertat = self._data.adddata(hostidx, timestamp, avg_usage)
        if insertat is not None:
            self._hostdata[hostidx][insertat] = data[0]

        return True

    def results(self):

        values = self._data.get()

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            memdata = values[:, :, 1]
            sortarr = numpy.argsort(memdata.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], memdata),
                "max": self.collatedata(sortarr[:, -1], memdata),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2], memdata),
                "times": values[0, :, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {
                "times": values[0, :, 0].tolist(),
                "hosts": {}
            }
            includelist = self._hostdata.keys()


        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = values[hostidx, :, 1].tolist()
            retdata['hosts'][str(hostidx)]['dev'] = {}

            for devid in self._hostdevnames[hostidx].iterkeys():
                dpnts = len(values[hostidx, :, 0])
                retdata['hosts'][str(hostidx)]['dev'][devid] = self._hostdata[hostidx][:dpnts, int(devid)].tolist()

            retdata['hosts'][str(hostidx)]['names'] = self._hostdevnames[hostidx]

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result
Example #9
class SimdInsTimeseries(Plugin):
    """ Generate the CPU usage as a timeseries data """

    name = property(lambda x: "simdins")
    mode = property(lambda x: "timeseries")
    requiredMetrics = property(lambda x: [SNB_METRICS, NHM_METRICS])
    optionalMetrics = property(lambda x: [])
    derivedMetrics = property(lambda x: [])

    def __init__(self, job):
        super(SimdInsTimeseries, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}
        self._hostdevnames = {}
        self._error = None

    def process(self, nodemeta, timestamp, data, description):

        if len(data[0]) > 0 and data[0][0] == 0:
            # If active == 0 then the PMDA was switched off due to user request
            self._error = ProcessingError.RAW_COUNTER_UNAVAILABLE
            return False

        if len(data[1]) == 0:
            # Ignore timesteps where data was not available
            return True

        hostidx = nodemeta.nodeindex

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[hostidx] = numpy.empty((TimeseriesAccumulator.MAX_DATAPOINTS, len(data[1])))
            self._hostdevnames[hostidx] = dict((str(k), v) for k, v in zip(description[1][0], description[1][1]))

        if len(data) == len(NHM_METRICS):
            flops = numpy.array(data[1])
        else:
            flops = 4.0 * data[1] + 2.0 * data[2] + data[3] + data[4]

        insertat = self._data.adddata(hostidx, timestamp, numpy.sum(flops))
        if insertat is not None:
            self._hostdata[hostidx][insertat] = flops

            if insertat > 1:
                if numpy.any(flops - self._hostdata[hostidx][insertat - 1] < 0.0):
                    self._error = ProcessingError.PMDA_RESTARTED_DURING_JOB
                    return False

        return True

    def results(self):

        if self._error is not None:
            return {"error": self._error}

        values = self._data.get()
        rates = numpy.diff(values[:, :, 1]) / numpy.diff(values[:, :, 0])

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            sortarr = numpy.argsort(rates.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], rates),
                "max": self.collatedata(sortarr[:, -1], rates),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2], rates),
                "times": values[0, 1:, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {
                "times": values[0, 1:, 0].tolist(),
                "hosts": {}
            }
            includelist = self._hostdata.keys()


        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = rates[hostidx, :].tolist()
            retdata['hosts'][str(hostidx)]['dev'] = {}

            for devid in self._hostdevnames[hostidx].iterkeys():
                dpnts = len(values[hostidx, :, 0])
                retdata['hosts'][str(hostidx)]['dev'][devid] = (numpy.diff(self._hostdata[hostidx][:dpnts, int(devid)]) / numpy.diff(values[hostidx, :, 0])).tolist()

            retdata['hosts'][str(hostidx)]['names'] = self._hostdevnames[hostidx]

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result
Example #10
 def __init__(self, job):
     super(RateConvertingTimeseriesPlugin, self).__init__(job)
     self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
     self._hostdata = {}
Example #11
class RateConvertingTimeseriesPlugin(Plugin):
    """ 
    A base abstract class for generating a timeseries summary for values that should
    be converted to rates, one per node.
    The plugin name,  list of required metrics and generator function must be provided by the implementation
    """
    __metaclass__ = ABCMeta

    mode = property(lambda x: "timeseries")

    def __init__(self, job):
        super(RateConvertingTimeseriesPlugin, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}

    @abstractmethod
    def computetimepoint(self, data):
        """ Called with the data for each timepoint on each host """
        pass

    def process(self, nodemeta, timestamp, data, description):

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[nodemeta.nodeindex] = 1

        datum = self.computetimepoint(data)
        if datum is not None:
            self._data.adddata(nodemeta.nodeindex, timestamp, datum)

    def results(self):

        if len(self._hostdata) != self._job.nodecount:
            return {"error": ProcessingError.INSUFFICIENT_HOSTDATA}

        values = self._data.get()

        if len(values[0, :, 0]) < 3:
            return {"error": ProcessingError.JOB_TOO_SHORT}

        rates = numpy.diff(values[:, :, 1]) / numpy.diff(values[:, :, 0])

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            sortarr = numpy.argsort(rates.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], rates),
                "max": self.collatedata(sortarr[:, -1], rates),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2], rates),
                "times": values[0, 1:, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {
                "times": values[0, 1:, 0].tolist(),
                "hosts": {}
            }
            includelist = self._hostdata.keys()


        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = rates[hostidx, :].tolist()

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result
Example #12
class CgroupMemTimeseries(Plugin):
    """ Generate timeseries summary for memory usage viewed from CGroup
        This code is SLURM-specific because of the SLURM cgroup naming convention.
    """

    name = property(lambda x: "process_mem_usage")
    mode = property(lambda x: "timeseries")
    requiredMetrics = property(lambda x: ["cgroup.memory.usage"])
    optionalMetrics = property(lambda x: [])
    derivedMetrics = property(lambda x: [])

    def __init__(self, job):
        super(CgroupMemTimeseries, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}
        self._hostcounts = {}
        if job.acct['resource_manager'] == 'pbs':
            self._expectedcgroup = "/torque/{0}".format(job.job_id)
        elif job.acct['resource_manager'] == 'slurm':
            self._expectedcgroup = "/slurm/uid_{0}/job_{1}".format(
                job.acct['uid'], job.job_id)
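            # e.g. uid 1000 running Slurm job 123456 gives the cgroup path
            # "/slurm/uid_1000/job_123456" (illustrative values)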
        else:
            raise NotApplicableError

    def process(self, nodemeta, timestamp, data, description):

        hostidx = nodemeta.nodeindex

        if len(data[0]) == 0:
            # Skip data point with no data
            return True

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[hostidx] = numpy.empty(
                (TimeseriesAccumulator.MAX_DATAPOINTS, 1))
            self._hostcounts[hostidx] = {'missing': 0, 'present': 0}

        try:
            dataidx = None
            for idx, desc in enumerate(description[0][1]):
                if re.match(r"^" + re.escape(self._expectedcgroup) + r"($|\.)",
                            desc):
                    dataidx = idx
                    break
            # No cgroup info at this datapoint
            if dataidx is None:
                return True
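            # cgroup memory usage is reported in bytes; 2**30 converts to GiB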
            nodemem_gb = data[0][dataidx] / 1073741824.0
            self._hostcounts[hostidx]['present'] += 1
        except ValueError:
            self._hostcounts[hostidx]['missing'] += 1
            # No cgroup info at this datapoint
            return True

        insertat = self._data.adddata(hostidx, timestamp, nodemem_gb)
        if insertat is not None:
            self._hostdata[hostidx][insertat] = nodemem_gb

        return True

    def results(self):

        if len(self._hostdata) != self._job.nodecount:
            return {'error': ProcessingError.RAW_COUNTER_UNAVAILABLE}

        for hcount in self._hostcounts.itervalues():
            if hcount['missing'] > hcount['present']:
                return {'error': ProcessingError.CPUSET_UNKNOWN}

        values = self._data.get()

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            memdata = values[:, :, 1]
            sortarr = numpy.argsort(memdata.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], memdata),
                "max": self.collatedata(sortarr[:, -1], memdata),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2],
                                        memdata),
                "times": values[0, :, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {"times": values[0, :, 0].tolist(), "hosts": {}}
            includelist = self._hostdata.keys()

        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = values[hostidx, :, 1].tolist()

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result
Example #13
 def __init__(self, job):
     super(SlurmCgroupMemTimeseries, self).__init__(job)
     self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
     self._hostdata = {}
     self._hostcounts = {}
     self._expectedcgroup = "/slurm/uid_{0}/job_{1}".format(job.acct['uid'], job.job_id)
Example #14
class SlurmCgroupMemTimeseries(Plugin):
    """ Generate timeseries summary for memory usage viewed from CGroup
        This code is SLURM-specific because of the SLURM cgroup naming convention.
    """

    name = property(lambda x: "process_mem_usage")
    mode = property(lambda x: "timeseries")
    requiredMetrics = property(lambda x: ["cgroup.memory.usage"])
    optionalMetrics = property(lambda x: [])
    derivedMetrics = property(lambda x: [])

    def __init__(self, job):
        super(SlurmCgroupMemTimeseries, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}
        self._hostcounts = {}
        self._expectedcgroup = "/slurm/uid_{0}/job_{1}".format(job.acct['uid'], job.job_id)

    def process(self, nodemeta, timestamp, data, description):

        hostidx = nodemeta.nodeindex

        if len(data[0]) == 0:
            # Skip data point with no data
            return True

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[hostidx] = numpy.empty((TimeseriesAccumulator.MAX_DATAPOINTS, 1))
            self._hostcounts[hostidx] = {'missing': 0, 'present': 0}

        try:
            dataidx = description[0][1].index(self._expectedcgroup)
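            # cgroup.memory.usage is in bytes (1073741824 == 2**30 -> GiB)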
            nodemem_gb = data[0][dataidx] / 1073741824.0
            self._hostcounts[hostidx]['present'] += 1
        except ValueError:
            self._hostcounts[hostidx]['missing'] += 1
            # No cgroup info at this datapoint
            return True

        insertat = self._data.adddata(hostidx, timestamp, nodemem_gb)
        if insertat is not None:
            self._hostdata[hostidx][insertat] = nodemem_gb

        return True

    def results(self):

        if len(self._hostdata) != self._job.nodecount:
            return {'error': ProcessingError.RAW_COUNTER_UNAVAILABLE}

        for hcount in self._hostcounts.itervalues():
            if hcount['missing'] > hcount['present']:
                return {'error': ProcessingError.CPUSET_UNKNOWN}

        values = self._data.get()

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            memdata = values[:, :, 1]
            sortarr = numpy.argsort(memdata.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], memdata),
                "max": self.collatedata(sortarr[:, -1], memdata),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2], memdata),
                "times": values[0, :, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {
                "times": values[0, :, 0].tolist(),
                "hosts": {}
            }
            includelist = self._hostdata.keys()


        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = values[hostidx, :, 1].tolist()

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result
Example #15
 def __init__(self, job):
     super(CpuUserTimeseries, self).__init__(job)
     self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
     self._hostdata = {}
     self._hostdevnames = {}
     self._cpusallowed = None
Example #16
class PowerUsageTimeseries(Plugin):
    """ Generate the Power usage as a timeseries data """

    name = property(lambda x: "power")
    mode = property(lambda x: "timeseries")
    requiredMetrics = property(lambda x: ["ipmi.dcmi.power"])
    optionalMetrics = property(lambda x: [])
    derivedMetrics = property(lambda x: [])

    def __init__(self, job):
        super(PowerUsageTimeseries, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}

    @staticmethod
    def computetimepoint(data):
        """ Get the power usage from the data """
        if data[0][0] < numpy.finfo(numpy.float64).eps:
            return None

        return data[0][0]

    def process(self, nodemeta, timestamp, data, description):

        if not data[0]:
            # Skip data point with no data
            return True

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[nodemeta.nodeindex] = 1

        datum = self.computetimepoint(data)
        if datum is not None:
            self._data.adddata(nodemeta.nodeindex, timestamp, datum)

        return True

    def results(self):

        if len(self._hostdata) != self._job.nodecount:
            return {"error": ProcessingError.INSUFFICIENT_HOSTDATA}

        values = self._data.get()

        if len(values[0, :, 0]) < 3:
            return {"error": ProcessingError.JOB_TOO_SHORT}

        power = values[:, :, 1]

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            sortarr = numpy.argsort(power.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], power),
                "max": self.collatedata(sortarr[:, -1], power),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2],
                                        power),
                "times": values[0, :, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {"times": values[0, :, 0].tolist(), "hosts": {}}
            includelist = self._hostdata.keys()

        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = power[hostidx, :].tolist()

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result
Example #17
0
class CpuUserTimeseries(Plugin):
    """ Generate the CPU usage as a timeseries data """

    name = property(lambda x: "cpuuser")
    mode = property(lambda x: "timeseries")
    requiredMetrics = property(lambda x: ["kernel.percpu.cpu.user"])
    optionalMetrics = property(lambda x: [])
    derivedMetrics = property(lambda x: [])

    def __init__(self, job):
        super(CpuUserTimeseries, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}
        self._hostdevnames = {}
        self._cpusallowed = None

    def initcpus(self):
        if self._job.getdata('proc'):
            self._cpusallowed = self._job.getdata('proc')['cpusallowed']
        else:
            self._cpusallowed = {}

    def process(self, nodemeta, timestamp, data, description):

        if self._cpusallowed is None:
            self.initcpus()

        if len(data[0]) == 0:
            # Skip datapoints that have no values
            return True

        if nodemeta.nodename in self._cpusallowed and 'error' not in self._cpusallowed[nodemeta.nodename]:
            cpudata = data[0][self._cpusallowed[nodemeta.nodename]]
        else:
            cpudata = data[0]

        hostidx = nodemeta.nodeindex

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[hostidx] = numpy.empty((TimeseriesAccumulator.MAX_DATAPOINTS, len(cpudata)))
            if nodemeta.nodename in self._cpusallowed and 'error' not in self._cpusallowed[nodemeta.nodename]:
                self._hostdevnames[hostidx] = {}
                for i, cpuidx in enumerate(self._cpusallowed[nodemeta.nodename]):
                    self._hostdevnames[hostidx][str(i)] = description[0][1][cpuidx]
            else:
                self._hostdevnames[hostidx] = dict((str(k), v) for k, v in zip(description[0][0], description[0][1]))

        insertat = self._data.adddata(hostidx, timestamp, numpy.mean(cpudata) / 10.0)
        if insertat is not None:
            self._hostdata[hostidx][insertat] = cpudata / 10.0

        return True

    def results(self):

        values = self._data.get()

        if len(values[0, :, 0]) < 3:
            return {"error": ProcessingError.JOB_TOO_SHORT}

        rates = numpy.diff(values[:, :, 1]) / numpy.diff(values[:, :, 0])

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            sortarr = numpy.argsort(rates.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], rates),
                "max": self.collatedata(sortarr[:, -1], rates),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2], rates),
                "times": values[0, 1:, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {
                "times": values[0, 1:, 0].tolist(),
                "hosts": {}
            }
            includelist = self._hostdata.keys()


        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = rates[hostidx, :].tolist()
            retdata['hosts'][str(hostidx)]['dev'] = {}

            for devid in self._hostdevnames[hostidx].iterkeys():
                dpnts = len(values[hostidx, :, 0])
                retdata['hosts'][str(hostidx)]['dev'][devid] = (numpy.diff(self._hostdata[hostidx][:dpnts, int(devid)]) / numpy.diff(values[hostidx, :, 0])).tolist()

            retdata['hosts'][str(hostidx)]['names'] = self._hostdevnames[hostidx]

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result
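
The cpusallowed handling in process() above is plain numpy fancy indexing: it keeps only the CPUs assigned to the job. A tiny illustration with made-up values:

import numpy

percpu = numpy.array([12.0, 99.0, 13.0, 11.0])  # per-CPU counters on one node
cpusallowed = [0, 2, 3]                         # hypothetical allowed-CPU indices

# data[0][self._cpusallowed[nodemeta.nodename]] performs this selection:
cpudata = percpu[cpusallowed]
# cpudata == array([12., 13., 11.]) -- CPU 1, not part of the job, is dropped
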
Example #18
class CpuUserTimeseries(Plugin):
    """ Generate the CPU usage as a timeseries data """

    name = property(lambda x: "cpuuser")
    mode = property(lambda x: "timeseries")
    requiredMetrics = property(lambda x: ["kernel.percpu.cpu.user"])
    optionalMetrics = property(lambda x: [])
    derivedMetrics = property(lambda x: [])

    def __init__(self, job):
        super(CpuUserTimeseries, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}
        self._hostdevnames = {}

    def process(self, nodemeta, timestamp, data, description):

        if len(data[0]) == 0:
            # Skip datapoints that have no values
            return True

        hostidx = nodemeta.nodeindex

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[hostidx] = numpy.empty((TimeseriesAccumulator.MAX_DATAPOINTS, len(data[0])))
            self._hostdevnames[hostidx] = dict((str(k), v) for k, v in zip(description[0][0], description[0][1]))

        insertat = self._data.adddata(hostidx, timestamp, numpy.mean(data[0])/10.0)
        if insertat is not None:
            self._hostdata[hostidx][insertat] = data[0] / 10.0

        return True

    def results(self):

        values = self._data.get()

        if len(values[0, :, 0]) < 3:
            return {"error": ProcessingError.JOB_TOO_SHORT}

        rates = numpy.diff(values[:, :, 1]) / numpy.diff(values[:, :, 0])

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            sortarr = numpy.argsort(rates.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], rates),
                "max": self.collatedata(sortarr[:, -1], rates),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2], rates),
                "times": values[0, 1:, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {
                "times": values[0, 1:, 0].tolist(),
                "hosts": {}
            }
            includelist = self._hostdata.keys()


        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = rates[hostidx, :].tolist()
            retdata['hosts'][str(hostidx)]['dev'] = {}

            for devid in self._hostdevnames[hostidx].iterkeys():
                dpnts = len(values[hostidx, :, 0])
                retdata['hosts'][str(hostidx)]['dev'][devid] = (numpy.diff(self._hostdata[hostidx][:dpnts, int(devid)]) / numpy.diff(values[hostidx, :, 0])).tolist()

            retdata['hosts'][str(hostidx)]['names'] = self._hostdevnames[hostidx]

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result
Example #19
 def __init__(self, job):
     super(GpuUsageTimeseries, self).__init__(job)
     self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
     self._hostdata = {}
     self._hostdevnames = {}
Example #20
class MemUsageTimeseries(Plugin):
    """ Generate the CPU usage as a timeseries data """

    name = property(lambda x: "memused_minus_diskcache")
    mode = property(lambda x: "timeseries")
    requiredMetrics = property(lambda x: ["mem.numa.util.used", "mem.numa.util.filePages", "mem.numa.util.slab"])
    optionalMetrics = property(lambda x: [])
    derivedMetrics = property(lambda x: [])

    def __init__(self, job):
        super(MemUsageTimeseries, self).__init__(job)
        self._data = TimeseriesAccumulator(job.nodecount, self._job.walltime)
        self._hostdata = {}
        self._hostdevnames = {}

    def process(self, nodemeta, timestamp, data, description):

        hostidx = nodemeta.nodeindex

        if len(data[0]) == 0:
            # Skip data point with no data
            return True

        if nodemeta.nodeindex not in self._hostdata:
            self._hostdata[hostidx] = numpy.empty((TimeseriesAccumulator.MAX_DATAPOINTS, len(data[0])))
            self._hostdevnames[hostidx] = dict((str(k), v) for k, v in zip(description[0][0], description[0][1]))

        nodemem_kb = numpy.sum(data[0]) - numpy.sum(data[1]) - numpy.sum(data[2])
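        # mem.numa.util.* values are in KiB; 1048576 (2**20) converts to GiB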
        insertat = self._data.adddata(hostidx, timestamp, nodemem_kb / 1048576.0)
        if insertat is not None:
            self._hostdata[hostidx][insertat] = (data[0] - data[1] - data[2]) / 1048576.0

        return True

    def results(self):

        values = self._data.get()

        if len(self._hostdata) > 64:

            # Compute min, max & median data and only save the host data
            # for these hosts

            memdata = values[:, :, 1]
            sortarr = numpy.argsort(memdata.T, axis=1)

            retdata = {
                "min": self.collatedata(sortarr[:, 0], memdata),
                "max": self.collatedata(sortarr[:, -1], memdata),
                "med": self.collatedata(sortarr[:, sortarr.shape[1] // 2], memdata),
                "times": values[0, :, 0].tolist(),
                "hosts": {}
            }

            uniqhosts = Counter(sortarr[:, 0])
            uniqhosts.update(sortarr[:, -1])
            uniqhosts.update(sortarr[:, sortarr.shape[1] // 2])
            includelist = uniqhosts.keys()
        else:
            # Save data for all hosts
            retdata = {
                "times": values[0, :, 0].tolist(),
                "hosts": {}
            }
            includelist = self._hostdata.keys()


        for hostidx in includelist:
            retdata['hosts'][str(hostidx)] = {}
            retdata['hosts'][str(hostidx)]['all'] = values[hostidx, :, 1].tolist()
            retdata['hosts'][str(hostidx)]['dev'] = {}

            for devid in self._hostdevnames[hostidx].iterkeys():
                dpnts = len(values[hostidx, :, 0])
                retdata['hosts'][str(hostidx)]['dev'][devid] = self._hostdata[hostidx][:dpnts, int(devid)].tolist()

            retdata['hosts'][str(hostidx)]['names'] = self._hostdevnames[hostidx]

        return retdata

    @staticmethod
    def collatedata(args, rates):
        """ build output data """
        result = []
        for timepoint, hostidx in enumerate(args):
            try:
                result.append([rates[hostidx, timepoint], int(hostidx)])
            except IndexError:
                pass

        return result