Example #1
    def __init__(self,
                 data_source=None,
                 data_reader=None,
                 operator=None,
                 processCache=None,
                 jobCache=None):
        DataParser.__init__(self,
                            data_source=data_source,
                            data_reader=data_reader)
        self._operator = operator

        try:
            self._process_cache = pickle.load(open(processCache, "rb"))
        except Exception as exception:
            self._process_cache = ObjectCache()
            logging.getLogger(self.__class__.__name__).warning(
                "%s: did not load pickled ProcessCache", exception)
        else:
            logging.getLogger(self.__class__.__name__).info(
                "Initialized with pickled ProcessCache")
        try:
            self._job_cache = pickle.load(open(jobCache, "rb"))
        except Exception as exception:
            self._job_cache = ObjectCache()
            logging.getLogger(self.__class__.__name__).warning(
                "%s: did not load pickled JobCache", exception)
        else:
            logging.getLogger(self.__class__.__name__).info(
                "Initialized with pickled JobCache")
    def __init__(self,
                 data_source=None,
                 data_reader=None,
                 workernode=None,
                 run=None,
                 **kwargs):
        self._process_cache = ObjectCache()
        DataParser.__init__(self, data_source, data_reader, **kwargs)
        self._data = self._data or ObjectCache()
        self.workernode = workernode
        self.run = run
Example #3
    def __init__(self, db_id=None, job_id=None, workernode=None, run=None, tme=None, gpid=None,
                 configuration=None, last_tme=0, **kwargs):
        self._db_id = db_id
        self._job_id = job_id
        self.workernode = workernode
        self.run = run
        self._tme = int(tme) if tme is not None else 0
        self._gpid = int(gpid) if gpid is not None else 0
        self._root = None
        self._process_cache = ObjectCache()
        self._tree_initialized = False
        self._configuration = configuration
        self._last_tme = last_tme
        self._tree = None
        # for lazy loading of traffic
        self.data_source = kwargs.get("data_source", None)
        self.path = kwargs.get("path", None)
        self.variant = kwargs.get("variant", None)
    def __init__(self,
                 data_source=None,
                 data_reader=None,
                 workernode=None,
                 run=None,
                 **kwargs):
        DataParser.__init__(self, data_source, data_reader, **kwargs)
        if self.data_source is not None:
            self._data = next(
                self.data_source.object_data(pattern="traffic_data.pkl",
                                             path=kwargs.get("path", None)),
                ObjectCache())
            self._parsed_data = next(
                self.data_source.object_data(pattern="traffic_parsed_data.pkl",
                                             path=kwargs.get("path", None)),
                set())
        else:
            self._data = ObjectCache()
        self.workernode = workernode
        self.run = run
        self._last_tme = None
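
The restore logic above relies on next() with a default: object_data yields any previously archived objects matching the given pattern, and the second argument to next() supplies a fresh container when nothing was archived. A minimal, self-contained sketch of that fallback pattern (iter_archived is a hypothetical stand-in for data_source.object_data):

def iter_archived(pattern):
    """Hypothetical stand-in for data_source.object_data(pattern=...)."""
    return iter([])  # pretend nothing has been archived yet

# next(generator, default): take the first archived object if one exists,
# otherwise fall back to a freshly created container
data = next(iter_archived("traffic_data.pkl"), dict())
parsed_data = next(iter_archived("traffic_parsed_data.pkl"), set())
assert data == dict() and parsed_data == set()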
class TrafficStreamParser(DataParser):
    """
    The :py:class:`TrafficStreamParser` follows a very simple approach. When a traffic entry is
    about to be parsed, it checks whether it already knows the :py:class:`Job` the entry belongs
    to. If no :py:class:`Job` can be found, a streamlined version (identified only by its unique
    constraint values) is loaded and put into the cache. This cache is used to attach the
    individual :py:class:`Traffic` objects in the form of an array.

    When splitting the stream it does not matter whether the individual traffic entries are
    matched to existing :py:class:`Process`es; they only need to appear in the CSV files.
    """
    def __init__(self,
                 data_source=None,
                 data_reader=None,
                 workernode=None,
                 run=None,
                 **kwargs):
        DataParser.__init__(self, data_source, data_reader, **kwargs)
        if self.data_source is not None:
            self._data = next(
                self.data_source.object_data(pattern="traffic_data.pkl",
                                             path=kwargs.get("path", None)),
                ObjectCache())
            self._parsed_data = next(
                self.data_source.object_data(pattern="traffic_parsed_data.pkl",
                                             path=kwargs.get("path", None)),
                set())
        else:
            self._data = ObjectCache()
        self.workernode = workernode
        self.run = run
        self._last_tme = None

    def pop_data(self):
        for key in self._data.object_cache.keys():
            while self._data.object_cache[key]:
                wrapper = self._data.object_cache[key].pop()
                yield wrapper.traffic

    def check_caches(self, **kwargs):
        if not self._changed:
            return
        for traffic in self._data.unfound.copy():
            _, appended, _ = self._match_traffic(traffic=traffic)
            if appended:
                self._data.unfound.discard(traffic)

    def clear_caches(self):
        self._data.clear()

    # TODO: fix naming of method
    def defaultHeader(self, **kwargs):
        return Traffic.default_header(**kwargs)

    def archive_state(self, **kwargs):
        if self.data_source is not None:
            self.data_source.write_object_data(data=self._data,
                                               name="traffic_data",
                                               **kwargs)
            self.data_source.write_object_data(data=self.configuration,
                                               name="configuration",
                                               **kwargs)
            self.data_source.write_object_data(data=self._parsed_data,
                                               name="traffic_parsed_data",
                                               **kwargs)
        else:
            logging.getLogger(self.__class__.__name__).warning(
                "Archiving not done because of missing data_source")

    def _add_piece(self, piece=None):
        self._changed = True
        # look for matching job
        finished, _, matching_wrapper = self._match_traffic(traffic=piece)
        if finished and matching_wrapper is not None:
            self._data.remove_data(data=matching_wrapper)
            return matching_wrapper.traffic

        # check for other finished jobs
        self._last_tme = piece.tme - self._interval()
        return next(self._check_data(), None)

    def _match_traffic(self, traffic=None):
        # load job object from cache
        matching_traffic_wrapper = None
        finished = False
        try:
            object_index = self._data.data_index(value=traffic.tme,
                                                 key=traffic.gpid)
        except DataNotInCacheException:
            appended = self._match_with_new_wrapper(traffic=traffic)
        else:
            try:
                matching_traffic_wrapper = self._data.object_cache[
                    traffic.gpid][object_index]
                if (traffic.tme - self._interval()
                        <= matching_traffic_wrapper.exit_tme):
                    matching_traffic_wrapper.data.append(traffic)
                    appended = True
                else:
                    # remember and remove old wrapper
                    finished = True
                    appended = self._match_with_new_wrapper(traffic=traffic)
            except (IndexError, KeyError):
                # no wrapper is known
                appended = self._match_with_new_wrapper(traffic=traffic)
        return finished, appended, matching_traffic_wrapper

    def _piece_from_dict(self, data_dict=None):
        return Traffic(**data_dict)

    def _match_with_new_wrapper(self, traffic=None):
        if traffic.gpid == 0:
            wrapper = TrafficWrapper(
                Job(gpid=0,
                    tme=0,
                    last_tme=5000000000,
                    workernode=self.workernode,
                    run=self.run))
        else:
            wrapper = self._load_traffic_wrapper(traffic=traffic)
        if wrapper is not None:
            wrapper.data.append(traffic)
            self._data.add_data(data=wrapper)
            return True
        else:
            logging.getLogger(self.__class__.__name__).warning(
                "was not able to get job for traffic (gpid: %s, tme: %s, workernode: %s, run: %s)",
                traffic.gpid, traffic.tme, self.workernode, self.run)
            self._data.unfound.add(traffic)
        return False

    def _load_traffic_wrapper(self, traffic=None):
        job = Job(workernode=self.workernode,
                  run=self.run,
                  tme=traffic.tme + self._interval(),
                  gpid=traffic.gpid)
        job = self.data_source.job_description(data=job)
        if job is not None and job.last_tme > 0:
            wrapper = TrafficWrapper(job=job, configuration=self.configuration)
            return wrapper
        return None

    def _interval(self):
        if self.configuration is not None:
            return self.configuration.interval
        return 20

    def _parsing_finished(self):
        for data in self._check_data():
            yield data

    def _check_data(self):
        for key in self._data.object_cache.keys():
            for wrapper in self._data.object_cache[key][:]:
                if wrapper.exit_tme < self._last_tme:
                    self._data.object_cache[key].remove(wrapper)
                    yield wrapper.traffic
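
At its core, _match_traffic performs a time-window check: a traffic entry is appended to the cached wrapper for its gpid only if the entry falls within one monitoring interval of that wrapper's exit_tme; otherwise the old wrapper counts as finished and a new one has to be created. A minimal, self-contained sketch of that decision, using a plain dict of lists instead of the real ObjectCache and a hypothetical Wrapper class:

INTERVAL = 20  # default interval, as returned by _interval() without a configuration

class Wrapper(object):
    """Hypothetical stand-in for TrafficWrapper: collects traffic until exit_tme."""
    def __init__(self, exit_tme):
        self.exit_tme = exit_tme
        self.data = []

def match(cache, gpid, tme, traffic):
    """Append traffic to the newest wrapper for gpid, or report that it is finished."""
    wrappers = cache.setdefault(gpid, [])
    if wrappers and tme - INTERVAL <= wrappers[-1].exit_tme:
        wrappers[-1].data.append(traffic)
        return False  # wrapper still open, traffic appended
    return True       # outside the window: caller emits the old wrapper and starts a new one

cache = {42: [Wrapper(exit_tme=100)]}
assert match(cache, 42, 110, "entry-a") is False  # 110 - 20 <= 100, so it is appended
assert match(cache, 42, 200, "entry-b") is True   # 200 - 20 > 100, wrapper is finished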
Example #6
class ProcessParser(DataParser):
    """
    The :py:class:`ProcessParser` takes process events and accumulates them into complete processes.
    Complete processes belonging to the same job can then be joined by using a
    :py:class:`JobParser`.
    """
    def __init__(self,
                 data_source=None,
                 data_reader=None,
                 operator=None,
                 processCache=None,
                 jobCache=None):
        DataParser.__init__(self,
                            data_source=data_source,
                            data_reader=data_reader)
        self._operator = operator

        try:
            self._process_cache = pickle.load(open(processCache, "rb"))
        except Exception as exception:
            self._process_cache = ObjectCache()
            logging.getLogger(self.__class__.__name__).warning(
                "%s: did not load pickled ProcessCache", exception)
        else:
            logging.getLogger(self.__class__.__name__).info(
                "Initialized with pickled ProcessCache")
        try:
            self._job_cache = pickle.load(open(jobCache, "rb"))
        except Exception as exception:
            self._job_cache = ObjectCache()
            logging.getLogger(self.__class__.__name__).warning(
                "%s: did not load pickled JobCache", exception)
        else:
            logging.getLogger(self.__class__.__name__).info(
                "Initialized with pickled JobCache")

    @staticmethod
    def defaultHeader(length=9):
        """
        Returns a dictionary of fields and positions in default configuration of header.

        :param int length: Length of expected header
        :return: Dictionary with keys describing the attributes and values giving the position
        :rtype: dict
        """
        return {
            "tme": 0,
            "pid": 1,
            "ppid": 2,
            "uid": 3,
            "name": 4,
            "cmd": 5,
            "exit_code": 6,
            "state": 7,
            "gpid": 8
        }

    def parseRow(self, row=None, headerCache=None, tme=None):
        if "state" in headerCache:
            if "exit" in row[headerCache['state']]:
                # load process and set exit arguments,
                # afterwards remove it from cache
                pid = int(row[headerCache['pid']])
                process_index = self._process_cache.data_index(value=tme,
                                                               key=pid)
                try:
                    process = self._process_cache.object_cache[pid][
                        process_index]
                    if (row[headerCache['name']] not in process.name
                            and row[headerCache['cmd']] not in process.cmd):
                        # wrong process selected!
                        logging.getLogger(self.__class__.__name__).warning(
                            "process %s has not been logged", row)
                        process = Process(name=row[headerCache['name']],
                                          cmd=row[headerCache['cmd']],
                                          pid=row[headerCache['pid']],
                                          ppid=row[headerCache['ppid']],
                                          uid=row[headerCache['uid']])
                        self._process_cache.add_data(data=process)
                        process_index = self._process_cache.data_index(
                            value=tme, key=pid)
                except KeyError:
                    # exit event received first
                    process = Process()
                    process.addProcessEvent(
                        name=row[headerCache['name']],
                        cmd=row[headerCache['cmd']],
                        pid=row[headerCache['pid']],
                        ppid=row[headerCache['ppid']],
                        uid=row[headerCache['uid']],
                        tme=row[headerCache['tme']],
                        exit_code=row[headerCache['exit_code']],
                        gpid=row[headerCache['gpid']],
                        state=row[headerCache['state']])
                    self._process_cache.add_data(data=process)
                    process_index = self._process_cache.data_index(value=tme,
                                                                   key=pid)
                else:
                    process.addProcessEvent(
                        tme=row[headerCache['tme']],
                        exit_code=row[headerCache['exit_code']],
                        state=row[headerCache['state']])
                try:
                    job = self._operator.getJob(tme=tme,
                                                gpid=int(
                                                    row[headerCache['gpid']]))
                except BasicException:
                    # the job is currently not known so remember as unknown
                    self._process_cache.unfound.add(process)
                    self._process_cache.remove_data(data=process, key=pid)
                except Exception:
                    # the job is currently not known so remember as unknown
                    self._process_cache.unfound.add(process)
                    self._process_cache.remove_data(data=process, key=pid)
                else:
                    # job has been found, so save current data
                    self._finish_process(job=job, process=process)
            else:
                # a new process has been encountered
                # the process has been started, so create and remember it
                process = self._create_process(row=row,
                                               header_cache=headerCache)
                if "sge_shepherd" in process.cmd:
                    # new pilot is starting
                    try:
                        job = self._operator.getJob(
                            tme=tme,
                            gpid=int(row[headerCache['gpid']]),
                            batchsystemId=process.batchsystemId)
                        if job.exit_tme and (int(job.exit_tme) < int(tme)):
                            self._operator.createJob(
                                tme=tme,
                                gpid=int(row[headerCache['gpid']]),
                                batchsystemId=process.batchsystemId)
                        else:
                            logging.getLogger(self.__class__.__name__).error(
                                "ATTENTION: job was not created as it already seems to be "
                                "existent - job_id from DB %d vs CSV %d",
                                job.job_id, process.batchsystemId)
                    except Exception:
                        self._operator.createJob(
                            tme=tme,
                            gpid=int(row[headerCache['gpid']]),
                            batchsystemId=process.batchsystemId)
        else:
            # load object
            self._create_process(row=row, header_cache=headerCache)

    def check_caches(self, tme=None):
        logging.debug("checking caches")

        # check unfound nodes if a job already exists
        for process in self._process_cache.unfound.copy():
            try:
                job = self._operator.getJob(tme=tme, gpid=process.gpid)
            except Exception:
                # check if process is already too old and remove it
                if tme - process.exit_tme >= 86400:
                    self._operator.dumpErrors(typename="process", data=process)
                    self._process_cache.unfound.discard(process)
            else:
                logging.info("removed unfound node")
                # job has been found, so save current data
                self._finish_process(job=job, process=process)
                self._process_cache.unfound.discard(process)

        for jid in self._job_cache.object_cache.keys():
            for job_parser in self._job_cache.object_cache[jid][:]:
                if jid == 0:
                    self._save_raw_processes(job_parser=job_parser, job_id=jid)
                else:
                    result = self._save_and_delete_job(job_parser=job_parser,
                                                       job_id=jid)
                    if not result:
                        # check if job is already too old and remove it
                        job = self._operator.getJobById(jobId=jid)
                        if job.last_tme and (tme - job.last_tme >= 86400):
                            self._save_raw_processes(job_parser=job_parser,
                                                     job_id=jid)

    def clear_caches(self, **kwargs):
        """
        Method clears the current caches and takes care to pickle the uncompleted processes
        and jobs.
        """
        logging.debug("clearing caches")

        pickle.dump(
            self._process_cache,
            open(self._operator.getPicklePath(typename="process"), "wb"), -1)
        self._process_cache.clear()

        pickle.dump(self._job_cache,
                    open(self._operator.getPicklePath(typename="job"), "wb"),
                    -1)
        self._job_cache.clear()

    def _create_process(self, row=None, header_cache=None):
        process = Process.from_dict(row=dict(zip(header_cache, row)))
        self._process_cache.add_data(data=process)
        return process

    def _finish_process(self, job=None, process=None):
        if not job.last_tme:
            job.last_tme = process.exit_tme
        if process.exit_tme > job.last_tme:
            job.last_tme = process.exit_tme
            self._operator.updateJob(job)
        self._move_process(process=process, job_id=(job.id_value or 0))

        # job is complete so remember and save it
        if "sge_shepherd" in process.cmd:
            job.exit_tme = process.exit_tme

            job_parser = self._job_cache.get_data(key=job.id_value, value=0)
            if not self._save_and_delete_job(
                    job_parser=job_parser, job_id=job.id_value, job=job):
                logging.info("waiting for more processes to complete job...")

                # remove dbJob from Cache
                job.valid = job_parser.isValid()
                job.completed = False
                self._operator.saveAndDeleteJob(job)

    def _move_process(self, process=None, job_id=None):
        if job_id == 0:
            logging.info("received jobId 0")
        job_parser = self._job_cache.get_data(value=0, key=job_id)
        try:
            job_parser.addProcess(process=process)
        except NonUniqueRootException as exception:
            logging.getLogger(self.__class__.__name__).error(
                "%s: added second root node to tree with id %d - batchsystemId: %d",
                exception, job_id, process.batchsystemId)
        except Exception:
            job_parser = JobParser()
            job_parser.add_piece(piece=process)
            # add tme field for ObjectCache
            job_parser.tme = 0
            self._job_cache.add_data(data=job_parser, key=job_id)
        self._process_cache.remove_data(data=process)

    def _save_and_delete_job(self, job_parser=None, job_id=None, job=None):
        tree = job_parser.regenerateTree()
        if tree is not None:
            path = self._operator.getPath(typename="process", jobId=job_id)
            with open(path, "w+") as csvfile:
                csvfile.write("# Created by %s on %s\n" %
                              ("processparser.py", time.strftime("%Y%m%d")))
                csvfile.write("# Input Data: Raw Stream Data\n")
                csvfile.write("# Output Data: Combined Process Events\n")
                csvfile.write("%s\n" % (tree.root.value.getHeader()))
                for node in tree.walkBFS():
                    csvfile.write("%s\n" % (node.value.getRow()))

            if job is None:
                job = self._operator.getJobById(job_id)
            if job is not None:
                job.valid = job_parser.isValid()
                job.completed = True
                job.uid = job_parser.uid
                self._operator.saveAndDeleteJob(job)
            self._job_cache.remove_data(data=job_parser, key=job_id)
            return True
        # otherwise keep job in cache and wait for more...
        return False

    def _save_raw_processes(self, job_parser=None, job_id=None):
        process_cache = job_parser.processCache
        if job_id > 0:
            logging.getLogger(self.__class__.__name__).error(
                "have not been able to write tree with %d processes for job %d, writing without "
                "tree", job_parser.processCount(), job_id)
            for pid in process_cache:
                for node in process_cache[pid]:
                    self._operator.dumpData(typename="process",
                                            data=node.value,
                                            jobId=job_id)
        else:
            logging.getLogger(self.__class__.__name__).error(
                "have not been able to write tree with %d processes, writing without tree",
                job_parser.processCount())
            for pid in process_cache:
                for node in process_cache[pid]:
                    self._operator.dumpIncompletes(typename="process",
                                                   data=node.value)
        self._job_cache.remove_data(data=job_parser, key=job_id)
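
The ProcessParser above tries to restore previously pickled caches in its constructor and silently falls back to empty ones, while clear_caches pickles them back to disk so an interrupted run can resume. A sketch of that load-with-fallback / dump round trip, using a plain dict in place of ObjectCache:

import logging
import pickle

def load_cache(path):
    """Load a previously pickled cache, falling back to an empty one on any error."""
    try:
        with open(path, "rb") as pkl_file:
            return pickle.load(pkl_file)
    except Exception as exception:
        logging.getLogger("CacheDemo").warning("%s: did not load pickled cache", exception)
        return {}

def dump_cache(cache, path):
    """Persist the cache so that an interrupted run can be resumed later."""
    with open(path, "wb") as pkl_file:
        pickle.dump(cache, pkl_file, -1)

cache = load_cache("process_cache.pkl")  # empty dict on the first run
cache["demo"] = 1
dump_cache(cache, "process_cache.pkl")   # the next load_cache() call returns it again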
    def load_archive_state(self, path=None):
        if self.data_source is not None:
            self._process_cache = next(
                self.data_source.object_data(pattern="process_cache.pkl",
                                             path=path), ObjectCache())
class ProcessStreamParser(DataParser):
    """
    The :py:class:`ProcessStreamParser` works on the log files produced by the GNM monitoring tool.
    It parses, one after the other, the lines belonging to one specific run on one specific
    workernode and takes care of splitting the data belonging to different :py:class:`Job`s.
    As soon as an object has been finished it is handed to the :py:attr:`DataSource` for further
    handling/storage.

    As the completeness regarding the traffic cannot be determined automatically, it is ... what?!
    """
    job_root_name = "sge_shepherd"

    def __init__(self,
                 data_source=None,
                 data_reader=None,
                 workernode=None,
                 run=None,
                 **kwargs):
        self._process_cache = ObjectCache()
        DataParser.__init__(self, data_source, data_reader, **kwargs)
        self._data = self._data or ObjectCache()
        self.workernode = workernode
        self.run = run

    def load_archive_state(self, path=None):
        if self.data_source is not None:
            self._process_cache = next(
                self.data_source.object_data(pattern="process_cache.pkl",
                                             path=path), ObjectCache())

    @staticmethod
    def defaultHeader(**kwargs):
        return Job.default_header(**kwargs)

    def archive_state(self, **kwargs):
        if self.data_source is not None:
            self.data_source.write_object_data(data=self._data,
                                               name="data",
                                               **kwargs)
            self.data_source.write_object_data(data=self._process_cache,
                                               name="process_cache",
                                               **kwargs)
            self.data_source.write_object_data(data=self.configuration,
                                               name="configuration",
                                               **kwargs)
            self.data_source.write_object_data(data=self._parsed_data,
                                               name="parsed_data",
                                               **kwargs)
        else:
            logging.getLogger(self.__class__.__name__).warning(
                "Archiving not done because of missing data_source")

    def pop_data(self):
        _data = self._data
        for key in _data.object_cache.keys():
            while _data.object_cache[key]:
                yield _data.object_cache[key].pop()

    def check_caches(self, **kwargs):
        if not self._changed:
            return
        _finish_process = self._finish_process
        _process_cache = self._process_cache
        for process in self._process_cache.unfound.copy():
            is_finished, job = _finish_process(process)
            if is_finished:
                _process_cache.unfound.discard(process)
            else:
                # try to load job from data_source
                job_object = Job(workernode=self.workernode,
                                 run=self.run,
                                 tme=process.tme,
                                 gpid=process.gpid,
                                 configuration=self.configuration,
                                 data_source=self.data_source)
                job_reader = self.data_source.read_job(data=job_object,
                                                       path=kwargs.get(
                                                           "path", None))
                if job_reader is not None:
                    for job in job_reader:
                        if job is not None and job.job_id:
                            job_object = job
                            job_object.add_process(process=process)
                            _process_cache.unfound.discard(process)

    def clear_caches(self):
        self._data.clear()
        self._process_cache.clear()

    def _piece_from_dict(self, piece=None):
        return piece

    def _add_piece(self, process_dict=None):
        self._changed = True
        _process_cache = self._process_cache
        if int(process_dict.get("gpid", 0)) > 0:
            if "exit" in process_dict.get("state", None):
                try:
                    # in an exit event, tme denotes the exit_tme
                    matching_process = _process_cache.get_data(
                        value=int(process_dict.get("tme", 0)),
                        key=int(process_dict.get("pid", 0)))
                except DataNotInCacheException:
                    _process_cache.add_data(data=Process.from_process_event(
                        **process_dict))
                else:
                    if matching_process is not None:
                        try:
                            matching_process.addProcessEvent(**process_dict)
                        except ProcessMismatchException as exception:
                            logging.getLogger(
                                self.__class__.__name__).warning(exception)
                            _process_cache.add_data(
                                data=Process.from_process_event(
                                    **process_dict))
                        else:
                            _process_cache.remove_data(
                                data=matching_process,
                                key=matching_process.pid)
                            is_finished, job = self._finish_process(
                                process=matching_process)
                            if not is_finished and job is None:
                                _process_cache.unfound.add(matching_process)
                            elif is_finished:
                                self._data.remove_data(data=job, key=job.gpid)
                                return job
                    else:
                        process = Process.from_process_event(**process_dict)
                        logging.getLogger(self.__class__.__name__).warning(
                            "received exit event of process before its start event: %s",
                            process)
                        _process_cache.add_data(data=process)
            else:
                process = Process.from_process_event(**process_dict)
                if self.job_root_name in process.name:
                    # create new dummy job
                    self._data.add_data(data=Job(
                        workernode=self.workernode,
                        run=self.run,
                        tme=process.tme,
                        gpid=process.gpid,
                        job_id=process.batchsystemId,
                        configuration=self.configuration,
                        data_source=self.data_source),
                                        key=process.gpid,
                                        value=process.tme)
                _process_cache.add_data(data=process)

    def _finish_process(self, process=None):
        try:
            # FIXME: I have no idea why I currently have to add this...
            object_index = self._data.data_index(value=process.tme,
                                                 key=process.gpid)
        except DataNotInCacheException:
            return False, None
        try:
            matching_job = self._data.object_cache[process.gpid][object_index]
        except KeyError:
            logging.getLogger(self.__class__.__name__).debug(
                "no matching job has been found %s", process)
        else:
            matching_job.add_process(process=process,
                                     is_root=(self.job_root_name
                                              in process.name))
            if self.job_root_name in process.name:
                return True, matching_job
            return False, matching_job
        return False, None
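
In _add_piece above, every exit event is paired with the start event cached earlier under the same pid; if no start event is known yet, the exit event itself is cached so the pair can still be completed later. A minimal sketch of that pairing idea, with a plain dict standing in for ObjectCache and simple event dicts instead of Process objects:

def add_event(cache, event):
    """Pair start/exit events per pid; return a completed record or None."""
    pid = event["pid"]
    if event["state"] == "exit":
        started = cache.pop(pid, None)
        if started is None:
            cache[pid] = event  # exit seen before start: remember it and wait
            return None
        started["exit_tme"] = event["tme"]
        return started
    cache[pid] = event          # start event: remember it until the exit arrives
    return None

cache = {}
assert add_event(cache, {"pid": 7, "state": "start", "tme": 10}) is None
finished = add_event(cache, {"pid": 7, "state": "exit", "tme": 25})
assert finished["exit_tme"] == 25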
Example #9
class Job(object):
    """
    This class acts as a wrapper for the different processes forming a batch system job.
    It allows access to the job tree.
    """
    def __init__(self, db_id=None, job_id=None, workernode=None, run=None, tme=None, gpid=None,
                 configuration=None, last_tme=0, **kwargs):
        self._db_id = db_id
        self._job_id = job_id
        self.workernode = workernode
        self.run = run
        self._tme = int(tme) if tme is not None else 0
        self._gpid = int(gpid) if gpid is not None else 0
        self._root = None
        self._process_cache = ObjectCache()
        self._tree_initialized = False
        self._configuration = configuration
        self._last_tme = last_tme
        self._tree = None
        # for lazy loading of traffic
        self.data_source = kwargs.get("data_source", None)
        self.path = kwargs.get("path", None)
        self.variant = kwargs.get("variant", None)

    def clear_caches(self):
        self._root = None
        self._process_cache.clear()
        self._tree_initialized = False

    def prepare_traffic(self):
        # FIXME: the correct path is sometimes not built
        # inside invalidated_exception/c01-007-102/1/112468-6-process.csv
        # inside invalidated_exception/c01-007-102/1/112468-traffic.csv
        try:
            if self.variant is not None:
                traffic = self.data_source.read_traffic(path=self.path, name="%s-%s" % (
                    self.db_id, self.variant))
            else:
                traffic = self.data_source.read_traffic(path=self.path, name=self.db_id)
            for traffics in traffic:
                for element in traffics:
                    self.add_traffic(element)
        except DataNotInCacheException as e:
            logging.getLogger(self.__class__.__name__).warning(
                "Traffic object (%s) could not be appended to job: %s", traffic, e
            )
        except AttributeError:
            raise NoDataSourceException
        except FilePathException:
            raise
        except IOError:
            # FIXME: here I should maybe do something about it...
            pass

    @property
    def last_tme(self):
        """
        Method to return the last known tme within the job.

        :return: last known tme
        """
        return self._last_tme or self.exit_tme

    @last_tme.setter
    def last_tme(self, value):
        """
        Method to set the last known tme.

        :param value: the last tme to set
        """
        self._last_tme = value

    @property
    def configuration(self):
        """
        Method to return the configuration the job was created/recorded with.

        :return: extracted configuration
        """
        if self._configuration is None:
            return MonitoringConfiguration(version="alpha", level="treeconnection")
        return self._configuration

    @configuration.setter
    def configuration(self, value=None):
        """
        Method to set the configuration the job was created/recorded with.

        :param value: the configuration to set
        """
        self._configuration = value

    @property
    def db_id(self):
        """
        Method to return the associated id of the job within the database.

        :return: job identifier in database
        """
        return self._db_id or self.job_id

    @db_id.setter
    def db_id(self, value=None):
        """
        Method to set the associated id of the job within the database.

        :param value: job identifier in database to set
        """
        self._db_id = value

    @property
    def job_id(self):
        """
        Method returns the job id. If a valid id from the batchsystem can be extracted, it is
        returned instead of a self-assigned one.

        :return: batchsystem job id or self-assigned
        """
        try:
            batchsystem_id = self._root.value.batchsystemId
            if batchsystem_id is not None:
                return batchsystem_id
            else:
                return self._job_id
        except AttributeError:
            return self._job_id

    @job_id.setter
    def job_id(self, job_id=None):
        """
        Method to set a self-assigned job identifier.

        :param job_id: job identifier to be set
        """
        self._job_id = job_id

    @property
    def gpid(self):
        """
        Method that returns the associated group identifier of the job.

        :return: group identifier
        """
        try:
            return self._root.value.gpid
        except AttributeError:
            return self._gpid

    @property
    def uid(self):
        """
        Method that returns the associated user identifier of the job. As the root process
        has uid 0, the first valid uid that is found in the process hierarchy is used.

        :return: first valid uid for job
        """
        process_cache = self._process_cache.object_cache
        for pid in process_cache:
            for node in process_cache[pid]:
                if node.value.uid > 0:
                    return node.value.uid
        return 0

    @property
    def tme(self):
        """
        Method that returns the tme when the job was started.

        :return: tme of the job
        """
        try:
            return self._root.value.tme
        except AttributeError:
            return self._tme

    @property
    def exit_tme(self):
        """
        Method that returns the exit_tme when the job was finished.
        Attention: this does not necessarily have to be the last known tme!

        This method returns None if the job has not been finished so far.

        :return: Exit_tme or None if unfinished
        """
        try:
            return self._root.value.exit_tme
        except AttributeError:
            return None

    @property
    def exit_code(self):
        """
        Method returns the exit_code of the job.

        :return: exit_code of job
        """
        return self._root.value.exit_code

    @property
    def tree(self):
        """
        Method returns the assigned tree of the job.

        :return: process tree of job
        """
        return self._get_tree()

    @property
    def process_cache(self):
        """
        This method gives access to the actual process cache used for building the job.

        :return: process_cache
        """
        return self._process_cache.object_cache

    @property
    def faulty_nodes(self):
        """
        This method gives access to faulty nodes that have not correctly been assigned to the job.

        :return: faulty_nodes
        """
        return self._process_cache.faulty_nodes

    def regenerate_tree(self):
        """
        Method to re-generate the assigned tree. Because the generated tree is cached internally,
        it should be re-generated whenever the job has changed after the tree was built.

        :return: re-generated process tree of job
        """
        return self._get_tree(reinitialize=True)

    def add_node_object(self, node=None, is_root=False):
        """
        Method adds a process that is already encapsulated into a node to the current job.

        :param node: node to be added
        :param is_root: is the process root of the tree
        """
        self._add(node=node, is_root=is_root)

    def add_process(self, process=None, is_root=False):
        """
        Method that adds a process to the current job.

        :param process: process to be added
        :param is_root: is the process root of the tree
        """
        node = Node(value=process)
        self._add(node=node, is_root=is_root)

    def add_traffic(self, traffic=None):
        """
        Method to add traffic to the current job.

        :param traffic: traffic to be added
        """
        process_node = self._process_cache.get_data(
            value=traffic.tme + (self._configuration.interval if self._configuration else 20),
            key=traffic.pid,
            value_function=lambda data: data.value.tme
        )
        process_node.value.traffic.append(traffic)

    def is_valid(self):
        """
        Method that checks if the current job is valid. It validates that at most one process has
        no known parent (the root process) and that a root exists at all. It also recursively
        checks that all processes are valid, meaning each process has both a start and an exit
        event.

        :return: true if job seems to be valid, false otherwise
        """
        if len(self._process_cache.faulty_nodes) > 1 or self._root is None:
            return False
        process_cache = self.process_cache
        for pid in process_cache:
            for node in process_cache[pid]:
                if not node.value.valid:
                    return False
        return True

    def is_complete(self):
        """
        Method that tells if the job is complete by checking whether the process tree can be
        generated.
        Attention: a return value of True does not guarantee completeness; some processes might
        still be missing.

        :return: true if tree can be generated, false otherwise
        """
        tree = self.tree
        return tree is not None

    def parent(self, process=None):
        try:
            parent = self._process_cache.get_data(
                value=process.tme,
                key=process.ppid,
                value_function=lambda data: data.value.tme,
                range_end_value_function=lambda data: data.value.exit_tme,
                validate_range=True).value
        except DataNotInCacheException:
            parent = None
            if process == self._root.value:
                raise ObjectIsRootException(process)
        return parent

    def processes(self):
        """
        Generator that returns processes of the job in depth first order.

        :return: process generator of the job
        """
        tree = self.tree
        if tree is not None:
            for node, depth in tree.walkDFS():
                node.value.tree_depth = depth
                yield node.value
        else:
            logging.getLogger(self.__class__.__name__).warning("There is no tree for current job")
            process_cache = self.process_cache
            for pid in process_cache:
                for node in process_cache[pid]:
                    yield node.value

    def processes_in_order(self):
        """
        Method that returns processes in order depending on tme and their pid. This is especially
        useful when replaying a file as a stream.

        :return: process generator of the job
        """
        tree = self.tree
        # create the actual array
        processes = [node.value for node, _ in tree.walkDFS()]
        processes_in_order = []
        processes.sort(key=lambda x: x.tme)
        current_tme = processes[0].tme
        current_processes = []
        current_pid = processes[0].gpid - 1  # to also include first pid in correct order
        current_pid_tme = processes[0].tme
        current_pid_exit_tme = processes[0].exit_tme
        _create_order = self._create_order
        while processes:
            if processes[0].tme == current_tme:
                current_processes.append(processes.pop(0))
            else:
                # do sorting
                ordered = _create_order(current_processes, current_pid, current_pid_tme, current_pid_exit_tme)
                # reset values
                current_tme = processes[0].tme
                current_pid = ordered[-1].pid
                current_pid_tme = ordered[-1].tme
                current_pid_exit_tme = ordered[-1].exit_tme
                processes_in_order.extend(ordered)
                current_processes = []
        if current_processes:
            ordered = _create_order(current_processes, current_pid, current_pid_tme, current_pid_exit_tme)
            processes_in_order.extend(ordered)
        for process in processes_in_order:
            yield process

    def _create_order(self, elements, start_pid, start_pid_tme, start_pid_exit_tme):
        elements_in_order = []
        elements.sort(key=lambda x: x.pid)
        # check if the tmes from start_pid are close; then we can directly consider start_pid
        base_tme = elements[0].tme
        if base_tme - start_pid_tme > 100:
            ppid_list = [element.ppid for element in elements]
            try:
                candidate_generator = (element for element in elements if element.pid in ppid_list)
                candidate = next(candidate_generator)
                while candidate.pid in ppid_list:
                    candidate = next(candidate_generator)
            except StopIteration:
                candidate = None
            if candidate is not None:
                # check if there is something on the left to be taken...
                # TODO: when it needs to go on from the back, I do have a problem so far...
                possible_start_elements = [element.pid for element in elements if element.pid < candidate.pid]
                last_valid = candidate.pid
                possible_element = possible_start_elements.pop()
                while last_valid - possible_element < 50 and len(possible_start_elements) > 0:
                    last_valid = possible_element
                    possible_element = possible_start_elements.pop()
                start_pid = last_valid - 1

        bigger = [process for process in elements if process.pid > start_pid]
        elements_in_order.extend(bigger)
        smaller = [process for process in elements if process.pid <= start_pid]
        elements_in_order.extend(smaller)
        # as long as there are items that depend on others in the back, put them to the back of the list
        pid_list = [element.pid for element in elements_in_order]
        for index, element in enumerate(elements_in_order[:]):
            if element.ppid in pid_list[index + 1:]:
                # move element to back
                elements_in_order.remove(element)
                elements_in_order.append(element)
        return elements_in_order

    def process_count(self):
        """
        Method that returns the count of the processes inside the job.

        :return: process count
        """
        count = 0
        process_cache = self.process_cache
        for pid in process_cache:
            count += len(process_cache[pid])
        return count

    @staticmethod
    def default_header(**kwargs):
        """
        Returns the header for CSV output in dictionary format.

        :param length: Number of elements being expected
        :return: Dictionary of keys and their positions
        """
        length = kwargs.get("length", 15)
        if length == 9:
            return {"tme": 0, "pid": 1, "ppid": 2, "uid": 3, "name": 4, "cmd": 5, "exit_code": 6,
                    "state": 7, "gpid": 8}
        return {"tme": 0, "exit_tme": 1, "pid": 2, "ppid": 3, "gpid": 4, "uid": 5, "name": 6,
                "cmd": 7, "error_code": 8, "signal": 9, "valid": 10, "int_in_volume": 11,
                "int_out_volume": 12, "ext_in_volume": 13, "ext_out_volume": 14}

    def _add(self, node=None, is_root=False):
        if "sge_shepherd" in node.value.cmd or is_root or node.value.tree_depth == 0:
            if self._root is not None:
                raise NonUniqueRootException
            self._root = node
        if node.value.exit_tme > self._last_tme:
            self._last_tme = node.value.exit_tme
        self._process_cache.add_data(data=node, key=node.value.pid, value=node.value.tme,
                                     value_function=lambda data: data.value.tme)

    def _get_tree(self, reinitialize=False):
        if reinitialize or not self._tree_initialized:
            if reinitialize:
                self._tree = None
                self._process_cache.faulty_nodes = set()
                for pid in self.process_cache:
                    for node in self.process_cache[pid]:
                        node.children = []
            self._initialize_tree()
            self._tree_initialized = True
            if self._tree is None:
                if (len(self._process_cache.faulty_nodes) <= 1 and self._root and
                       (Tree(self._root).getVertexCount() == self.process_count())):
                    self._tree = Tree(self._root)
            logging.getLogger(self.__class__.__name__).info(
                "faulty nodes: %s", self._process_cache.faulty_nodes
            )
        return self._tree

    # @staticmethod
    # def _add_function(child, children, tmes, pids):
    #     tme_index = bisect.bisect_left(tmes, child.value.tme)
    #     # check for equality of following element
    #     # FIXME: removed, because now I have special function for orderings
    #     if tmes[tme_index] == child.value.tme or tmes[tme_index + 1] == child.value.tme \
    #             if len(children) > tme_index + 1 else True:
    #         right_index = bisect.bisect_right(tmes, child.value.tme)
    #         pid_range = pids[tme_index:right_index]
    #         # I also need to do a sorting regarding pid
    #         # so first filter relevant elements with same tme
    #         pid_index = bisect.bisect_left(pid_range, child.value.pid)
    #         return tme_index + pid_index
    #     return tme_index

    def _initialize_tree(self):
        logging.getLogger(self.__class__.__name__).info("Initializing tree structure")
        # rebind to local variables for faster lookup
        process_cache = self.process_cache  # object cache
        _process_cache = self._process_cache
        self_process_cache_get_data = self._process_cache.get_data
        # sort the keys first to get the correct ordering in the final tree
        for pid in process_cache.keys():
            for node in process_cache[pid][:]:
                try:
                    parent = self_process_cache_get_data(
                        value=node.value.tme,
                        key=node.value.ppid,
                        remember_error=True,
                        value_function=lambda data: data.value.tme,
                        range_end_value_function=lambda data: data.value.exit_tme,
                        validate_range=True)
                except DataNotInCacheException:
                    # TODO: maybe also check for exit tme
                    if self._root is not None and \
                            (node.value.tme < self._root.value.tme or
                                node.value.exit_tme > self._root.value.exit_tme):
                        # skip it manually
                        # it is valid here to remove the nodes...
                        _process_cache.remove_data(node, node.value.pid, node.value.tme)
                        _process_cache.faulty_nodes.remove(node.value.ppid)
                    else:
                        if node is self._root:
                            continue
                        logging.getLogger(self.__class__.__name__).warning("Skipping tree generation")
                        return
                else:
                    if parent:
                        #parent.add(node, orderPosition=self._add_function)
                        parent.add(node)
        logging.getLogger(self.__class__.__name__).info(
            "no parents found for %d nodes", len(self._process_cache.faulty_nodes)
        )

    def __repr__(self):
        return "%s: db_id (%s), job_id (%s), gpid (%d), workernode (%s), configuration (%s), " \
               "run (%s), tme (%d), root (%s), process_cache (%s), tree_initialized (%s)" % \
               (self.__class__.__name__, self.db_id, self.job_id, self.gpid, self.workernode,
                self.configuration, self.run, self.tme, (self._root and self._root.value),
                self._process_cache, self._tree_initialized)
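
Tree building in _initialize_tree resolves each process's parent by looking up its ppid in the cache and validating that the child's start time lies within the candidate parent's tme/exit_tme range (the value_function and range_end_value_function arguments). A self-contained sketch of that parent lookup, with a plain dict of lists instead of ObjectCache and a hypothetical Proc record:

from collections import namedtuple

# hypothetical, minimal process record for illustration only
Proc = namedtuple("Proc", ["pid", "ppid", "tme", "exit_tme"])

def find_parent(cache, child):
    """Return the cached process with pid == child.ppid whose lifetime covers child.tme."""
    for candidate in cache.get(child.ppid, []):
        if candidate.tme <= child.tme <= candidate.exit_tme:
            return candidate
    return None  # the real code remembers such misses in faulty_nodes

cache = {1: [Proc(pid=1, ppid=0, tme=0, exit_tme=100)]}
assert find_parent(cache, Proc(pid=2, ppid=1, tme=10, exit_tme=50)).pid == 1
assert find_parent(cache, Proc(pid=3, ppid=9, tme=10, exit_tme=20)) is None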
Example #10
    def setUp(self):
        self.object_cache = ObjectCache()
Example #11
class TestObjectCacheFunctions(unittest.TestCase):
    def setUp(self):
        self.object_cache = ObjectCache()

    def test_setUp(self):
        self.assertEqual(len(self.object_cache.object_cache), 0,
                         "object cache not empty")
        self.assertEqual(len(self.object_cache.faulty_nodes), 0,
                         "object cache not empty")
        self.assertEqual(len(self.object_cache.unfound), 0,
                         "object cache not empty")

    def test_insertRemove(self):
        process = Process(tme=1, pid=2)
        process2 = Process(tme=2, pid=2)
        process3 = Process(tme=0, pid=2)
        process4 = Process(tme=0, pid=3)

        self.assertEqual(len(self.object_cache.object_cache), 0,
                         "object cache not empty")
        self.object_cache.add_data(data=process)
        self.assertEqual(len(self.object_cache.object_cache), 1,
                         "object cache should contain one process")

        loadedProcess = self.object_cache.get_data(value=process.tme,
                                                   key=process.pid)
        self.assertIsNotNone(loadedProcess, "No object loaded from cache")
        self.assertEqual(process, loadedProcess, "objects should be identical")
        self.object_cache.remove_data(data=process)
        self.assertEqual(len(self.object_cache.object_cache), 0,
                         "object cache not empty")

        self.object_cache.add_data(data=process)
        self.object_cache.add_data(data=process2)
        self.object_cache.add_data(data=process3)
        self.object_cache.add_data(data=process4)
        self.assertEqual(
            len(self.object_cache.object_cache), 2,
            "object cache should contain two different categories")
        loadedProcess = self.object_cache.get_data(value=process2.tme,
                                                   key=process2.pid)
        self.assertEqual(process2, loadedProcess,
                         "objects should be identical")
        loadedProcess = self.object_cache.get_data(value=process3.tme,
                                                   key=process3.pid)
        self.assertEqual(process3, loadedProcess,
                         "objects should be identical")
        loadedProcess = self.object_cache.get_data(value=process.tme,
                                                   key=process.pid)
        self.assertEqual(process, loadedProcess, "objects should be identical")
        loadedProcess = self.object_cache.get_data(value=process4.tme,
                                                   key=process4.pid)
        self.assertEqual(process4, loadedProcess,
                         "objects should be identical")

    def test_removeObject(self):
        process = Process(tme=1, pid=2)
        process2 = Process(tme=2, pid=2)
        process3 = Process(tme=0, pid=2)
        process4 = Process(tme=0, pid=3)

        self.object_cache.add_data(data=process)

        self.assertEqual(len(self.object_cache.object_cache), 1,
                         "object cache should not be empty")
        self.object_cache.remove_data(data=process)
        self.assertEqual(len(self.object_cache.object_cache), 0,
                         "object cache should be empty")

        self.object_cache.add_data(data=process2)

    def test_clear(self):
        process = Process(tme=1, pid=2)
        process2 = Process(tme=2, pid=2)
        process3 = Process(tme=0, pid=2)
        process4 = Process(tme=0, pid=3)

        self.object_cache.add_data(data=process)
        self.object_cache.add_data(data=process2)
        self.object_cache.add_data(data=process3)
        self.object_cache.add_data(data=process4)

        self.assertEqual(
            len(self.object_cache.object_cache), 2,
            "object cache should contain two different categories")
        self.assertEqual(len(self.object_cache.faulty_nodes), 0,
                         "object cache should not have faulty nodes")
        self.assertEqual(len(self.object_cache.unfound), 0,
                         "object cache should not have unfound nodes")

        self.object_cache.unfound.add(process)
        self.object_cache.clear()

        self.assertEqual(len(self.object_cache.object_cache), 0,
                         "object cache should be empty")
        self.assertEqual(len(self.object_cache.faulty_nodes), 0,
                         "faulty nodes should be empty")
        self.assertEqual(len(self.object_cache.unfound), 0,
                         "unfound should be empty")

    def test_update(self):
        process = Process(tme=1, pid=2)
        self.object_cache.add_data(data=process)

        theProcess = self.object_cache.get_data(value=process.tme,
                                                key=process.pid)
        theProcess.name = "test"
        newProcess = self.object_cache.get_data(value=process.tme,
                                                key=process.pid)
        self.assertEqual("test", newProcess.name, "name is not identical")

    def test_updateIndex(self):
        process = Process(tme=1, pid=2, name="old")
        process2 = Process(tme=1, pid=2, name="new")

        self.object_cache.add_data(data=process)

        index = self.object_cache.data_index(value=process.tme,
                                             key=process.pid)
        self.object_cache.object_cache[process.pid][index] = process2

        newProcess = self.object_cache.get_data(value=process.tme,
                                                key=process.pid)
        self.assertEqual(process2.name, newProcess.name)

    def test_getNullObject(self):
        self.assertRaises(DataNotInCacheException, self.object_cache.get_data,
                          1, 1)
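
Taken together, these tests pin down the ObjectCache behaviour the parsers above rely on: add_data files an object under its pid, get_data(value=..., key=...) retrieves it by (tme, pid), remove_data drops it again, a cache miss raises DataNotInCacheException, and clear() also empties faulty_nodes and unfound. A compact usage sketch, assuming ObjectCache, Process and DataNotInCacheException are importable from the project (their import paths are not shown in these examples):

cache = ObjectCache()
process = Process(tme=1, pid=2)

cache.add_data(data=process)                      # keyed by pid, positioned by tme
assert cache.get_data(value=1, key=2) == process  # lookup by (tme, pid)

cache.remove_data(data=process)
try:
    cache.get_data(value=1, key=2)
except DataNotInCacheException:
    pass                                          # cache misses raise instead of returning None

cache.clear()                                     # also resets faulty_nodes and unfound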