def run(self): """Thread entrypoint. Loop indefinitely, polling collectors at self._collection_interval and collating samples.""" log.debug('Commencing resource monitoring for task "%s"' % self._task_id) next_process_collection = 0 next_disk_collection = 0 while not self._kill_signal.is_set(): now = time.time() if now > next_process_collection: next_process_collection = now + self._process_collection_interval actives = set(self._get_active_processes()) current = set(self._process_collectors) for process in current - actives: self._process_collectors.pop(process) for process in actives - current: self._process_collectors[process] = self._process_collector_factory(process.pid) for process, collector in self._process_collectors.items(): collector.sample() if now > next_disk_collection: next_disk_collection = now + self._disk_collection_interval if not self._disk_collector: sandbox = self._task_monitor.get_sandbox() if sandbox: self._disk_collector = self._disk_collector_class(sandbox) if self._disk_collector: self._disk_collector.sample() else: log.debug('No sandbox detected yet for %s' % self._task_id) try: aggregated_procs = sum(map(attrgetter('procs'), self._process_collectors.values())) aggregated_sample = sum(map(attrgetter('value'), self._process_collectors.values()), ProcessSample.empty()) disk_value = self._disk_collector.value if self._disk_collector else 0 self._history.add(now, self.ResourceResult(aggregated_procs, aggregated_sample, disk_value)) except ValueError as err: log.warning("Error recording resource sample: %s" % err) # Sleep until any of the following conditions are met: # - it's time for the next disk collection # - it's time for the next process collection # - the result from the last disk collection is available via the DiskCollector # - the TaskResourceMonitor has been killed via self._kill_signal now = time.time() next_collection = min(next_process_collection - now, next_disk_collection - now) if self._disk_collector: waiter = EventMuxer(self._kill_signal, self._disk_collector.completed_event) else: waiter = self._kill_signal waiter.wait(timeout=max(0, next_collection)) log.debug('Stopping resource monitoring for task "%s"' % self._task_id)
def test_basic_muxer():
  # timeout, no events set
  muxer = EventMuxer(Event(), Event())
  assert not muxer.wait(timeout=0.1)

  # no re-entry
  with pytest.raises(RuntimeError):
    muxer.wait()

  # bad init
  with pytest.raises(ValueError):
    EventMuxer(Event(), 'not_an_event')
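# The test above pins down the EventMuxer contract: it accepts only Event-like objects,
# wait() returns False on timeout when nothing is set, and wait() may not be re-entered.
# A minimal polling-based sketch consistent with that contract (illustrative only; the
# real implementation likely waits on each event via helper threads):
import time


class EventMuxerSketch(object):
  def __init__(self, *events):
    # Duck-typed validation works for threading.Event instances and test doubles alike.
    if not all(hasattr(event, 'is_set') and hasattr(event, 'wait') for event in events):
      raise ValueError('EventMuxer only accepts Event-like objects')
    self._events = events
    self._waited = False

  def wait(self, timeout=None):
    if self._waited:
      raise RuntimeError('EventMuxer.wait() may only be called once')
    self._waited = True
    deadline = None if timeout is None else time.time() + timeout
    while True:
      # Return as soon as any underlying event fires.
      if any(event.is_set() for event in self._events):
        return True
      if deadline is not None and time.time() >= deadline:
        return False
      time.sleep(0.01)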
def run(self): """Thread entrypoint. Loop indefinitely, polling collectors at self._collection_interval and collating samples.""" log.debug('Commencing resource monitoring for task "%s"', self._task_id) next_process_collection = 0 next_disk_collection = 0 while not self._kill_signal.is_set(): now = time.time() if now > next_process_collection: next_process_collection = now + self._process_collection_interval actives = set(self._get_active_processes()) current = set(self._process_collectors) for process in current - actives: self._process_collectors.pop(process) for process in actives - current: self._process_collectors[process] = ProcessTreeCollector(process.pid) for process, collector in self._process_collectors.items(): collector.sample() if now > next_disk_collection: next_disk_collection = now + self._disk_collection_interval if not self._disk_collector: sandbox = self._task_monitor.get_sandbox() if sandbox: self._disk_collector = self._disk_collector_provider.provides(sandbox) if self._disk_collector: self._disk_collector.sample() else: log.debug('No sandbox detected yet for %s', self._task_id) try: disk_usage = self._disk_collector.value if self._disk_collector else 0 proc_usage_dict = dict() for process, collector in self._process_collectors.items(): proc_usage_dict.update({process: self.ProcResourceResult(collector.value, collector.procs)}) self._history.add(now, self.FullResourceResult(proc_usage_dict, disk_usage)) except ValueError as err: log.warning("Error recording resource sample: %s", err) log.debug("TaskResourceMonitor: finished collection of %s in %.2fs", self._task_id, (time.time() - now)) # Sleep until any of the following conditions are met: # - it's time for the next disk collection # - it's time for the next process collection # - the result from the last disk collection is available via the DiskCollector # - the TaskResourceMonitor has been killed via self._kill_signal now = time.time() next_collection = min(next_process_collection - now, next_disk_collection - now) if self._disk_collector: waiter = EventMuxer(self._kill_signal, self._disk_collector.completed_event) else: waiter = self._kill_signal if next_collection > 0: waiter.wait(timeout=next_collection) else: log.warning('Task resource collection is backlogged. Consider increasing ' 'process_collection_interval and disk_collection_interval.') log.debug('Stopping resource monitoring for task "%s"', self._task_id)
def run(self): """Thread entrypoint. Loop indefinitely, polling collectors at self._collection_interval and collating samples.""" log.debug('Commencing resource monitoring for task "%s"' % self._task_id) next_process_collection = 0 next_disk_collection = 0 while not self._kill_signal.is_set(): now = time.time() if now > next_process_collection: next_process_collection = now + self._process_collection_interval actives = set(self._get_active_processes()) current = set(self._process_collectors) for process in current - actives: self._process_collectors.pop(process) for process in actives - current: self._process_collectors[process] = ProcessTreeCollector( process.pid) for process, collector in self._process_collectors.items(): collector.sample() if now > next_disk_collection: next_disk_collection = now + self._disk_collection_interval if not self._disk_collector: sandbox = self._task_monitor.get_sandbox() if sandbox: self._disk_collector = self._disk_collector_class( sandbox) if self._disk_collector: self._disk_collector.sample() else: log.debug('No sandbox detected yet for %s' % self._task_id) try: disk_usage = self._disk_collector.value if self._disk_collector else 0 proc_usage_dict = dict() for process, collector in self._process_collectors.items(): proc_usage_dict.update({ process: self.ProcResourceResult(collector.value, collector.procs) }) self._history.add( now, self.FullResourceResult(proc_usage_dict, disk_usage)) except ValueError as err: log.warning("Error recording resource sample: %s" % err) log.debug( "TaskResourceMonitor: finished collection of %s in %.2fs" % (self._task_id, (time.time() - now))) # Sleep until any of the following conditions are met: # - it's time for the next disk collection # - it's time for the next process collection # - the result from the last disk collection is available via the DiskCollector # - the TaskResourceMonitor has been killed via self._kill_signal now = time.time() next_collection = min(next_process_collection - now, next_disk_collection - now) if self._disk_collector: waiter = EventMuxer(self._kill_signal, self._disk_collector.completed_event) else: waiter = self._kill_signal if next_collection > 0: waiter.wait(timeout=next_collection) else: log.warning( 'Task resource collection is backlogged. Consider increasing ' 'process_collection_interval and disk_collection_interval.' ) log.debug('Stopping resource monitoring for task "%s"' % self._task_id)