def __iter__(self):
    """
    Iterate over the task results as they arrive, yielding the
    unpickled values.

    :raises: the exception raised inside a task, if any
    """
    self.received = []  # sizes of the pickled outputs, presumably in bytes
    if self.num_tasks == 0:
        return
    for result in self.iresults:
        check_mem_usage()  # log a warning if too much memory is used
        if isinstance(result, BaseException):
            # this happens with WorkerLostError with celery
            raise result
        elif isinstance(result, Result):
            val = result.get()
            self.received.append(len(result.pik))
        else:  # this should never happen
            raise ValueError(result)
        if OQ_DISTRIBUTE == 'processpool':
            # BUG FIX: the parentheses are required so that the *whole*
            # RSS (parent + children) is converted to GB; previously only
            # the children's sum was divided, leaving the parent in bytes
            mem_gb = (memory_rss(os.getpid()) +
                      sum(memory_rss(pid) for pid in Starmap.pids)) / GB
        else:
            mem_gb = numpy.nan
        next(self.log_percent)
        if not self.name.startswith('_'):  # no info for private tasks
            self.save_task_info(result.mon, mem_gb)
        yield val
    if self.received:
        tot = sum(self.received)
        max_per_task = max(self.received)
        self.progress('Received %s of data, maximum per task %s',
                      humansize(tot), humansize(max_per_task))
def __iter__(self):
    """
    Yield the unpickled outputs as they arrive, saving per-task info
    and logging a summary of the data received at the end.

    :raises: the exception raised inside a task, if any
    """
    if self.iresults == ():
        return ()
    self.received = []
    private = self.name.startswith('_')  # no info for private tasks
    for res in self.iresults:
        check_mem_usage()  # log a warning if too much memory is used
        if isinstance(res, BaseException):
            # this happens with WorkerLostError with celery
            raise res
        if not isinstance(res, Result):  # this should never happen
            raise ValueError(res)
        val = res.get()
        self.received.append(len(res.pik))
        if OQ_DISTRIBUTE == 'processpool':
            children_rss = sum(memory_rss(pid) for pid in Starmap.pids)
            mem_gb = (memory_rss(os.getpid()) + children_rss) / GB
        else:
            mem_gb = numpy.nan
        if not private:
            self.save_task_info(res.mon, mem_gb)
        if res.splice:
            yield from val
        else:
            yield val
    if self.received and not private:
        total = sum(self.received)
        biggest = max(self.received)
        msg = 'Received %s from %d outputs, maximum per output %s'
        logging.info(msg, humansize(total), len(self.received),
                     humansize(biggest))
def test_memory(self):
    # make sure the memory occupation is low
    # (to protect against bad refactoring of the XMLWriter)
    pid = os.getpid()
    try:
        rss = memory_rss(pid)
    except psutil.AccessDenied:
        raise unittest.SkipTest('Memory info not accessible')
    # BUG FIX: open os.devnull in a with-statement so the file
    # descriptor is always closed (the original handle was leaked)
    with open(os.devnull, 'wb') as devnull, \
            StreamingXMLWriter(devnull) as writer:
        for asset in assetgen(1000):
            writer.serialize(asset)
    allocated = memory_rss(pid) - rss
    self.assertLess(allocated, 256000)  # < 250 KB
def __iter__(self):
    """
    Iterate over the task outputs as they arrive.

    :yields: the unpickled values of the non-subtask outputs
    :raises: the exception raised inside a task, if any
    """
    if self.iresults == ():
        return ()
    t0 = time.time()
    # sizes of the pickled outputs, presumably in bytes (humansize is
    # applied below) — TODO confirm against Result.pik
    self.received = []
    first_time = True
    nbytes = AccumDict()  # accumulated result.nbytes contributions
    for result in self.iresults:
        msg = check_mem_usage()  # log a warning if too much memory is used
        if msg and first_time:
            logging.warning(msg)
            first_time = False  # warn only once
        if isinstance(result, BaseException):
            # this happens with WorkerLostError with celery
            raise result
        elif isinstance(result, Result):
            val = result.get()
            self.received.append(len(result.pik))
            if hasattr(result, 'nbytes'):
                nbytes += result.nbytes
        else:  # this should never happen
            raise ValueError(result)
        if OQ_DISTRIBUTE == 'processpool' and sys.platform != 'darwin':
            # it normally works on macOS, but not in notebooks calling
            # notebooks, which is the case relevant for Marco Pagani
            mem_gb = (memory_rss(os.getpid()) +
                      sum(memory_rss(pid) for pid in Starmap.pids)) / GB
        else:
            # measure only the memory used by the main process
            mem_gb = memory_rss(os.getpid()) / GB
        save_task_info(self, result, mem_gb)
        if not result.func_args:  # not subtask
            yield val
    if self.received:
        tot = sum(self.received)
        max_per_output = max(self.received)
        logging.info(
            'Received %s from %d %s outputs in %d seconds, biggest '
            'output=%s', humansize(tot), len(self.received), self.name,
            time.time() - t0, humansize(max_per_output))
    if nbytes:
        logging.info('Received %s', {k: humansize(v)
                                     for k, v in nbytes.items()})
def __iter__(self):
    """
    Iterate over the task outputs, yielding the unpickled values and
    logging a summary of the data received at the end.

    :raises: the exception raised inside a task, if any
    """
    if self.iresults == ():
        return ()
    t0 = time.time()
    self.received = []
    first_time = True
    for result in self.iresults:
        msg = check_mem_usage()  # log a warning if too much memory is used
        if msg and first_time:
            # BUG FIX: logging.warn is deprecated since Python 3.3;
            # use logging.warning (consistent with the other versions)
            logging.warning(msg)
            first_time = False  # warn only once
        if isinstance(result, BaseException):
            # this happens with WorkerLostError with celery
            raise result
        elif isinstance(result, Result):
            val = result.get()
            self.received.append(len(result.pik))
        else:  # this should never happen
            raise ValueError(result)
        if OQ_DISTRIBUTE == 'processpool' and sys.platform != 'darwin':
            # it normally works on macOS, but not in notebooks calling
            # notebooks, which is the case relevant for Marco Pagani
            mem_gb = (memory_rss(os.getpid()) +
                      sum(memory_rss(pid) for pid in Starmap.pids)) / GB
        else:
            mem_gb = numpy.nan
        self.save_task_info(result.mon, mem_gb)
        if result.splice:
            yield from val
        else:
            yield val
    if self.received:
        tot = sum(self.received)
        max_per_output = max(self.received)
        logging.info(
            'Received %s from %d outputs in %d seconds, biggest '
            'output=%s', humansize(tot), len(self.received),
            time.time() - t0, humansize(max_per_output))