import collections
import datetime
import functools
import logging
import re

logger = logging.getLogger(__name__)


def run(self):
    logger.debug("%s: parsing logdata %s", self.label, self._logdata)
    for line in self._logdata:
        m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
        if m:
            seq = int(m.group('seq'))
            uuid = m.group('task_uuid')
            self.seq_to_uuid[seq] = uuid
            logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
            continue

        m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds', line)
        if m:
            task_id = self.seq_to_uuid[int(m.group('seq'))]
            elapsed = int(m.group('elapsed'))
            self.task_stats[task_id]['time'] = {'elapsed': elapsed}
            if elapsed > self.stats_max['time']['elapsed']:
                self.stats_max['time']['elapsed'] = elapsed
            continue

        m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
        if m:
            uuid = m.group('uuid')
            if self._skip_child_jobs:
                logger.warning('%s: omitting stats from child job %s'
                               ' because --skip-child-jobs flag is on',
                               self.label, uuid)
                continue
            logger.debug('%s: follow %s', self.label, uuid)
            child_summarizer = JobSummarizer(uuid)
            child_summarizer.stats_max = self.stats_max
            child_summarizer.task_stats = self.task_stats
            child_summarizer.tasks = self.tasks
            child_summarizer.starttime = self.starttime
            child_summarizer.run()
            logger.debug('%s: done %s', self.label, uuid)
            continue

        m = re.search(
            r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+)'
            r' stderr crunchstat: (?P<category>\S+) (?P<current>.*?)'
            r'( -- interval (?P<interval>.*))?\n', line)
        if not m:
            continue

        if self.label is None:
            self.label = m.group('job_uuid')
            logger.debug('%s: using job uuid as label', self.label)
        if m.group('category').endswith(':'):
            # "stderr crunchstat: notice: ..."
            continue
        elif m.group('category') in ('error', 'caught'):
            continue
        elif m.group('category') == 'read':
            # "stderr crunchstat: read /proc/1234/net/dev: ..."
            # (crunchstat formatting fixed, but old logs still say this)
            continue
        task_id = self.seq_to_uuid[int(m.group('seq'))]
        task = self.tasks[task_id]

        # Use the first and last crunchstat timestamps as
        # approximations of starttime and finishtime.
        timestamp = datetime.datetime.strptime(
            m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
        if not task.starttime:
            task.starttime = timestamp
            logger.debug('%s: task %s starttime %s',
                         self.label, task_id, timestamp)
        task.finishtime = timestamp

        if not self.starttime:
            self.starttime = timestamp
        self.finishtime = timestamp

        this_interval_s = None
        for group in ['current', 'interval']:
            if not m.group(group):
                continue
            category = m.group('category')
            words = m.group(group).split(' ')
            stats = {}
            for val, stat in zip(words[::2], words[1::2]):
                try:
                    if '.' in val:
                        stats[stat] = float(val)
                    else:
                        stats[stat] = int(val)
                except ValueError as e:
                    raise ValueError(
                        'Error parsing {} stat in "{}": {!r}'.format(
                            stat, line, e))
            if 'user' in stats or 'sys' in stats:
                stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
            if 'tx' in stats or 'rx' in stats:
                stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
            for stat, val in stats.items():
                if group == 'interval':
                    if stat == 'seconds':
                        this_interval_s = val
                        continue
                    elif this_interval_s is None or this_interval_s <= 0:
                        logger.error(
                            "BUG? interval stat given with duration {!r}".format(
                                this_interval_s))
                        continue
                    else:
                        stat = stat + '__rate'
                        val = val / this_interval_s
                        if stat in ['user+sys__rate', 'tx+rx__rate']:
                            task.series[category, stat].append(
                                (timestamp - self.starttime, val))
                else:
                    if stat in ['rss']:
                        task.series[category, stat].append(
                            (timestamp - self.starttime, val))
                    self.task_stats[task_id][category][stat] = val
                if val > self.stats_max[category][stat]:
                    self.stats_max[category][stat] = val
    logger.debug('%s: done parsing', self.label)

    self.job_tot = collections.defaultdict(
        functools.partial(collections.defaultdict, int))
    for task_id, task_stat in self.task_stats.items():
        for category, stat_last in task_stat.items():
            for stat, val in stat_last.items():
                if stat in ['cpus', 'cache', 'swap', 'rss']:
                    # meaningless stats like 16 cpu cores x 5 tasks = 80
                    continue
                self.job_tot[category][stat] += val
    logger.debug('%s: done totals', self.label)
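
# A minimal, standalone sketch (not part of the class above) showing how the
# crunchstat regex in run() decomposes one crunch1 log line. The sample line
# is the one quoted in _run() below; the function name is hypothetical.
def _demo_crunch1_line():
    line = ('2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr '
            'crunchstat: keepcalls 0 put 2576 get'
            ' -- interval 10.0000 seconds 0 put 2576 get\n')
    m = re.search(
        r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+)'
        r' stderr crunchstat: (?P<category>\S+) (?P<current>.*?)'
        r'( -- interval (?P<interval>.*))?\n', line)
    assert m.group('category') == 'keepcalls'
    # "current" holds cumulative totals; "interval" holds the delta over the
    # preceding sampling window, led by its duration in seconds.
    words = m.group('current').split(' ')
    stats = dict(zip(words[1::2], [int(v) for v in words[::2]]))
    assert stats == {'put': 0, 'get': 2576}
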
def _run(self, logdata):
    self.detected_crunch1 = False
    for line in logdata:
        if not self.detected_crunch1 and '-8i9sb-' in line:
            self.detected_crunch1 = True

        if self.detected_crunch1:
            m = re.search(
                r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$',
                line)
            if m:
                seq = int(m.group('seq'))
                uuid = m.group('task_uuid')
                self.seq_to_uuid[seq] = uuid
                logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
                continue

            m = re.search(
                r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds',
                line)
            if m:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
                elapsed = int(m.group('elapsed'))
                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                if elapsed > self.stats_max['time']['elapsed']:
                    self.stats_max['time']['elapsed'] = elapsed
                continue

            m = re.search(
                r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$',
                line)
            if m:
                uuid = m.group('uuid')
                if self._skip_child_jobs:
                    logger.warning('%s: omitting stats from child job %s'
                                   ' because --skip-child-jobs flag is on',
                                   self.label, uuid)
                    continue
                logger.debug('%s: follow %s', self.label, uuid)
                child_summarizer = ProcessSummarizer(uuid)
                child_summarizer.stats_max = self.stats_max
                child_summarizer.task_stats = self.task_stats
                child_summarizer.tasks = self.tasks
                child_summarizer.starttime = self.starttime
                child_summarizer.run()
                logger.debug('%s: done %s', self.label, uuid)
                continue

            # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
            m = re.search(
                r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+)'
                r' stderr (?P<crunchstat>crunchstat: )(?P<category>\S+) (?P<current>.*?)'
                r'( -- interval (?P<interval>.*))?\n$', line)
            if not m:
                continue
        else:
            # crunch2
            # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
            m = re.search(
                r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)'
                r'( -- interval (?P<interval>.*))?\n$', line)
            if not m:
                continue

        if self.label is None:
            try:
                self.label = m.group('job_uuid')
            except IndexError:
                self.label = 'container'
        if m.group('category').endswith(':'):
            # "stderr crunchstat: notice: ..."
            continue
        elif m.group('category') in ('error', 'caught'):
            continue
        elif m.group('category') in ('read', 'open', 'cgroup', 'CID', 'Running'):
            # "stderr crunchstat: read /proc/1234/net/dev: ..."
            # (old logs are less careful with unprefixed error messages)
            continue
        if self.detected_crunch1:
            task_id = self.seq_to_uuid[int(m.group('seq'))]
        else:
            task_id = 'container'
        task = self.tasks[task_id]

        # Use the first and last crunchstat timestamps as
        # approximations of starttime and finishtime.
        timestamp = m.group('timestamp')
        if timestamp[10:11] == '_':
            timestamp = datetime.datetime.strptime(
                timestamp, '%Y-%m-%d_%H:%M:%S')
        elif timestamp[10:11] == 'T':
            timestamp = datetime.datetime.strptime(
                timestamp[:19], '%Y-%m-%dT%H:%M:%S')
        else:
            raise ValueError(
                "Cannot parse timestamp {!r}".format(timestamp))

        if task.starttime is None:
            logger.debug('%s: task %s starttime %s',
                         self.label, task_id, timestamp)
        if task.starttime is None or timestamp < task.starttime:
            task.starttime = timestamp
        if task.finishtime is None or timestamp > task.finishtime:
            task.finishtime = timestamp

        if self.starttime is None or timestamp < self.starttime:
            self.starttime = timestamp
        if self.finishtime is None or timestamp > self.finishtime:
            self.finishtime = timestamp

        # Crunch2 has no "success in N seconds" line, so derive elapsed
        # time from the first and last timestamps seen so far.
        if ((not self.detected_crunch1) and task.starttime is not None
                and task.finishtime is not None):
            elapsed = (task.finishtime - task.starttime).seconds
            self.task_stats[task_id]['time'] = {'elapsed': elapsed}
            if elapsed > self.stats_max['time']['elapsed']:
                self.stats_max['time']['elapsed'] = elapsed

        this_interval_s = None
        for group in ['current', 'interval']:
            if not m.group(group):
                continue
            category = m.group('category')
            words = m.group(group).split(' ')
            stats = {}
            try:
                for val, stat in zip(words[::2], words[1::2]):
                    if '.' in val:
                        stats[stat] = float(val)
                    else:
                        stats[stat] = int(val)
            except ValueError as e:
                # If the line doesn't start with 'crunchstat:' we
                # might have mistaken an error message for a
                # structured crunchstat line.
                if m.group("crunchstat") is None or m.group("category") == "crunchstat":
                    logger.warning("%s: log contains message\n %s",
                                   self.label, line)
                else:
                    logger.warning(
                        '%s: Error parsing value %r (stat %r, category %r): %r',
                        self.label, val, stat, category, e)
                    logger.warning('%s', line)
                continue
            if 'user' in stats or 'sys' in stats:
                stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
            if 'tx' in stats or 'rx' in stats:
                stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
            for stat, val in stats.items():
                if group == 'interval':
                    if stat == 'seconds':
                        this_interval_s = val
                        continue
                    elif this_interval_s is None or this_interval_s <= 0:
                        logger.error(
                            "BUG? interval stat given with duration {!r}".format(
                                this_interval_s))
                        continue
                    else:
                        stat = stat + '__rate'
                        val = val / this_interval_s
                        if stat in ['user+sys__rate', 'tx+rx__rate']:
                            task.series[category, stat].append(
                                (timestamp - self.starttime, val))
                else:
                    if stat in ['rss']:
                        task.series[category, stat].append(
                            (timestamp - self.starttime, val))
                    self.task_stats[task_id][category][stat] = val
                if val > self.stats_max[category][stat]:
                    self.stats_max[category][stat] = val
    logger.debug('%s: done parsing', self.label)

    self.job_tot = collections.defaultdict(
        functools.partial(collections.defaultdict, int))
    for task_id, task_stat in self.task_stats.items():
        for category, stat_last in task_stat.items():
            for stat, val in stat_last.items():
                if stat in ['cpus', 'cache', 'swap', 'rss']:
                    # meaningless stats like 16 cpu cores x 5 tasks = 80
                    continue
                self.job_tot[category][stat] += val
    logger.debug('%s: done totals', self.label)
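
# A small standalone sketch of the timestamp dispatch in _run(): crunch1 logs
# stamp lines as '%Y-%m-%d_%H:%M:%S' (underscore at index 10), while crunch2
# logs use an RFC 3339-style '%Y-%m-%dT%H:%M:%S' with sub-second digits that
# get truncated before parsing. The function name is hypothetical.
def _demo_parse_timestamp(timestamp):
    if timestamp[10:11] == '_':
        return datetime.datetime.strptime(timestamp, '%Y-%m-%d_%H:%M:%S')
    elif timestamp[10:11] == 'T':
        # Drop sub-second digits and the trailing 'Z' before parsing.
        return datetime.datetime.strptime(timestamp[:19], '%Y-%m-%dT%H:%M:%S')
    raise ValueError("Cannot parse timestamp {!r}".format(timestamp))

# Example inputs (both stamps come from the sample log lines quoted above):
#   _demo_parse_timestamp('2017-12-02_17:15:08')            -> crunch1 format
#   _demo_parse_timestamp('2017-12-01T16:56:24.723509200Z') -> crunch2 format
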
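
# Hypothetical downstream use (not part of this excerpt): after _run()
# completes, job_tot maps category -> stat -> total across tasks, and
# stats_max holds the per-stat peaks, so a plain-text report can walk
# the totals directly, e.g.:
#
#     for category, stats in sorted(summarizer.job_tot.items()):
#         for stat, total in sorted(stats.items()):
#             print('{}\t{}\t{}'.format(category, stat, total))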