コード例 #1
0
    def run(self):
        logger.debug("%s: parsing logdata %s", self.label, self._logdata)
        for line in self._logdata:
            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
            if m:
                seq = int(m.group('seq'))
                uuid = m.group('task_uuid')
                self.seq_to_uuid[seq] = uuid
                logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
                continue

            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds', line)
            if m:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
                elapsed = int(m.group('elapsed'))
                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                if elapsed > self.stats_max['time']['elapsed']:
                    self.stats_max['time']['elapsed'] = elapsed
                continue

            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
            if m:
                uuid = m.group('uuid')
                if self._skip_child_jobs:
                    logger.warning('%s: omitting stats from child job %s'
                                   ' because --skip-child-jobs flag is on',
                                   self.label, uuid)
                    continue
                logger.debug('%s: follow %s', self.label, uuid)
                child_summarizer = JobSummarizer(uuid)
                child_summarizer.stats_max = self.stats_max
                child_summarizer.task_stats = self.task_stats
                child_summarizer.tasks = self.tasks
                child_summarizer.starttime = self.starttime
                child_summarizer.run()
                logger.debug('%s: done %s', self.label, uuid)
                continue

            m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
            if not m:
                continue

            if self.label is None:
                self.label = m.group('job_uuid')
                logger.debug('%s: using job uuid as label', self.label)
            if m.group('category').endswith(':'):
                # "stderr crunchstat: notice: ..."
                continue
            elif m.group('category') in ('error', 'caught'):
                continue
            elif m.group('category') == 'read':
                # "stderr crunchstat: read /proc/1234/net/dev: ..."
                # (crunchstat formatting fixed, but old logs still say this)
                continue
            task_id = self.seq_to_uuid[int(m.group('seq'))]
            task = self.tasks[task_id]

            # Use the first and last crunchstat timestamps as
            # approximations of starttime and finishtime.
            timestamp = datetime.datetime.strptime(
                m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
            if not task.starttime:
                task.starttime = timestamp
                logger.debug('%s: task %s starttime %s',
                             self.label, task_id, timestamp)
            task.finishtime = timestamp

            if not self.starttime:
                self.starttime = timestamp
            self.finishtime = timestamp

            this_interval_s = None
            for group in ['current', 'interval']:
                if not m.group(group):
                    continue
                category = m.group('category')
                words = m.group(group).split(' ')
                stats = {}
                for val, stat in zip(words[::2], words[1::2]):
                    try:
                        if '.' in val:
                            stats[stat] = float(val)
                        else:
                            stats[stat] = int(val)
                    except ValueError as e:
                        raise ValueError(
                            'Error parsing {} stat in "{}": {!r}'.format(
                                stat, line, e))
                if 'user' in stats or 'sys' in stats:
                    stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
                if 'tx' in stats or 'rx' in stats:
                    stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
                for stat, val in stats.iteritems():
                    if group == 'interval':
                        if stat == 'seconds':
                            this_interval_s = val
                            continue
                        elif not (this_interval_s > 0):
                            logger.error(
                                "BUG? interval stat given with duration {!r}".
                                format(this_interval_s))
                            continue
                        else:
                            stat = stat + '__rate'
                            val = val / this_interval_s
                            if stat in ['user+sys__rate', 'tx+rx__rate']:
                                task.series[category, stat].append(
                                    (timestamp - self.starttime, val))
                    else:
                        if stat in ['rss']:
                            task.series[category, stat].append(
                                (timestamp - self.starttime, val))
                        self.task_stats[task_id][category][stat] = val
                    if val > self.stats_max[category][stat]:
                        self.stats_max[category][stat] = val
        logger.debug('%s: done parsing', self.label)

        self.job_tot = collections.defaultdict(
            functools.partial(collections.defaultdict, int))
        for task_id, task_stat in self.task_stats.iteritems():
            for category, stat_last in task_stat.iteritems():
                for stat, val in stat_last.iteritems():
                    if stat in ['cpus', 'cache', 'swap', 'rss']:
                        # meaningless stats like 16 cpu cores x 5 tasks = 80
                        continue
                    self.job_tot[category][stat] += val
        logger.debug('%s: done totals', self.label)
コード例 #2
0
ファイル: summarizer.py プロジェクト: hkbluesky/arvados
    def _run(self, logdata):
        self.detected_crunch1 = False
        for line in logdata:
            if not self.detected_crunch1 and '-8i9sb-' in line:
                self.detected_crunch1 = True

            if self.detected_crunch1:
                m = re.search(
                    r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$',
                    line)
                if m:
                    seq = int(m.group('seq'))
                    uuid = m.group('task_uuid')
                    self.seq_to_uuid[seq] = uuid
                    logger.debug('%s: seq %d is task %s', self.label, seq,
                                 uuid)
                    continue

                m = re.search(
                    r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds',
                    line)
                if m:
                    task_id = self.seq_to_uuid[int(m.group('seq'))]
                    elapsed = int(m.group('elapsed'))
                    self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                    if elapsed > self.stats_max['time']['elapsed']:
                        self.stats_max['time']['elapsed'] = elapsed
                    continue

                m = re.search(
                    r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$',
                    line)
                if m:
                    uuid = m.group('uuid')
                    if self._skip_child_jobs:
                        logger.warning(
                            '%s: omitting stats from child job %s'
                            ' because --skip-child-jobs flag is on',
                            self.label, uuid)
                        continue
                    logger.debug('%s: follow %s', self.label, uuid)
                    child_summarizer = ProcessSummarizer(uuid)
                    child_summarizer.stats_max = self.stats_max
                    child_summarizer.task_stats = self.task_stats
                    child_summarizer.tasks = self.tasks
                    child_summarizer.starttime = self.starttime
                    child_summarizer.run()
                    logger.debug('%s: done %s', self.label, uuid)
                    continue

                # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
                m = re.search(
                    r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<crunchstat>crunchstat: )(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$',
                    line)
                if not m:
                    continue
            else:
                # crunch2
                # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
                m = re.search(
                    r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$',
                    line)
                if not m:
                    continue

            if self.label is None:
                try:
                    self.label = m.group('job_uuid')
                except IndexError:
                    self.label = 'container'
            if m.group('category').endswith(':'):
                # "stderr crunchstat: notice: ..."
                continue
            elif m.group('category') in ('error', 'caught'):
                continue
            elif m.group('category') in ('read', 'open', 'cgroup', 'CID',
                                         'Running'):
                # "stderr crunchstat: read /proc/1234/net/dev: ..."
                # (old logs are less careful with unprefixed error messages)
                continue

            if self.detected_crunch1:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
            else:
                task_id = 'container'
            task = self.tasks[task_id]

            # Use the first and last crunchstat timestamps as
            # approximations of starttime and finishtime.
            timestamp = m.group('timestamp')
            if timestamp[10:11] == '_':
                timestamp = datetime.datetime.strptime(timestamp,
                                                       '%Y-%m-%d_%H:%M:%S')
            elif timestamp[10:11] == 'T':
                timestamp = datetime.datetime.strptime(timestamp[:19],
                                                       '%Y-%m-%dT%H:%M:%S')
            else:
                raise ValueError(
                    "Cannot parse timestamp {!r}".format(timestamp))

            if task.starttime is None:
                logger.debug('%s: task %s starttime %s', self.label, task_id,
                             timestamp)
            if task.starttime is None or timestamp < task.starttime:
                task.starttime = timestamp
            if task.finishtime is None or timestamp > task.finishtime:
                task.finishtime = timestamp

            if self.starttime is None or timestamp < task.starttime:
                self.starttime = timestamp
            if self.finishtime is None or timestamp < task.finishtime:
                self.finishtime = timestamp

            if (
                    not self.detected_crunch1
            ) and task.starttime is not None and task.finishtime is not None:
                elapsed = (task.finishtime - task.starttime).seconds
                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                if elapsed > self.stats_max['time']['elapsed']:
                    self.stats_max['time']['elapsed'] = elapsed

            this_interval_s = None
            for group in ['current', 'interval']:
                if not m.group(group):
                    continue
                category = m.group('category')
                words = m.group(group).split(' ')
                stats = {}
                try:
                    for val, stat in zip(words[::2], words[1::2]):
                        if '.' in val:
                            stats[stat] = float(val)
                        else:
                            stats[stat] = int(val)
                except ValueError as e:
                    # If the line doesn't start with 'crunchstat:' we
                    # might have mistaken an error message for a
                    # structured crunchstat line.
                    if m.group("crunchstat") is None or m.group(
                            "category") == "crunchstat":
                        logger.warning("%s: log contains message\n  %s",
                                       self.label, line)
                    else:
                        logger.warning(
                            '%s: Error parsing value %r (stat %r, category %r): %r',
                            self.label, val, stat, category, e)
                        logger.warning('%s', line)
                    continue
                if 'user' in stats or 'sys' in stats:
                    stats['user+sys'] = stats.get('user', 0) + stats.get(
                        'sys', 0)
                if 'tx' in stats or 'rx' in stats:
                    stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
                for stat, val in stats.items():
                    if group == 'interval':
                        if stat == 'seconds':
                            this_interval_s = val
                            continue
                        elif not (this_interval_s > 0):
                            logger.error(
                                "BUG? interval stat given with duration {!r}".
                                format(this_interval_s))
                            continue
                        else:
                            stat = stat + '__rate'
                            val = val / this_interval_s
                            if stat in ['user+sys__rate', 'tx+rx__rate']:
                                task.series[category, stat].append(
                                    (timestamp - self.starttime, val))
                    else:
                        if stat in ['rss']:
                            task.series[category, stat].append(
                                (timestamp - self.starttime, val))
                        self.task_stats[task_id][category][stat] = val
                    if val > self.stats_max[category][stat]:
                        self.stats_max[category][stat] = val
        logger.debug('%s: done parsing', self.label)

        self.job_tot = collections.defaultdict(
            functools.partial(collections.defaultdict, int))
        for task_id, task_stat in self.task_stats.items():
            for category, stat_last in task_stat.items():
                for stat, val in stat_last.items():
                    if stat in ['cpus', 'cache', 'swap', 'rss']:
                        # meaningless stats like 16 cpu cores x 5 tasks = 80
                        continue
                    self.job_tot[category][stat] += val
        logger.debug('%s: done totals', self.label)
コード例 #3
0
ファイル: summarizer.py プロジェクト: chapmanb/arvados
    def _run(self, logdata):
        self.detected_crunch1 = False
        for line in logdata:
            if not self.detected_crunch1 and '-8i9sb-' in line:
                self.detected_crunch1 = True

            if self.detected_crunch1:
                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
                if m:
                    seq = int(m.group('seq'))
                    uuid = m.group('task_uuid')
                    self.seq_to_uuid[seq] = uuid
                    logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
                    continue

                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds', line)
                if m:
                    task_id = self.seq_to_uuid[int(m.group('seq'))]
                    elapsed = int(m.group('elapsed'))
                    self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                    if elapsed > self.stats_max['time']['elapsed']:
                        self.stats_max['time']['elapsed'] = elapsed
                    continue

                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
                if m:
                    uuid = m.group('uuid')
                    if self._skip_child_jobs:
                        logger.warning('%s: omitting stats from child job %s'
                                       ' because --skip-child-jobs flag is on',
                                       self.label, uuid)
                        continue
                    logger.debug('%s: follow %s', self.label, uuid)
                    child_summarizer = ProcessSummarizer(uuid)
                    child_summarizer.stats_max = self.stats_max
                    child_summarizer.task_stats = self.task_stats
                    child_summarizer.tasks = self.tasks
                    child_summarizer.starttime = self.starttime
                    child_summarizer.run()
                    logger.debug('%s: done %s', self.label, uuid)
                    continue

                # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
                m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<crunchstat>crunchstat: )(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                if not m:
                    continue
            else:
                # crunch2
                # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
                m = re.search(r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                if not m:
                    continue

            if self.label is None:
                try:
                    self.label = m.group('job_uuid')
                except IndexError:
                    self.label = 'container'
            if m.group('category').endswith(':'):
                # "stderr crunchstat: notice: ..."
                continue
            elif m.group('category') in ('error', 'caught'):
                continue
            elif m.group('category') in ('read', 'open', 'cgroup', 'CID', 'Running'):
                # "stderr crunchstat: read /proc/1234/net/dev: ..."
                # (old logs are less careful with unprefixed error messages)
                continue

            if self.detected_crunch1:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
            else:
                task_id = 'container'
            task = self.tasks[task_id]

            # Use the first and last crunchstat timestamps as
            # approximations of starttime and finishtime.
            timestamp = m.group('timestamp')
            if timestamp[10:11] == '_':
                timestamp = datetime.datetime.strptime(
                    timestamp, '%Y-%m-%d_%H:%M:%S')
            elif timestamp[10:11] == 'T':
                timestamp = datetime.datetime.strptime(
                    timestamp[:19], '%Y-%m-%dT%H:%M:%S')
            else:
                raise ValueError("Cannot parse timestamp {!r}".format(
                    timestamp))

            if task.starttime is None:
                logger.debug('%s: task %s starttime %s',
                             self.label, task_id, timestamp)
            if task.starttime is None or timestamp < task.starttime:
                task.starttime = timestamp
            if task.finishtime is None or timestamp > task.finishtime:
                task.finishtime = timestamp

            if self.starttime is None or timestamp < task.starttime:
                self.starttime = timestamp
            if self.finishtime is None or timestamp < task.finishtime:
                self.finishtime = timestamp

            if (not self.detected_crunch1) and task.starttime is not None and task.finishtime is not None:
                elapsed = (task.finishtime - task.starttime).seconds
                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                if elapsed > self.stats_max['time']['elapsed']:
                    self.stats_max['time']['elapsed'] = elapsed

            this_interval_s = None
            for group in ['current', 'interval']:
                if not m.group(group):
                    continue
                category = m.group('category')
                words = m.group(group).split(' ')
                stats = {}
                try:
                    for val, stat in zip(words[::2], words[1::2]):
                        if '.' in val:
                            stats[stat] = float(val)
                        else:
                            stats[stat] = int(val)
                except ValueError as e:
                    # If the line doesn't start with 'crunchstat:' we
                    # might have mistaken an error message for a
                    # structured crunchstat line.
                    if m.group("crunchstat") is None or m.group("category") == "crunchstat":
                        logger.warning("%s: log contains message\n  %s", self.label, line)
                    else:
                        logger.warning(
                            '%s: Error parsing value %r (stat %r, category %r): %r',
                            self.label, val, stat, category, e)
                        logger.warning('%s', line)
                    continue
                if 'user' in stats or 'sys' in stats:
                    stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
                if 'tx' in stats or 'rx' in stats:
                    stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
                for stat, val in stats.iteritems():
                    if group == 'interval':
                        if stat == 'seconds':
                            this_interval_s = val
                            continue
                        elif not (this_interval_s > 0):
                            logger.error(
                                "BUG? interval stat given with duration {!r}".
                                format(this_interval_s))
                            continue
                        else:
                            stat = stat + '__rate'
                            val = val / this_interval_s
                            if stat in ['user+sys__rate', 'tx+rx__rate']:
                                task.series[category, stat].append(
                                    (timestamp - self.starttime, val))
                    else:
                        if stat in ['rss']:
                            task.series[category, stat].append(
                                (timestamp - self.starttime, val))
                        self.task_stats[task_id][category][stat] = val
                    if val > self.stats_max[category][stat]:
                        self.stats_max[category][stat] = val
        logger.debug('%s: done parsing', self.label)

        self.job_tot = collections.defaultdict(
            functools.partial(collections.defaultdict, int))
        for task_id, task_stat in self.task_stats.iteritems():
            for category, stat_last in task_stat.iteritems():
                for stat, val in stat_last.iteritems():
                    if stat in ['cpus', 'cache', 'swap', 'rss']:
                        # meaningless stats like 16 cpu cores x 5 tasks = 80
                        continue
                    self.job_tot[category][stat] += val
        logger.debug('%s: done totals', self.label)