Beispiel #1
0
 def __init__(self, pipeline_instance_uuid, **kwargs):
     """Fetch a pipeline instance and build one JobSummarizer per component.

     :param pipeline_instance_uuid: UUID of the pipeline instance to
         retrieve from the Arvados API.
     :param kwargs: passed through to each component's JobSummarizer.
     """
     arv = arvados.api('v1', model=OrderedJsonModel())
     instance = arv.pipeline_instances().get(
         uuid=pipeline_instance_uuid).execute()
     # OrderedDict preserves component order for summary output.
     self.summarizers = collections.OrderedDict()
     # .items() works on Python 2 and 3; .iteritems() is Python-2-only.
     for cname, component in instance['components'].items():
         if 'job' not in component:
             logger.warning("%s: skipping component with no job assigned",
                            cname)
         else:
             logger.info("%s: job %s", cname, component['job']['uuid'])
             summarizer = JobSummarizer(component['job'], **kwargs)
             # Label each child with both component name and job uuid.
             summarizer.label = '{} {}'.format(cname,
                                               component['job']['uuid'])
             self.summarizers[cname] = summarizer
     self.label = pipeline_instance_uuid
Beispiel #2
0
 def __init__(self, instance, **kwargs):
     """Build a JobTreeSummarizer child for each component that has a job.

     Components without a 'job' key are skipped with a warning. Child
     summarizers are labeled "<component name> <job uuid>".
     """
     children = collections.OrderedDict()
     for cname, component in instance['components'].items():
         # Guard clause: nothing to summarize without an assigned job.
         if 'job' not in component:
             logger.warning("%s: skipping component with no job assigned",
                            cname)
             continue
         job = component['job']
         logger.info("%s: job %s", cname, job['uuid'])
         child = JobTreeSummarizer(job, label=cname, **kwargs)
         # Override the plain component-name label with name + job uuid.
         child.label = '{} {}'.format(cname, job['uuid'])
         children[cname] = child
     super(PipelineSummarizer, self).__init__(children=children,
                                              label=instance['uuid'],
                                              **kwargs)
Beispiel #3
0
 def __init__(self, pipeline_instance_uuid, **kwargs):
     """Fetch a pipeline instance and build one JobSummarizer per component.

     :param pipeline_instance_uuid: UUID of the pipeline instance to
         retrieve from the Arvados API.
     :param kwargs: passed through to each component's JobSummarizer.
     """
     arv = arvados.api('v1', model=OrderedJsonModel())
     instance = arv.pipeline_instances().get(
         uuid=pipeline_instance_uuid).execute()
     # OrderedDict preserves component order for summary output.
     self.summarizers = collections.OrderedDict()
     # .items() works on Python 2 and 3; .iteritems() is Python-2-only.
     for cname, component in instance['components'].items():
         if 'job' not in component:
             logger.warning(
                 "%s: skipping component with no job assigned", cname)
         else:
             logger.info(
                 "%s: job %s", cname, component['job']['uuid'])
             summarizer = JobSummarizer(component['job'], **kwargs)
             # Label each child with both component name and job uuid.
             summarizer.label = '{} {}'.format(
                 cname, component['job']['uuid'])
             self.summarizers[cname] = summarizer
     self.label = pipeline_instance_uuid
Beispiel #4
0
 def __init__(self, instance, **kwargs):
     """Build a JobTreeSummarizer child for each component that has a job.

     :param instance: pipeline instance record; must provide
         'components' (dict) and 'uuid' keys.
     :param kwargs: passed through to the children and the superclass.
     """
     children = collections.OrderedDict()
     # .items() works on Python 2 and 3; .iteritems() is Python-2-only
     # (the sibling variant of this method already uses .items()).
     for cname, component in instance['components'].items():
         if 'job' not in component:
             logger.warning(
                 "%s: skipping component with no job assigned", cname)
         else:
             logger.info(
                 "%s: job %s", cname, component['job']['uuid'])
             summarizer = JobTreeSummarizer(component['job'], label=cname, **kwargs)
             # Override the plain component-name label with name + job uuid.
             summarizer.label = '{} {}'.format(
                 cname, component['job']['uuid'])
             children[cname] = summarizer
     super(PipelineSummarizer, self).__init__(
         children=children,
         label=instance['uuid'],
         **kwargs)
Beispiel #5
0
    def run(self):
        """Parse crunchstat log lines and accumulate per-task statistics.

        Populates self.seq_to_uuid, self.task_stats, self.stats_max,
        per-task time series, start/finish timestamps, and finally
        self.job_tot (per-category totals across tasks). Recurses into
        queued child jobs unless self._skip_child_jobs is set.
        """
        logger.debug("%s: parsing logdata %s", self.label, self._logdata)
        for line in self._logdata:
            # Task registration line: map log sequence number -> task uuid.
            m = re.search(
                r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$',
                line)
            if m:
                seq = int(m.group('seq'))
                uuid = m.group('task_uuid')
                self.seq_to_uuid[seq] = uuid
                logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
                continue

            # Task completion line: record elapsed wall-clock seconds.
            m = re.search(
                r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds',
                line)
            if m:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
                elapsed = int(m.group('elapsed'))
                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                if elapsed > self.stats_max['time']['elapsed']:
                    self.stats_max['time']['elapsed'] = elapsed
                continue

            # Child job queued: summarize it recursively, sharing our
            # accumulators so its stats fold into this job's.
            m = re.search(
                r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$',
                line)
            if m:
                uuid = m.group('uuid')
                if self._skip_child_jobs:
                    logger.warning(
                        '%s: omitting stats from child job %s'
                        ' because --skip-child-jobs flag is on', self.label,
                        uuid)
                    continue
                logger.debug('%s: follow %s', self.label, uuid)
                child_summarizer = JobSummarizer(uuid)
                child_summarizer.stats_max = self.stats_max
                child_summarizer.task_stats = self.task_stats
                child_summarizer.tasks = self.tasks
                child_summarizer.starttime = self.starttime
                child_summarizer.run()
                logger.debug('%s: done %s', self.label, uuid)
                continue

            # Anything else of interest is a crunchstat report line.
            m = re.search(
                r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n',
                line)
            if not m:
                continue

            try:
                if self.label is None:
                    self.label = m.group('job_uuid')
                    logger.debug('%s: using job uuid as label', self.label)
                if m.group('category').endswith(':'):
                    # "stderr crunchstat: notice: ..."
                    continue
                elif m.group('category') in ('error', 'caught'):
                    continue
                elif m.group('category') == 'read':
                    # "stderr crunchstat: read /proc/1234/net/dev: ..."
                    # (crunchstat formatting fixed, but old logs still say this)
                    continue
                task_id = self.seq_to_uuid[int(m.group('seq'))]
                task = self.tasks[task_id]

                # Use the first and last crunchstat timestamps as
                # approximations of starttime and finishtime.
                timestamp = datetime.datetime.strptime(m.group('timestamp'),
                                                       '%Y-%m-%d_%H:%M:%S')
                if not task.starttime:
                    task.starttime = timestamp
                    logger.debug('%s: task %s starttime %s', self.label,
                                 task_id, timestamp)
                task.finishtime = timestamp

                if not self.starttime:
                    self.starttime = timestamp
                self.finishtime = timestamp

                this_interval_s = None
                for group in ['current', 'interval']:
                    if not m.group(group):
                        continue
                    category = m.group('category')
                    words = m.group(group).split(' ')
                    stats = {}
                    # Stat words come in "<value> <name>" pairs.
                    for val, stat in zip(words[::2], words[1::2]):
                        try:
                            if '.' in val:
                                stats[stat] = float(val)
                            else:
                                stats[stat] = int(val)
                        except ValueError as e:
                            raise ValueError(
                                'Error parsing {} stat: {!r}'.format(stat, e))
                    if 'user' in stats or 'sys' in stats:
                        stats['user+sys'] = stats.get('user', 0) + stats.get(
                            'sys', 0)
                    if 'tx' in stats or 'rx' in stats:
                        stats['tx+rx'] = stats.get('tx', 0) + stats.get(
                            'rx', 0)
                    # .items() works on Python 2 and 3; .iteritems() is
                    # Python-2-only.
                    for stat, val in stats.items():
                        if group == 'interval':
                            if stat == 'seconds':
                                this_interval_s = val
                                continue
                            # Explicit None check: on Python 3 "None > 0"
                            # raises TypeError instead of returning False.
                            elif this_interval_s is None or this_interval_s <= 0:
                                logger.error(
                                    "BUG? interval stat given with duration {!r}"
                                    .format(this_interval_s))
                                continue
                            else:
                                stat = stat + '__rate'
                                val = val / this_interval_s
                                if stat in ['user+sys__rate', 'tx+rx__rate']:
                                    task.series[category, stat].append(
                                        (timestamp - self.starttime, val))
                        else:
                            if stat in ['rss']:
                                task.series[category, stat].append(
                                    (timestamp - self.starttime, val))
                            self.task_stats[task_id][category][stat] = val
                        if val > self.stats_max[category][stat]:
                            self.stats_max[category][stat] = val
            except Exception as e:
                logger.info(
                    'Skipping malformed line: {}Error was: {}\n'.format(
                        line, e))
        logger.debug('%s: done parsing', self.label)

        self.job_tot = collections.defaultdict(
            functools.partial(collections.defaultdict, int))
        # Total each stat across tasks, skipping gauges that do not sum
        # meaningfully.
        for task_id, task_stat in self.task_stats.items():
            for category, stat_last in task_stat.items():
                for stat, val in stat_last.items():
                    if stat in ['cpus', 'cache', 'swap', 'rss']:
                        # meaningless stats like 16 cpu cores x 5 tasks = 80
                        continue
                    self.job_tot[category][stat] += val
        logger.debug('%s: done totals', self.label)
Beispiel #6
0
    def run(self):
        """Parse crunchstat log lines and accumulate per-task statistics.

        Populates self.seq_to_uuid, self.task_stats, self.stats_max,
        per-task time series, start/finish timestamps, and finally
        self.job_tot (per-category totals across tasks). Recurses into
        queued child jobs unless self._skip_child_jobs is set.
        """
        logger.debug("%s: parsing logdata %s", self.label, self._logdata)
        for line in self._logdata:
            # Task registration line: map log sequence number -> task uuid.
            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
            if m:
                seq = int(m.group('seq'))
                uuid = m.group('task_uuid')
                self.seq_to_uuid[seq] = uuid
                logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
                continue

            # Task completion line: record elapsed wall-clock seconds.
            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds', line)
            if m:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
                elapsed = int(m.group('elapsed'))
                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                if elapsed > self.stats_max['time']['elapsed']:
                    self.stats_max['time']['elapsed'] = elapsed
                continue

            # Child job queued: summarize it recursively, sharing our
            # accumulators so its stats fold into this job's.
            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
            if m:
                uuid = m.group('uuid')
                if self._skip_child_jobs:
                    logger.warning('%s: omitting stats from child job %s'
                                   ' because --skip-child-jobs flag is on',
                                   self.label, uuid)
                    continue
                logger.debug('%s: follow %s', self.label, uuid)
                child_summarizer = JobSummarizer(uuid)
                child_summarizer.stats_max = self.stats_max
                child_summarizer.task_stats = self.task_stats
                child_summarizer.tasks = self.tasks
                child_summarizer.starttime = self.starttime
                child_summarizer.run()
                logger.debug('%s: done %s', self.label, uuid)
                continue

            # Anything else of interest is a crunchstat report line.
            m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
            if not m:
                continue

            try:
                if self.label is None:
                    self.label = m.group('job_uuid')
                    logger.debug('%s: using job uuid as label', self.label)
                if m.group('category').endswith(':'):
                    # "stderr crunchstat: notice: ..."
                    continue
                elif m.group('category') in ('error', 'caught'):
                    continue
                elif m.group('category') == 'read':
                    # "stderr crunchstat: read /proc/1234/net/dev: ..."
                    # (crunchstat formatting fixed, but old logs still say this)
                    continue
                task_id = self.seq_to_uuid[int(m.group('seq'))]
                task = self.tasks[task_id]

                # Use the first and last crunchstat timestamps as
                # approximations of starttime and finishtime.
                timestamp = datetime.datetime.strptime(
                    m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
                if not task.starttime:
                    task.starttime = timestamp
                    logger.debug('%s: task %s starttime %s',
                                 self.label, task_id, timestamp)
                task.finishtime = timestamp

                if not self.starttime:
                    self.starttime = timestamp
                self.finishtime = timestamp

                this_interval_s = None
                for group in ['current', 'interval']:
                    if not m.group(group):
                        continue
                    category = m.group('category')
                    words = m.group(group).split(' ')
                    stats = {}
                    # Stat words come in "<value> <name>" pairs.
                    for val, stat in zip(words[::2], words[1::2]):
                        try:
                            if '.' in val:
                                stats[stat] = float(val)
                            else:
                                stats[stat] = int(val)
                        except ValueError as e:
                            raise ValueError(
                                'Error parsing {} stat: {!r}'.format(
                                    stat, e))
                    if 'user' in stats or 'sys' in stats:
                        stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
                    if 'tx' in stats or 'rx' in stats:
                        stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
                    # .items() works on Python 2 and 3; .iteritems() is
                    # Python-2-only.
                    for stat, val in stats.items():
                        if group == 'interval':
                            if stat == 'seconds':
                                this_interval_s = val
                                continue
                            # Explicit None check: on Python 3 "None > 0"
                            # raises TypeError instead of returning False.
                            elif this_interval_s is None or this_interval_s <= 0:
                                logger.error(
                                    "BUG? interval stat given with duration {!r}".
                                    format(this_interval_s))
                                continue
                            else:
                                stat = stat + '__rate'
                                val = val / this_interval_s
                                if stat in ['user+sys__rate', 'tx+rx__rate']:
                                    task.series[category, stat].append(
                                        (timestamp - self.starttime, val))
                        else:
                            if stat in ['rss']:
                                task.series[category, stat].append(
                                    (timestamp - self.starttime, val))
                            self.task_stats[task_id][category][stat] = val
                        if val > self.stats_max[category][stat]:
                            self.stats_max[category][stat] = val
            except Exception as e:
                logger.info('Skipping malformed line: {}Error was: {}\n'.format(line, e))
        logger.debug('%s: done parsing', self.label)

        self.job_tot = collections.defaultdict(
            functools.partial(collections.defaultdict, int))
        # Total each stat across tasks, skipping gauges that do not sum
        # meaningfully.
        for task_id, task_stat in self.task_stats.items():
            for category, stat_last in task_stat.items():
                for stat, val in stat_last.items():
                    if stat in ['cpus', 'cache', 'swap', 'rss']:
                        # meaningless stats like 16 cpu cores x 5 tasks = 80
                        continue
                    self.job_tot[category][stat] += val
        logger.debug('%s: done totals', self.label)