Python debugの例、crunchstat_summary.logger.debug Pythonの例

コード例 #1

0

ファイルを表示

ファイル: reader.py プロジェクト: mwblee/arvados

 def _get_all_pages(self):
     got = 0
     last_id = 0
     filters = [['object_uuid', '=', self.job_uuid],
                ['event_type', 'in', self.event_types]]
     try:
         while True:
             page = arvados.api().logs().index(
                 limit=1000,
                 order=['id asc'],
                 filters=filters + [['id', '>', str(last_id)]],
                 select=['id', 'properties'],
             ).execute(num_retries=2)
             got += len(page['items'])
             logger.debug(
                 '%s: received %d of %d log events', self.job_uuid, got,
                 got + page['items_available'] - len(page['items']))
             for i in page['items']:
                 for line in i['properties']['text'].split('\n'):
                     self._queue.put(line + '\n')
                 last_id = i['id']
             if (len(page['items']) == 0
                     or len(page['items']) >= page['items_available']):
                 break
     finally:
         self._queue.put(self.EOF)

コード例 #2

0

ファイルを表示

ファイル: summarizer.py プロジェクト: 151706061/arvados

    def __init__(self, logdata, label=None, skip_child_jobs=False):
        self._logdata = logdata

        self.label = label
        self.starttime = None
        self.finishtime = None
        self._skip_child_jobs = skip_child_jobs

        # stats_max: {category: {stat: val}}
        self.stats_max = collections.defaultdict(
            functools.partial(collections.defaultdict, lambda: 0))
        # task_stats: {task_id: {category: {stat: val}}}
        self.task_stats = collections.defaultdict(
            functools.partial(collections.defaultdict, dict))

        self.seq_to_uuid = {}
        self.tasks = collections.defaultdict(Task)

        # We won't bother recommending new runtime constraints if the
        # constraints given when running the job are known to us and
        # are already suitable.  If applicable, the subclass
        # constructor will overwrite this with something useful.
        self.existing_constraints = {}

        logger.debug("%s: logdata %s", self.label, logdata)

コード例 #3

0

ファイルを表示

ファイル: reader.py プロジェクト: 151706061/arvados

 def _get_all_pages(self):
     got = 0
     last_id = 0
     filters = [
         ['object_uuid', '=', self.job_uuid],
         ['event_type', '=', 'stderr']]
     try:
         while True:
             page = arvados.api().logs().index(
                 limit=1000,
                 order=['id asc'],
                 filters=filters + [['id','>',str(last_id)]],
                 select=['id', 'properties'],
             ).execute(num_retries=2)
             got += len(page['items'])
             logger.debug(
                 '%s: received %d of %d log events',
                 self.job_uuid, got,
                 got + page['items_available'] - len(page['items']))
             for i in page['items']:
                 for line in i['properties']['text'].split('\n'):
                     self._queue.put(line+'\n')
                 last_id = i['id']
             if (len(page['items']) == 0 or
                 len(page['items']) >= page['items_available']):
                 break
     finally:
         self._queue.put(self.EOF)

コード例 #4

0

ファイルを表示

ファイル: summarizer.py プロジェクト: hkbluesky/arvados

    def __init__(self,
                 logdata,
                 label=None,
                 skip_child_jobs=False,
                 uuid=None,
                 **kwargs):
        self._logdata = logdata

        self.uuid = uuid
        self.label = label
        self.starttime = None
        self.finishtime = None
        self._skip_child_jobs = skip_child_jobs

        # stats_max: {category: {stat: val}}
        self.stats_max = collections.defaultdict(
            functools.partial(collections.defaultdict, lambda: 0))
        # task_stats: {task_id: {category: {stat: val}}}
        self.task_stats = collections.defaultdict(
            functools.partial(collections.defaultdict, dict))

        self.seq_to_uuid = {}
        self.tasks = collections.defaultdict(Task)

        # We won't bother recommending new runtime constraints if the
        # constraints given when running the job are known to us and
        # are already suitable.  If applicable, the subclass
        # constructor will overwrite this with something useful.
        self.existing_constraints = {}

        logger.debug("%s: logdata %s", self.label, logdata)

コード例 #5

0

ファイルを表示

 def __init__(self, collection_id):
     logger.debug('load collection %s', collection_id)
     collection = arvados.collection.CollectionReader(collection_id)
     filenames = [filename for filename in collection]
     if len(filenames) != 1:
         raise ValueError(
             "collection {} has {} files; need exactly one".format(
                 collection_id, len(filenames)))
     self._reader = collection.open(filenames[0])
     self._label = "{}/{}".format(collection_id, filenames[0])

コード例 #6

0

ファイルを表示

ファイル: reader.py プロジェクト: mwblee/arvados

 def __iter__(self):
     logger.debug('load collection %s', self._collection_id)
     collection = arvados.collection.CollectionReader(self._collection_id)
     filenames = [filename for filename in collection]
     if len(filenames) == 1:
         filename = filenames[0]
     else:
         filename = 'crunchstat.txt'
     self._label = "{}/{}".format(self._collection_id, filename)
     self._reader = collection.open(filename)
     return iter(self._reader)

コード例 #7

0

ファイルを表示

ファイル: reader.py プロジェクト: wtsi-hgi/arvados

 def __iter__(self):
     logger.debug('load collection %s', self._collection_id)
     collection = arvados.collection.CollectionReader(self._collection_id)
     filenames = [filename for filename in collection]
     if len(filenames) == 1:
         filename = filenames[0]
     else:
         filename = 'crunchstat.txt'
     self._label = "{}/{}".format(self._collection_id, filename)
     self._reader = collection.open(filename)
     return iter(self._reader)

コード例 #8

0

ファイルを表示

ファイル: reader.py プロジェクト: tfmorris/arvados

 def __iter__(self):
     logger.debug('load collection %s', self._collection_id)
     collection = arvados.collection.CollectionReader(self._collection_id)
     filenames = [filename for filename in collection]
     # Crunch2 has multiple stats files
     if len(filenames) > 1:
         filenames = ['crunchstat.txt', 'arv-mount.txt']
     for filename in filenames:
         try:
             self._readers.append(collection.open(filename))
         except IOError:
             logger.warn('Unable to open %s', filename)
     self._label = "{}/{}".format(self._collection_id, filenames[0])
     return itertools.chain(*[iter(reader) for reader in self._readers])

コード例 #9

0

ファイルを表示

ファイル: reader.py プロジェクト: chapmanb/arvados

 def __iter__(self):
     logger.debug('load collection %s', self._collection_id)
     collection = arvados.collection.CollectionReader(self._collection_id)
     filenames = [filename for filename in collection]
     # Crunch2 has multiple stats files
     if len(filenames) > 1:
         filenames = ['crunchstat.txt', 'arv-mount.txt']
     for filename in filenames:
         try:
             self._readers.append(collection.open(filename))
         except IOError:
             logger.warn('Unable to open %s', filename)
     self._label = "{}/{}".format(self._collection_id, filenames[0])
     return itertools.chain(*[iter(reader) for reader in self._readers])

コード例 #10

0

ファイルを表示

ファイル: summarizer.py プロジェクト: hkbluesky/arvados

    def __init__(self, root, skip_child_jobs=False, **kwargs):
        arv = arvados.api('v1', model=OrderedJsonModel())

        label = kwargs.pop('label', None) or root.get('name') or root['uuid']
        root['name'] = label

        children = collections.OrderedDict()
        todo = collections.deque((root, ))
        while len(todo) > 0:
            current = todo.popleft()
            label = current['name']
            sort_key = current['created_at']
            if current['uuid'].find('-xvhdp-') > 0:
                current = arv.containers().get(
                    uuid=current['container_uuid']).execute()

            summer = ContainerSummarizer(current, label=label, **kwargs)
            summer.sort_key = sort_key
            children[current['uuid']] = summer

            page_filters = []
            while True:
                child_crs = arv.container_requests().index(
                    order=['uuid asc'],
                    filters=page_filters +
                    [['requesting_container_uuid', '=', current['uuid']]],
                ).execute()
                if not child_crs['items']:
                    break
                elif skip_child_jobs:
                    logger.warning(
                        '%s: omitting stats from %d child containers'
                        ' because --skip-child-jobs flag is on', label,
                        child_crs['items_available'])
                    break
                page_filters = [['uuid', '>', child_crs['items'][-1]['uuid']]]
                for cr in child_crs['items']:
                    if cr['container_uuid']:
                        logger.debug('%s: container req %s', current['uuid'],
                                     cr['uuid'])
                        cr['name'] = cr.get('name') or cr['uuid']
                        todo.append(cr)
        sorted_children = collections.OrderedDict()
        for uuid in sorted(list(children.keys()),
                           key=lambda uuid: children[uuid].sort_key):
            sorted_children[uuid] = children[uuid]
        super(ContainerTreeSummarizer, self).__init__(children=sorted_children,
                                                      label=root['name'],
                                                      **kwargs)

コード例 #11

0

ファイルを表示

ファイル: summarizer.py プロジェクト: chapmanb/arvados

    def __init__(self, root, skip_child_jobs=False, **kwargs):
        arv = arvados.api('v1', model=OrderedJsonModel())

        label = kwargs.pop('label', None) or root.get('name') or root['uuid']
        root['name'] = label

        children = collections.OrderedDict()
        todo = collections.deque((root, ))
        while len(todo) > 0:
            current = todo.popleft()
            label = current['name']
            sort_key = current['created_at']
            if current['uuid'].find('-xvhdp-') > 0:
                current = arv.containers().get(uuid=current['container_uuid']).execute()

            summer = ContainerSummarizer(current, label=label, **kwargs)
            summer.sort_key = sort_key
            children[current['uuid']] = summer

            page_filters = []
            while True:
                child_crs = arv.container_requests().index(
                    order=['uuid asc'],
                    filters=page_filters+[
                        ['requesting_container_uuid', '=', current['uuid']]],
                ).execute()
                if not child_crs['items']:
                    break
                elif skip_child_jobs:
                    logger.warning('%s: omitting stats from %d child containers'
                                   ' because --skip-child-jobs flag is on',
                                   label, child_crs['items_available'])
                    break
                page_filters = [['uuid', '>', child_crs['items'][-1]['uuid']]]
                for cr in child_crs['items']:
                    if cr['container_uuid']:
                        logger.debug('%s: container req %s', current['uuid'], cr['uuid'])
                        cr['name'] = cr.get('name') or cr['uuid']
                        todo.append(cr)
        sorted_children = collections.OrderedDict()
        for uuid in sorted(children.keys(), key=lambda uuid: children[uuid].sort_key):
            sorted_children[uuid] = children[uuid]
        super(ContainerTreeSummarizer, self).__init__(
            children=sorted_children,
            label=root['name'],
            **kwargs)

コード例 #12

0

ファイルを表示

ファイル: summarizer.py プロジェクト: chapmanb/arvados

 def run(self):
     logger.debug("%s: parsing logdata %s", self.label, self._logdata)
     with self._logdata as logdata:
         self._run(logdata)

コード例 #13

0

ファイルを表示

ファイル: reader.py プロジェクト: wtsi-hgi/arvados

 def __init__(self, job_uuid):
     self.job_uuid = job_uuid
     self.event_types = (['stderr'] if '-8i9sb-' in job_uuid else ['crunchstat', 'arv-mount'])
     logger.debug('load %s events for job %s', self.event_types, self.job_uuid)

コード例 #14

0

ファイルを表示

ファイル: reader.py プロジェクト: 151706061/arvados

 def __init__(self, job_uuid):
     logger.debug('load stderr events for job %s', job_uuid)
     self.job_uuid = job_uuid

コード例 #15

0

ファイルを表示

ファイル: reader.py プロジェクト: mwblee/arvados

 def __init__(self, job_uuid):
     self.job_uuid = job_uuid
     self.event_types = (['stderr'] if '-8i9sb-' in job_uuid else
                         ['crunchstat', 'arv-mount'])
     logger.debug('load %s events for job %s', self.event_types,
                  self.job_uuid)

コード例 #16

0

ファイルを表示

 def __init__(self, job_uuid):
     logger.debug('load stderr events for job %s', job_uuid)
     self.job_uuid = job_uuid

コード例 #17

0

ファイルを表示

    def run(self):
        logger.debug("%s: parsing logdata %s", self.label, self._logdata)
        for line in self._logdata:
            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
            if m:
                seq = int(m.group('seq'))
                uuid = m.group('task_uuid')
                self.seq_to_uuid[seq] = uuid
                logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
                continue

            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds', line)
            if m:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
                elapsed = int(m.group('elapsed'))
                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                if elapsed > self.stats_max['time']['elapsed']:
                    self.stats_max['time']['elapsed'] = elapsed
                continue

            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
            if m:
                uuid = m.group('uuid')
                if self._skip_child_jobs:
                    logger.warning('%s: omitting stats from child job %s'
                                   ' because --skip-child-jobs flag is on',
                                   self.label, uuid)
                    continue
                logger.debug('%s: follow %s', self.label, uuid)
                child_summarizer = JobSummarizer(uuid)
                child_summarizer.stats_max = self.stats_max
                child_summarizer.task_stats = self.task_stats
                child_summarizer.tasks = self.tasks
                child_summarizer.starttime = self.starttime
                child_summarizer.run()
                logger.debug('%s: done %s', self.label, uuid)
                continue

            m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
            if not m:
                continue

            if self.label is None:
                self.label = m.group('job_uuid')
                logger.debug('%s: using job uuid as label', self.label)
            if m.group('category').endswith(':'):
                # "stderr crunchstat: notice: ..."
                continue
            elif m.group('category') in ('error', 'caught'):
                continue
            elif m.group('category') == 'read':
                # "stderr crunchstat: read /proc/1234/net/dev: ..."
                # (crunchstat formatting fixed, but old logs still say this)
                continue
            task_id = self.seq_to_uuid[int(m.group('seq'))]
            task = self.tasks[task_id]

            # Use the first and last crunchstat timestamps as
            # approximations of starttime and finishtime.
            timestamp = datetime.datetime.strptime(
                m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
            if not task.starttime:
                task.starttime = timestamp
                logger.debug('%s: task %s starttime %s',
                             self.label, task_id, timestamp)
            task.finishtime = timestamp

            if not self.starttime:
                self.starttime = timestamp
            self.finishtime = timestamp

            this_interval_s = None
            for group in ['current', 'interval']:
                if not m.group(group):
                    continue
                category = m.group('category')
                words = m.group(group).split(' ')
                stats = {}
                for val, stat in zip(words[::2], words[1::2]):
                    try:
                        if '.' in val:
                            stats[stat] = float(val)
                        else:
                            stats[stat] = int(val)
                    except ValueError as e:
                        raise ValueError(
                            'Error parsing {} stat in "{}": {!r}'.format(
                                stat, line, e))
                if 'user' in stats or 'sys' in stats:
                    stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
                if 'tx' in stats or 'rx' in stats:
                    stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
                for stat, val in stats.iteritems():
                    if group == 'interval':
                        if stat == 'seconds':
                            this_interval_s = val
                            continue
                        elif not (this_interval_s > 0):
                            logger.error(
                                "BUG? interval stat given with duration {!r}".
                                format(this_interval_s))
                            continue
                        else:
                            stat = stat + '__rate'
                            val = val / this_interval_s
                            if stat in ['user+sys__rate', 'tx+rx__rate']:
                                task.series[category, stat].append(
                                    (timestamp - self.starttime, val))
                    else:
                        if stat in ['rss']:
                            task.series[category, stat].append(
                                (timestamp - self.starttime, val))
                        self.task_stats[task_id][category][stat] = val
                    if val > self.stats_max[category][stat]:
                        self.stats_max[category][stat] = val
        logger.debug('%s: done parsing', self.label)

        self.job_tot = collections.defaultdict(
            functools.partial(collections.defaultdict, int))
        for task_id, task_stat in self.task_stats.iteritems():
            for category, stat_last in task_stat.iteritems():
                for stat, val in stat_last.iteritems():
                    if stat in ['cpus', 'cache', 'swap', 'rss']:
                        # meaningless stats like 16 cpu cores x 5 tasks = 80
                        continue
                    self.job_tot[category][stat] += val
        logger.debug('%s: done totals', self.label)

コード例 #18

0

ファイルを表示

ファイル: summarizer.py プロジェクト: hkbluesky/arvados

 def run(self):
     logger.debug("%s: parsing logdata %s", self.label, self._logdata)
     with self._logdata as logdata:
         self._run(logdata)

コード例 #19

0

ファイルを表示

ファイル: summarizer.py プロジェクト: hkbluesky/arvados

    def _run(self, logdata):
        self.detected_crunch1 = False
        for line in logdata:
            if not self.detected_crunch1 and '-8i9sb-' in line:
                self.detected_crunch1 = True

            if self.detected_crunch1:
                m = re.search(
                    r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$',
                    line)
                if m:
                    seq = int(m.group('seq'))
                    uuid = m.group('task_uuid')
                    self.seq_to_uuid[seq] = uuid
                    logger.debug('%s: seq %d is task %s', self.label, seq,
                                 uuid)
                    continue

                m = re.search(
                    r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds',
                    line)
                if m:
                    task_id = self.seq_to_uuid[int(m.group('seq'))]
                    elapsed = int(m.group('elapsed'))
                    self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                    if elapsed > self.stats_max['time']['elapsed']:
                        self.stats_max['time']['elapsed'] = elapsed
                    continue

                m = re.search(
                    r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$',
                    line)
                if m:
                    uuid = m.group('uuid')
                    if self._skip_child_jobs:
                        logger.warning(
                            '%s: omitting stats from child job %s'
                            ' because --skip-child-jobs flag is on',
                            self.label, uuid)
                        continue
                    logger.debug('%s: follow %s', self.label, uuid)
                    child_summarizer = ProcessSummarizer(uuid)
                    child_summarizer.stats_max = self.stats_max
                    child_summarizer.task_stats = self.task_stats
                    child_summarizer.tasks = self.tasks
                    child_summarizer.starttime = self.starttime
                    child_summarizer.run()
                    logger.debug('%s: done %s', self.label, uuid)
                    continue

                # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
                m = re.search(
                    r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<crunchstat>crunchstat: )(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$',
                    line)
                if not m:
                    continue
            else:
                # crunch2
                # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
                m = re.search(
                    r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$',
                    line)
                if not m:
                    continue

            if self.label is None:
                try:
                    self.label = m.group('job_uuid')
                except IndexError:
                    self.label = 'container'
            if m.group('category').endswith(':'):
                # "stderr crunchstat: notice: ..."
                continue
            elif m.group('category') in ('error', 'caught'):
                continue
            elif m.group('category') in ('read', 'open', 'cgroup', 'CID',
                                         'Running'):
                # "stderr crunchstat: read /proc/1234/net/dev: ..."
                # (old logs are less careful with unprefixed error messages)
                continue

            if self.detected_crunch1:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
            else:
                task_id = 'container'
            task = self.tasks[task_id]

            # Use the first and last crunchstat timestamps as
            # approximations of starttime and finishtime.
            timestamp = m.group('timestamp')
            if timestamp[10:11] == '_':
                timestamp = datetime.datetime.strptime(timestamp,
                                                       '%Y-%m-%d_%H:%M:%S')
            elif timestamp[10:11] == 'T':
                timestamp = datetime.datetime.strptime(timestamp[:19],
                                                       '%Y-%m-%dT%H:%M:%S')
            else:
                raise ValueError(
                    "Cannot parse timestamp {!r}".format(timestamp))

            if task.starttime is None:
                logger.debug('%s: task %s starttime %s', self.label, task_id,
                             timestamp)
            if task.starttime is None or timestamp < task.starttime:
                task.starttime = timestamp
            if task.finishtime is None or timestamp > task.finishtime:
                task.finishtime = timestamp

            if self.starttime is None or timestamp < task.starttime:
                self.starttime = timestamp
            if self.finishtime is None or timestamp < task.finishtime:
                self.finishtime = timestamp

            if (
                    not self.detected_crunch1
            ) and task.starttime is not None and task.finishtime is not None:
                elapsed = (task.finishtime - task.starttime).seconds
                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                if elapsed > self.stats_max['time']['elapsed']:
                    self.stats_max['time']['elapsed'] = elapsed

            this_interval_s = None
            for group in ['current', 'interval']:
                if not m.group(group):
                    continue
                category = m.group('category')
                words = m.group(group).split(' ')
                stats = {}
                try:
                    for val, stat in zip(words[::2], words[1::2]):
                        if '.' in val:
                            stats[stat] = float(val)
                        else:
                            stats[stat] = int(val)
                except ValueError as e:
                    # If the line doesn't start with 'crunchstat:' we
                    # might have mistaken an error message for a
                    # structured crunchstat line.
                    if m.group("crunchstat") is None or m.group(
                            "category") == "crunchstat":
                        logger.warning("%s: log contains message\n  %s",
                                       self.label, line)
                    else:
                        logger.warning(
                            '%s: Error parsing value %r (stat %r, category %r): %r',
                            self.label, val, stat, category, e)
                        logger.warning('%s', line)
                    continue
                if 'user' in stats or 'sys' in stats:
                    stats['user+sys'] = stats.get('user', 0) + stats.get(
                        'sys', 0)
                if 'tx' in stats or 'rx' in stats:
                    stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
                for stat, val in stats.items():
                    if group == 'interval':
                        if stat == 'seconds':
                            this_interval_s = val
                            continue
                        elif not (this_interval_s > 0):
                            logger.error(
                                "BUG? interval stat given with duration {!r}".
                                format(this_interval_s))
                            continue
                        else:
                            stat = stat + '__rate'
                            val = val / this_interval_s
                            if stat in ['user+sys__rate', 'tx+rx__rate']:
                                task.series[category, stat].append(
                                    (timestamp - self.starttime, val))
                    else:
                        if stat in ['rss']:
                            task.series[category, stat].append(
                                (timestamp - self.starttime, val))
                        self.task_stats[task_id][category][stat] = val
                    if val > self.stats_max[category][stat]:
                        self.stats_max[category][stat] = val
        logger.debug('%s: done parsing', self.label)

        self.job_tot = collections.defaultdict(
            functools.partial(collections.defaultdict, int))
        for task_id, task_stat in self.task_stats.items():
            for category, stat_last in task_stat.items():
                for stat, val in stat_last.items():
                    if stat in ['cpus', 'cache', 'swap', 'rss']:
                        # meaningless stats like 16 cpu cores x 5 tasks = 80
                        continue
                    self.job_tot[category][stat] += val
        logger.debug('%s: done totals', self.label)

コード例 #20

0

ファイルを表示

ファイル: summarizer.py プロジェクト: chapmanb/arvados

    def _run(self, logdata):
        self.detected_crunch1 = False
        for line in logdata:
            if not self.detected_crunch1 and '-8i9sb-' in line:
                self.detected_crunch1 = True

            if self.detected_crunch1:
                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
                if m:
                    seq = int(m.group('seq'))
                    uuid = m.group('task_uuid')
                    self.seq_to_uuid[seq] = uuid
                    logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
                    continue

                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds', line)
                if m:
                    task_id = self.seq_to_uuid[int(m.group('seq'))]
                    elapsed = int(m.group('elapsed'))
                    self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                    if elapsed > self.stats_max['time']['elapsed']:
                        self.stats_max['time']['elapsed'] = elapsed
                    continue

                m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
                if m:
                    uuid = m.group('uuid')
                    if self._skip_child_jobs:
                        logger.warning('%s: omitting stats from child job %s'
                                       ' because --skip-child-jobs flag is on',
                                       self.label, uuid)
                        continue
                    logger.debug('%s: follow %s', self.label, uuid)
                    child_summarizer = ProcessSummarizer(uuid)
                    child_summarizer.stats_max = self.stats_max
                    child_summarizer.task_stats = self.task_stats
                    child_summarizer.tasks = self.tasks
                    child_summarizer.starttime = self.starttime
                    child_summarizer.run()
                    logger.debug('%s: done %s', self.label, uuid)
                    continue

                # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
                m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<crunchstat>crunchstat: )(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                if not m:
                    continue
            else:
                # crunch2
                # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
                m = re.search(r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                if not m:
                    continue

            if self.label is None:
                try:
                    self.label = m.group('job_uuid')
                except IndexError:
                    self.label = 'container'
            if m.group('category').endswith(':'):
                # "stderr crunchstat: notice: ..."
                continue
            elif m.group('category') in ('error', 'caught'):
                continue
            elif m.group('category') in ('read', 'open', 'cgroup', 'CID', 'Running'):
                # "stderr crunchstat: read /proc/1234/net/dev: ..."
                # (old logs are less careful with unprefixed error messages)
                continue

            if self.detected_crunch1:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
            else:
                task_id = 'container'
            task = self.tasks[task_id]

            # Use the first and last crunchstat timestamps as
            # approximations of starttime and finishtime.
            timestamp = m.group('timestamp')
            if timestamp[10:11] == '_':
                timestamp = datetime.datetime.strptime(
                    timestamp, '%Y-%m-%d_%H:%M:%S')
            elif timestamp[10:11] == 'T':
                timestamp = datetime.datetime.strptime(
                    timestamp[:19], '%Y-%m-%dT%H:%M:%S')
            else:
                raise ValueError("Cannot parse timestamp {!r}".format(
                    timestamp))

            if task.starttime is None:
                logger.debug('%s: task %s starttime %s',
                             self.label, task_id, timestamp)
            if task.starttime is None or timestamp < task.starttime:
                task.starttime = timestamp
            if task.finishtime is None or timestamp > task.finishtime:
                task.finishtime = timestamp

            if self.starttime is None or timestamp < task.starttime:
                self.starttime = timestamp
            if self.finishtime is None or timestamp < task.finishtime:
                self.finishtime = timestamp

            if (not self.detected_crunch1) and task.starttime is not None and task.finishtime is not None:
                elapsed = (task.finishtime - task.starttime).seconds
                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                if elapsed > self.stats_max['time']['elapsed']:
                    self.stats_max['time']['elapsed'] = elapsed

            this_interval_s = None
            for group in ['current', 'interval']:
                if not m.group(group):
                    continue
                category = m.group('category')
                words = m.group(group).split(' ')
                stats = {}
                try:
                    for val, stat in zip(words[::2], words[1::2]):
                        if '.' in val:
                            stats[stat] = float(val)
                        else:
                            stats[stat] = int(val)
                except ValueError as e:
                    # If the line doesn't start with 'crunchstat:' we
                    # might have mistaken an error message for a
                    # structured crunchstat line.
                    if m.group("crunchstat") is None or m.group("category") == "crunchstat":
                        logger.warning("%s: log contains message\n  %s", self.label, line)
                    else:
                        logger.warning(
                            '%s: Error parsing value %r (stat %r, category %r): %r',
                            self.label, val, stat, category, e)
                        logger.warning('%s', line)
                    continue
                if 'user' in stats or 'sys' in stats:
                    stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
                if 'tx' in stats or 'rx' in stats:
                    stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
                for stat, val in stats.iteritems():
                    if group == 'interval':
                        if stat == 'seconds':
                            this_interval_s = val
                            continue
                        elif not (this_interval_s > 0):
                            logger.error(
                                "BUG? interval stat given with duration {!r}".
                                format(this_interval_s))
                            continue
                        else:
                            stat = stat + '__rate'
                            val = val / this_interval_s
                            if stat in ['user+sys__rate', 'tx+rx__rate']:
                                task.series[category, stat].append(
                                    (timestamp - self.starttime, val))
                    else:
                        if stat in ['rss']:
                            task.series[category, stat].append(
                                (timestamp - self.starttime, val))
                        self.task_stats[task_id][category][stat] = val
                    if val > self.stats_max[category][stat]:
                        self.stats_max[category][stat] = val
        logger.debug('%s: done parsing', self.label)

        self.job_tot = collections.defaultdict(
            functools.partial(collections.defaultdict, int))
        for task_id, task_stat in self.task_stats.iteritems():
            for category, stat_last in task_stat.iteritems():
                for stat, val in stat_last.iteritems():
                    if stat in ['cpus', 'cache', 'swap', 'rss']:
                        # meaningless stats like 16 cpu cores x 5 tasks = 80
                        continue
                    self.job_tot[category][stat] += val
        logger.debug('%s: done totals', self.label)