Example #1
0
 def _get_all_pages(self):
     """Fetch every stderr log event for this job and enqueue its lines.

     Pages through the Arvados logs API (1000 records at a time, in id
     order), splitting each record's text into individual lines for the
     consumer.  The EOF sentinel is always enqueued — even on error —
     so the reading thread can terminate.
     """
     received = 0   # running total of records seen, for progress logging
     cursor = 0     # id of the newest record processed; paging cursor
     base_filters = [
         ['object_uuid', '=', self.job_uuid],
         ['event_type', '=', 'stderr']]
     try:
         finished = False
         while not finished:
             page = arvados.api().logs().index(
                 limit=1000,
                 order=['id asc'],
                 # Resume just past the last record already handled.
                 filters=base_filters + [['id', '>', str(cursor)]],
                 select=['id', 'properties'],
             ).execute(num_retries=2)
             items = page['items']
             received += len(items)
             logger.debug(
                 '%s: received %d of %d log events',
                 self.job_uuid, received,
                 received + page['items_available'] - len(items))
             for event in items:
                 for line in event['properties']['text'].split('\n'):
                     self._queue.put(line + '\n')
                 cursor = event['id']
             # Done when the server returned nothing, or this page covers
             # everything it reports as still available.
             finished = (not items
                         or len(items) >= page['items_available'])
     finally:
         # Unblock the consumer regardless of how the loop exited.
         self._queue.put(self.EOF)
Example #2
0
 def _get_all_pages(self):
     """Fetch every stderr log event for this job and enqueue its lines.

     Pages through the Arvados logs API (1000 records at a time, ordered
     by id) for this job's stderr events, splitting each record's text
     property into individual lines for the consumer.  Always enqueues
     the EOF sentinel, even on error, so the reader thread can stop.
     """
     got = 0       # running count of records received, for progress logging
     last_id = 0   # id of the newest record processed; used as paging cursor
     filters = [['object_uuid', '=', self.job_uuid],
                ['event_type', '=', 'stderr']]
     try:
         while True:
             page = arvados.api().logs().index(
                 limit=1000,
                 order=['id asc'],
                 # Resume just past the last record already handled.
                 filters=filters + [['id', '>', str(last_id)]],
                 select=['id', 'properties'],
             ).execute(num_retries=2)
             got += len(page['items'])
             logger.debug(
                 '%s: received %d of %d log events', self.job_uuid, got,
                 got + page['items_available'] - len(page['items']))
             for i in page['items']:
                 for line in i['properties']['text'].split('\n'):
                     self._queue.put(line + '\n')
                 last_id = i['id']
             # Stop when the server returns nothing, or when this page
             # already covers everything it reports as available.
             if (len(page['items']) == 0
                     or len(page['items']) >= page['items_available']):
                 break
     finally:
         # Unblock the consumer no matter how the loop exited.
         self._queue.put(self.EOF)
Example #3
0
 def __init__(self, collection_id):
     """Open the single file contained in the given collection.

     Raises ValueError unless the collection holds exactly one file.
     """
     logger.debug('load collection %s', collection_id)
     collection = arvados.collection.CollectionReader(collection_id)
     names = list(collection)
     if len(names) != 1:
         raise ValueError(
             "collection {} has {} files; need exactly one".format(
                 collection_id, len(names)))
     only_file = names[0]
     self._reader = collection.open(only_file)
     self._label = "{}/{}".format(collection_id, only_file)
Example #4
0
 def __init__(self, collection_id):
     """Open the single file contained in the given collection.

     Raises ValueError unless the collection holds exactly one file.
     """
     logger.debug('load collection %s', collection_id)
     collection = arvados.collection.CollectionReader(collection_id)
     # Iterating a CollectionReader yields its filenames.
     filenames = [filename for filename in collection]
     if len(filenames) != 1:
         raise ValueError(
             "collection {} has {} files; need exactly one".format(
                 collection_id, len(filenames)))
     self._reader = collection.open(filenames[0])
     # e.g. "<collection_id>/<filename>", used to identify this source in logs.
     self._label = "{}/{}".format(collection_id, filenames[0])
Example #5
0
    def __init__(self, logdata, label=None, include_crunchstat_summary=False):
        """Prepare a summarizer over *logdata*.

        logdata: iterable of raw log lines to be parsed later.
        label: display name used in log messages; may be filled in
            during parsing if left as None.
        include_crunchstat_summary: whether crunchstat lines should be
            considered during parsing.
        """
        self._logdata = logdata
        self.label = label
        self._include_crunchstat_summary = include_crunchstat_summary

        # Parsing state: task sequence number -> task uuid, and the
        # per-task accumulator (entries spring into existence on lookup).
        self.seq_to_uuid = {}
        self.tasks = collections.defaultdict(Task)

        # Skip recommending new runtime constraints when the constraints
        # used for the job are already known and adequate; a subclass
        # constructor may replace this with real data where applicable.
        self.existing_constraints = {}

        logger.debug("%s: logdata %s", self.label, logdata)
    def __init__(self, logdata, label=None, include_crunchstat_summary=False):
        """Prepare a summarizer over *logdata*.

        logdata: iterable of raw log lines to be parsed later.
        label: display name used in log messages; may be filled in
            during parsing if left as None.
        include_crunchstat_summary: whether crunchstat lines should be
            considered during parsing.
        """
        self._logdata = logdata

        self.label = label
        self._include_crunchstat_summary = include_crunchstat_summary

        # Parsing state: task sequence number -> task uuid.
        self.seq_to_uuid = {}
        # Per-task accumulator; entries spring into existence on lookup.
        self.tasks = collections.defaultdict(Task)

        # We won't bother recommending new runtime constraints if the
        # constraints given when running the job are known to us and
        # are already suitable.  If applicable, the subclass
        # constructor will overwrite this with something useful.
        self.existing_constraints = {}

        logger.debug("%s: logdata %s", self.label, logdata)
Example #7
0
 def __init__(self, job_uuid):
     """Record which job's stderr events this reader will fetch."""
     self.job_uuid = job_uuid
     logger.debug('load stderr events for job %s', self.job_uuid)
Example #8
0
 def __init__(self, job_uuid):
     """Record which job's stderr events this reader will fetch."""
     logger.debug('load stderr events for job %s', job_uuid)
     self.job_uuid = job_uuid
Example #9
0
    def run(self):
        """Parse every line of the log data, populating tasks and the label.

        Each regex below recognizes one kind of crunch log line; the
        first match wins and the loop advances to the next line, so the
        order of the checks is significant.
        """
        logger.debug("%s: parsing logdata %s", self.label, self._logdata)
        for line in self._logdata:
            # "job_task" lines announce a task: remember its sequence
            # number so later lines can be attributed to its uuid.
            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
            if m:
                seq = int(m.group('seq'))
                uuid = m.group('task_uuid')
                self.seq_to_uuid[seq] = uuid
                logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
                continue

            # Task completion (success or permanent failure).
            # NOTE(review): task_id is looked up but otherwise unused here,
            # and the lookup raises KeyError for an unknown seq — confirm
            # whether that is intentional.
            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds', line)
            if m:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
                continue

            # A queued child job: recursively summarize it, sharing this
            # summarizer's tasks mapping so results accumulate in one place.
            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
            if m:
                uuid = m.group('uuid')
                logger.debug('%s: follow %s', self.label, uuid)
                child_summarizer = JobSummarizer(uuid)
                child_summarizer.tasks = self.tasks
                child_summarizer.run()
                logger.debug('%s: done %s', self.label, uuid)
                continue

            # crunchstat resource-usage lines are skipped unless the
            # caller asked for them to be included.
            m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
            if m and not self._include_crunchstat_summary:
                continue

            # Generic stderr entry; anything that doesn't match is ignored.
            m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<log_entry>.*)\n', line)
            if not m:
                #logger.debug("Could not parse line: %s", line)
                continue

            # The first recognized stderr line supplies the label if the
            # caller did not provide one.
            if self.label is None:
                self.label = m.group('job_uuid')
                logger.debug('%s: using job uuid as label', self.label)

            task_id = self.seq_to_uuid[int(m.group('seq'))]
            # defaultdict lookup: creates the Task entry as a side effect.
            task = self.tasks[task_id]

            logger.info('[%s] %s', m.group('seq'), m.group('log_entry'))

        logger.debug('%s: done parsing', self.label)
    def run(self):
        """Parse every line of the log data, populating tasks and the label.

        Each regex below recognizes one kind of crunch log line; the
        first match wins and the loop advances to the next line, so the
        order of the checks is significant.
        """
        logger.debug("%s: parsing logdata %s", self.label, self._logdata)
        for line in self._logdata:
            # "job_task" lines announce a task: remember its sequence
            # number so later lines can be attributed to its uuid.
            m = re.search(
                r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$',
                line)
            if m:
                seq = int(m.group('seq'))
                uuid = m.group('task_uuid')
                self.seq_to_uuid[seq] = uuid
                logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
                continue

            # Task completion (success or permanent failure).
            # NOTE(review): task_id is looked up but otherwise unused here,
            # and the lookup raises KeyError for an unknown seq — confirm
            # whether that is intentional.
            m = re.search(
                r'^\S+ \S+ \d+ (?P<seq>\d+) (success in|failure \(#., permanent\) after) (?P<elapsed>\d+) seconds',
                line)
            if m:
                task_id = self.seq_to_uuid[int(m.group('seq'))]
                continue

            # A queued child job: recursively summarize it, sharing this
            # summarizer's tasks mapping so results accumulate in one place.
            m = re.search(
                r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$',
                line)
            if m:
                uuid = m.group('uuid')
                logger.debug('%s: follow %s', self.label, uuid)
                child_summarizer = JobSummarizer(uuid)
                child_summarizer.tasks = self.tasks
                child_summarizer.run()
                logger.debug('%s: done %s', self.label, uuid)
                continue

            # crunchstat resource-usage lines are skipped unless the
            # caller asked for them to be included.
            m = re.search(
                r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n',
                line)
            if m and not self._include_crunchstat_summary:
                continue

            # Generic stderr entry; anything that doesn't match is ignored.
            m = re.search(
                r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<log_entry>.*)\n',
                line)
            if not m:
                #logger.debug("Could not parse line: %s", line)
                continue

            # The first recognized stderr line supplies the label if the
            # caller did not provide one.
            if self.label is None:
                self.label = m.group('job_uuid')
                logger.debug('%s: using job uuid as label', self.label)

            task_id = self.seq_to_uuid[int(m.group('seq'))]
            # defaultdict lookup: creates the Task entry as a side effect.
            task = self.tasks[task_id]

            logger.info('[%s] %s', m.group('seq'), m.group('log_entry'))

        logger.debug('%s: done parsing', self.label)