def analyze_overall_execution(parser):
    """Measure the overall task-execution phase from the parser's log stream.

    Consumes messages until one whose content is exactly 'Done', building a
    root Measurement ('Executing Tasks') with one child Measurement per Luigi
    task that reached a terminal (done/failed) state, plus nested Hadoop-job
    measurements attached to the task currently in flight.

    Args:
        parser: log-message source exposing ``next_message()``; each message
            has ``.content`` (str) and ``.timestamp``. Presumably returns a
            falsy value (None) when exhausted — TODO confirm against caller.

    Returns:
        The populated 'Executing Tasks' Measurement once the 'Done' marker is
        seen, or None if the stream ends before that marker.
    """
    all_execution = Measurement('Executing Tasks')
    # Worker state lines look like: "... Worker Worker... running <task_id>".
    pattern = r'.*? Worker Worker.* (?P<state>running|done|failed)\s+(?P<task_id>.*)'
    overall_start_timestamp = None
    running_measurement = None
    start_timestamp = None
    while True:
        message = parser.next_message()
        # Fix: the original tested `while message:` only AFTER the body had
        # already dereferenced message.content, so an exhausted parser crashed
        # with AttributeError instead of ending the loop.
        if not message:
            return None
        if message.content == 'Done':
            if overall_start_timestamp:
                all_execution.set_time_from_range(message.timestamp,
                                                  overall_start_timestamp)
            return all_execution
        match = re.match(pattern, message.content, (re.MULTILINE | re.DOTALL))
        if not match:
            # Hadoop job output can appear between worker-state lines; nest
            # its measurements under the task currently running.
            if 'Running job:' in message.content or 'Starting Job =' in message.content:
                for measurement in analyze_hadoop_job(message, parser):
                    # Fix: guard against a Hadoop job line arriving before any
                    # 'running' state — previously crashed on None.
                    if running_measurement is not None:
                        running_measurement.add_child(measurement)
            continue
        task = LuigiTaskDescription.from_string(match.group('task_id'))
        state = match.group('state')
        if state == 'running':
            start_timestamp = message.timestamp
            running_measurement = Measurement('Executing {}'.format(task))
            # The first 'running' line marks the start of the whole phase.
            if not overall_start_timestamp:
                overall_start_timestamp = start_timestamp
        elif running_measurement is not None:
            # 'done' or 'failed': close out the task that was running.
            # Fix: the guard above prevents a NameError/AttributeError when a
            # terminal state appears before any 'running' line.
            running_measurement.set_time_from_range(message.timestamp,
                                                    start_timestamp)
            all_execution.add_child(running_measurement)
def analyze_overall_scheduling(parser):
    """Measure the task-scheduling phase from the parser's log stream.

    Consumes messages until one whose content is exactly
    'Done scheduling tasks', building a root Measurement ('Scheduling Tasks')
    with one child per individually analyzed task-scheduling span.

    Args:
        parser: log-message source exposing ``next_message()``; each message
            has ``.content`` (str) and ``.timestamp``. Presumably returns a
            falsy value (None) when exhausted — TODO confirm against caller.

    Returns:
        The populated 'Scheduling Tasks' Measurement once the end marker is
        seen, or None if the stream ends before that marker.
    """
    all_scheduling = Measurement('Scheduling Tasks')
    # Each task's scheduling begins with a "Checking if <task> is complete" line.
    start_scheduling_pattern = r'Checking if (?P<task_id>.*?) is complete'
    start_scheduling_timestamp = None
    while True:
        message = parser.next_message()
        # Fix: the original tested `while message:` only AFTER the body had
        # already dereferenced message.content, so an exhausted parser crashed
        # with AttributeError instead of ending the loop.
        if not message:
            return None
        if message.content == 'Done scheduling tasks':
            # NOTE(review): if the end marker arrives before any 'Checking if'
            # line, start_scheduling_timestamp is still None — presumably
            # set_time_from_range tolerates that; verify in Measurement.
            all_scheduling.set_time_from_range(message.timestamp,
                                               start_scheduling_timestamp)
            return all_scheduling
        start_match = re.match(start_scheduling_pattern, message.content,
                               (re.MULTILINE | re.DOTALL))
        if start_match:
            # The first matching line marks the start of the whole phase.
            if start_scheduling_timestamp is None:
                start_scheduling_timestamp = message.timestamp
            measurement = analyze_task_scheduling(message, start_match, parser)
            if measurement:
                all_scheduling.add_child(measurement)