Esempio n. 1
0
def analyze_log(parser):
    """Build the 'Luigi Worker' measurement tree from a parsed log.

    The scheduling analysis runs first and the execution analysis second,
    both on the same ``parser``; each result is attached as a child of the
    root measurement, in that order.

    Args:
        parser: Log parser handed unchanged to both phase analyzers.

    Returns:
        The root ``Measurement`` holding the two phase measurements.
    """
    root = Measurement('Luigi Worker')
    # Order matters: both analyzers read from the same parser.
    for analyze_phase in (analyze_overall_scheduling, analyze_overall_execution):
        root.add_child(analyze_phase(parser))
    return root
Esempio n. 2
0
def analyze_log(parser):
    """Assemble the top-level 'Luigi Worker' measurement for a log.

    Runs the scheduling analyzer and then the execution analyzer against
    ``parser`` and adds each returned measurement as a child of the root.

    Args:
        parser: Log parser passed through to each phase analyzer.

    Returns:
        The root ``Measurement`` with the scheduling child added first and
        the execution child second.
    """
    root = Measurement('Luigi Worker')
    # Scheduling is analyzed before execution; the parser is shared.
    phase_analyzers = (analyze_overall_scheduling, analyze_overall_execution)
    for run_phase in phase_analyzers:
        phase_measurement = run_phase(parser)
        root.add_child(phase_measurement)
    return root
Esempio n. 3
0
def analyze_overall_execution(parser):
    """Collect per-task execution measurements from the worker log.

    Messages are pulled from ``parser.next_message()`` until one whose content
    is exactly ``'Done'`` is seen, or until the parser returns a falsy value
    (treated as end of log).

    Args:
        parser: Object exposing ``next_message()``; each returned message must
            provide ``content`` and ``timestamp`` attributes.

    Returns:
        The 'Executing Tasks' ``Measurement``. A child is added for each
        ``done``/``failed`` line that follows a ``running`` line. The overall
        time range is only set when a 'Done' message is found after at least
        one task started running.
    """
    all_execution = Measurement('Executing Tasks')

    pattern = r'.*? Worker Worker.* (?P<state>running|done|failed)\s+(?P<task_id>.*)'
    overall_start_timestamp = None
    running_measurement = None
    start_timestamp = None

    while True:
        message = parser.next_message()
        if not message:
            # End of log without a 'Done' marker: return what was collected
            # instead of failing on ``message.content``.
            break

        if message.content == 'Done':
            if overall_start_timestamp is not None:
                all_execution.set_time_from_range(message.timestamp, overall_start_timestamp)
            return all_execution

        match = re.match(pattern, message.content, (re.MULTILINE | re.DOTALL))
        if not match:
            if 'Running job:' in message.content or 'Starting Job =' in message.content:
                # A job line seen before any task started has no task to hang
                # off, so it is attached to the overall measurement. The job is
                # still analyzed either way so the parser keeps advancing.
                parent = all_execution if running_measurement is None else running_measurement
                for measurement in analyze_hadoop_job(message, parser):
                    parent.add_child(measurement)

            continue

        task = LuigiTaskDescription.from_string(match.group('task_id'))
        state = match.group('state')
        if state == 'running':
            start_timestamp = message.timestamp
            running_measurement = Measurement('Executing {}'.format(task))
            if overall_start_timestamp is None:
                overall_start_timestamp = start_timestamp
        elif running_measurement is not None:
            # 'done'/'failed' closes the task opened by the last 'running' line.
            # A completion line with no preceding 'running' line is ignored.
            running_measurement.set_time_from_range(message.timestamp, start_timestamp)
            all_execution.add_child(running_measurement)

    return all_execution
Esempio n. 4
0
def analyze_overall_scheduling(parser):
    """Collect per-task scheduling measurements from the worker log.

    Messages are pulled from ``parser.next_message()`` until one whose content
    is exactly ``'Done scheduling tasks'`` is seen, or until the parser
    returns a falsy value (treated as end of log).

    Args:
        parser: Object exposing ``next_message()``; each returned message must
            provide ``content`` and ``timestamp`` attributes.

    Returns:
        The 'Scheduling Tasks' ``Measurement``. Each 'Checking if ... is
        complete' message is handed to ``analyze_task_scheduling`` and any
        truthy result is added as a child. The overall time range is only set
        when the terminating message is found after at least one such message.
    """
    all_scheduling = Measurement('Scheduling Tasks')

    start_scheduling_pattern = r'Checking if (?P<task_id>.*?) is complete'
    start_scheduling_timestamp = None

    while True:
        message = parser.next_message()
        if not message:
            # End of log without the terminating marker: return what was
            # collected instead of failing on ``message.content``.
            break

        if message.content == 'Done scheduling tasks':
            # No scheduling message seen means there is no start to measure from.
            if start_scheduling_timestamp is not None:
                all_scheduling.set_time_from_range(message.timestamp, start_scheduling_timestamp)
            return all_scheduling

        start_match = re.match(start_scheduling_pattern, message.content, (re.MULTILINE | re.DOTALL))
        if not start_match:
            continue

        # The first scheduling message marks the start of the whole phase.
        if start_scheduling_timestamp is None:
            start_scheduling_timestamp = message.timestamp

        measurement = analyze_task_scheduling(message, start_match, parser)
        if measurement:
            all_scheduling.add_child(measurement)

    return all_scheduling
Esempio n. 5
0
def analyze_overall_execution(parser):
    """Collect per-task execution measurements from the worker log.

    Messages are pulled from ``parser.next_message()`` until one whose content
    is exactly ``'Done'`` is seen, or until the parser returns a falsy value
    (treated as end of log).

    Args:
        parser: Object exposing ``next_message()``; each returned message must
            provide ``content`` and ``timestamp`` attributes.

    Returns:
        The 'Executing Tasks' ``Measurement``. A child is added for each
        ``done``/``failed`` line that follows a ``running`` line. The overall
        time range is only set when a 'Done' message is found after at least
        one task started running.
    """
    all_execution = Measurement('Executing Tasks')

    pattern = r'.*? Worker Worker.* (?P<state>running|done|failed)\s+(?P<task_id>.*)'
    overall_start_timestamp = None
    running_measurement = None
    start_timestamp = None

    while True:
        message = parser.next_message()
        if not message:
            # End of log without a 'Done' marker: return what was collected
            # instead of failing on ``message.content``.
            break

        if message.content == 'Done':
            if overall_start_timestamp is not None:
                all_execution.set_time_from_range(message.timestamp,
                                                  overall_start_timestamp)
            return all_execution

        match = re.match(pattern, message.content, (re.MULTILINE | re.DOTALL))
        if not match:
            if 'Running job:' in message.content or 'Starting Job =' in message.content:
                # A job line seen before any task started has no task to hang
                # off, so it is attached to the overall measurement. The job is
                # still analyzed either way so the parser keeps advancing.
                if running_measurement is None:
                    parent = all_execution
                else:
                    parent = running_measurement
                for measurement in analyze_hadoop_job(message, parser):
                    parent.add_child(measurement)

            continue

        task = LuigiTaskDescription.from_string(match.group('task_id'))
        state = match.group('state')
        if state == 'running':
            start_timestamp = message.timestamp
            running_measurement = Measurement('Executing {}'.format(task))
            if overall_start_timestamp is None:
                overall_start_timestamp = start_timestamp
        elif running_measurement is not None:
            # 'done'/'failed' closes the task opened by the last 'running' line.
            # A completion line with no preceding 'running' line is ignored.
            running_measurement.set_time_from_range(message.timestamp,
                                                    start_timestamp)
            all_execution.add_child(running_measurement)

    return all_execution
Esempio n. 6
0
def analyze_overall_scheduling(parser):
    """Collect per-task scheduling measurements from the worker log.

    Messages are pulled from ``parser.next_message()`` until one whose content
    is exactly ``'Done scheduling tasks'`` is seen, or until the parser
    returns a falsy value (treated as end of log).

    Args:
        parser: Object exposing ``next_message()``; each returned message must
            provide ``content`` and ``timestamp`` attributes.

    Returns:
        The 'Scheduling Tasks' ``Measurement``. Each 'Checking if ... is
        complete' message is handed to ``analyze_task_scheduling`` and any
        truthy result is added as a child. The overall time range is only set
        when the terminating message is found after at least one such message.
    """
    all_scheduling = Measurement('Scheduling Tasks')

    start_scheduling_pattern = r'Checking if (?P<task_id>.*?) is complete'
    start_scheduling_timestamp = None

    while True:
        message = parser.next_message()
        if not message:
            # End of log without the terminating marker: return what was
            # collected instead of failing on ``message.content``.
            break

        if message.content == 'Done scheduling tasks':
            # No scheduling message seen means there is no start to measure from.
            if start_scheduling_timestamp is not None:
                all_scheduling.set_time_from_range(message.timestamp,
                                                   start_scheduling_timestamp)
            return all_scheduling

        start_match = re.match(start_scheduling_pattern, message.content,
                               (re.MULTILINE | re.DOTALL))
        if not start_match:
            continue

        # The first scheduling message marks the start of the whole phase.
        if start_scheduling_timestamp is None:
            start_scheduling_timestamp = message.timestamp

        measurement = analyze_task_scheduling(message, start_match, parser)
        if measurement:
            all_scheduling.add_child(measurement)

    return all_scheduling