Esempio n. 1
0
from plotly.offline import plot
from plotly.graph_objs import Figure, Scatter
from parsers import ProfileParser, SparkLogParser
from helper import get_max_y
from typing import List

# Script: combine selected JVM profiler metrics with the data SparkLogParser
# derives from the Spark log (graph_tasks, extract_stage_markers,
# extract_job_markers) into one Plotly figure saved as 'straggler.html'.

profile_file = './data/ProfileStraggler/CpuAndMemory.json.gz'  # Output from JVM profiler
profile_parser = ProfileParser(profile_file, normalize=True)
# Alternative selection, kept for reference:
# data_points = profile_parser.ignore_metrics(['ScavengeCollCount'])
data_points: List[Scatter] = profile_parser.get_metrics(
    ['systemCpuLoad', 'processCpuLoad'])

log_file = './data/ProfileStraggler/JobStraggler.log.gz'  # standard Spark log
log_parser = SparkLogParser(log_file)

# Deliberately not named `max`: binding that name at module level would
# shadow the builtin max() for every statement that follows.
max_y = get_max_y(data_points)
# The same value feeds both the task traces and the job-marker layout below
# (presumably so they share the metrics' y-range -- confirm in parsers/helper).
task_data = log_parser.graph_tasks(max_y)

# graph_tasks() yields several traces (extend); the stage markers come back
# as a single trace (append).
data_points.extend(task_data)
stage_interval_markers = log_parser.extract_stage_markers()
data_points.append(stage_interval_markers)

layout = log_parser.extract_job_markers(max_y)
fig = Figure(data=data_points, layout=layout)
plot(fig, filename='straggler.html')