def initHeapUsageAndGCPause(self, gclogFile, gcpauseFile):
    """Load heap-usage samples and GC pause records from two log files.

    Every non-blank line of ``gclogFile`` is passed to ``self.parseUsage``;
    the result is appended to ``self.youngGen``, ``self.oldGen`` or
    ``self.metaGen`` when the line starts with ``[Young]``, ``[Old]`` or
    ``[Metaspace]`` respectively, and discarded for any other line.

    ``gcpauseFile`` is read as comma-separated records. Lines that do not
    start with a digit are skipped, as are records whose last field is
    ``"NONE"``. For the remaining records the last field is the GC type:
    field 3 is appended to ``self.gcTotal[gcType]`` and field 0 to
    ``self.gcTimeLine[gcType]``, both converted to ``float``.
    NOTE(review): presumably field 3 is the pause duration and field 0 its
    timestamp -- confirm against the producer of the pause file.

    :param gclogFile: path of the heap-usage log (read via FileReader.readLines)
    :param gcpauseFile: path of the GC pause file (read via FileReader.readLines)
    :raises IndexError: if a used pause record has fewer than four fields
    :raises ValueError: if field 0 or field 3 of a used record is not a float
    """
    for line in FileReader.readLines(gclogFile):
        # The original test was `line.strip != ""`, which compares the bound
        # method object with a string and is therefore always True; the
        # method has to be called for blank lines to be skipped.
        if line.strip() == "":
            continue
        heapUsage = self.parseUsage(line)
        if line.startswith("[Young]"):
            self.youngGen.append(heapUsage)
        elif line.startswith("[Old]"):
            self.oldGen.append(heapUsage)
        elif line.startswith("[Metaspace]"):
            self.metaGen.append(heapUsage)

    for line in FileReader.readLines(gcpauseFile):
        # line[:1] is "" for an empty line, so an empty line is skipped
        # instead of raising IndexError the way line[0] would.
        if not line[:1].isdigit():
            continue
        items = line.split(",")
        gcType = items[-1]
        if gcType == "NONE":
            continue
        self.gcTotal.setdefault(gcType, []).append(float(items[3]))
        self.gcTimeLine.setdefault(gcType, []).append(float(items[0]))
def analyzeMetrics(self, metrics):
    """Collect the statistics lines of the requested metrics.

    Scans every file in ``self.statisticsDir`` whose name ends with
    ``stat.txt`` and, for each line whose bracketed name is one of
    ``metrics``, feeds the line and the file name to the matching entry of
    ``self.metricsMap`` (an ``st.Statistics`` is created on first sight).

    :param metrics: [app.duration, stage0.duration, s0.task.jvmGcTime, ...]
    """
    statisticsFiles = os.listdir(self.statisticsDir)
    metricsSet = set(metrics)

    for fileName in statisticsFiles:
        if not fileName.endswith("stat.txt"):
            continue
        # File names: [RDDJoin-CMS-1-7G-stat.txt, RDDJoin-CMS-2-14G-stat.txt, ...]
        # Line samples:
        # [app.duration] mean = 224339.20, stdVar = 8311.91, median = 225233.00, min = 211999.00, quantile25 = 216682.50, quantile75 = 231549.00, max = 233837.00
        # -------------------------------[Stage 0]-------------------------------
        # [stage0.duration] mean = 42360.60, stdVar = 4069.63, median = 41404.00, min = 37094.00, quantile25 = 38942.50, quantile75 = 46257.00, max = 47801.00
        # [stage0.inputBytes] mean = 8588671743.00, stdVar = 0.00, median = 8588671743.00, ...
        # [stage0.inputRecords] mean = 66000000.00, stdVar = 0.00, median = 66000000.00, ...
        for line in FileReader.readLines(
                os.path.join(self.statisticsDir, fileName)):
            # Text between the first '[' and the first ']' is the metric name.
            metricName = line[line.find('[') + 1:line.find(']')]
            if metricName not in metricsSet:
                continue
            # `in` replaces dict.has_key(), which no longer exists in
            # Python 3 and is equivalent on Python 2.
            if metricName in self.metricsMap:
                self.metricsMap[metricName].addStatistics(line, fileName)
            else:
                statistics = st.Statistics()
                statistics.addStatistics(line, fileName)
                self.metricsMap[metricName] = statistics
def fillStatistics(self, metrics, statisticsDir, statisticsFiles, metricsMap, withMax):
    """Fill ``metricsMap`` with box-plot statistics read from ``*stat.txt`` files.

    :param metrics: iterable of metric tuples whose first element is the
        metric name, e.g. ("app.duration", "Time (s)", 1000)
    :param statisticsDir: directory that contains the statistics files
    :param statisticsFiles: file names inside ``statisticsDir``; only names
        ending with ``stat.txt`` are read
    :param metricsMap: dict updated in place, metric name -> statistics
        object (an ``st.BoxPlotStatistics`` is created on first sight)
    :param withMax: passed through unchanged to ``addStatistics``
    """
    # "app.duration" -> ("app.duration", "Time (s)", 1000)
    metricsTupleDict = {}
    for metricTuple in metrics:
        metricsTupleDict[metricTuple[0]] = metricTuple

    for fileName in statisticsFiles:
        if not fileName.endswith("stat.txt"):
            continue
        # File names: [RDDJoin-CMS-1-7G-stat.txt, RDDJoin-CMS-2-14G-stat.txt, ...]
        # Line samples:
        # [app.duration] mean = 224339.20, stdVar = 8311.91, median = 225233.00, min = 211999.00, quantile25 = 216682.50, quantile75 = 231549.00, max = 233837.00
        # -------------------------------[Stage 0]-------------------------------
        # [stage0.duration] mean = 42360.60, stdVar = 4069.63, median = 41404.00, min = 37094.00, quantile25 = 38942.50, quantile75 = 46257.00, max = 47801.00
        # [stage0.inputBytes] mean = 8588671743.00, stdVar = 0.00, median = 8588671743.00, ...
        # [stage0.inputRecords] mean = 66000000.00, stdVar = 0.00, median = 66000000.00, ...
        for line in FileReader.readLines(os.path.join(statisticsDir, fileName)):
            # Text between the first '[' and the first ']' is the metric name.
            metricName = line[line.find('[') + 1:line.find(']')]
            # `in` replaces dict.has_key(), which no longer exists in
            # Python 3 and is equivalent on Python 2.
            if metricName not in metricsTupleDict:
                continue
            if metricName in metricsMap:
                metricsMap[metricName].addStatistics(line, fileName, withMax)
            else:
                statistics = st.BoxPlotStatistics(metricsTupleDict[metricName])
                statistics.addStatistics(line, fileName, withMax)
                metricsMap[metricName] = statistics

    # Fill the N/A values.
    # NOTE(review): placed after all files have been read; the one-line source
    # does not show the original nesting level -- confirm.
    for statistics in metricsMap.values():
        statistics.checkAndFillNulls()
def initHeapUsage(self, gclogFile, timeOffset):
    """Load young- and old-generation heap-usage samples from a GC log.

    Every non-blank line of ``gclogFile`` is passed to ``self.parseUsage``
    together with ``timeOffset``; the result is appended to ``self.youngGen``
    when the line starts with ``[Young]`` and to ``self.oldGen`` when it
    starts with ``[Old]``. Results for any other line are discarded.

    :param gclogFile: path of the heap-usage log (read via FileReader.readLines)
    :param timeOffset: passed through unchanged to ``self.parseUsage``
    """
    for line in FileReader.readLines(gclogFile):
        # The original test was `line.strip != ""`, which compares the bound
        # method object with a string and is therefore always True; the
        # method has to be called for blank lines to be skipped.
        if line.strip() == "":
            continue
        heapUsage = self.parseUsage(line, timeOffset)
        if line.startswith("[Young]"):
            self.youngGen.append(heapUsage)
        elif line.startswith("[Old]"):
            self.oldGen.append(heapUsage)
def analyzeMetrics(self):
    """Parse the ``*tasks.txt`` files of ``self.taskInfoDir`` into TaskAttempts.

    The GC label ("CMS", "G1" or "Parallel") and the executor label ("E1",
    "E2", "E4" for the markers "1-7G", "2-14G", "4-28G") are taken from the
    file name; each is "" when no marker is present. A ``ta.TaskAttempt`` is
    created for every ``index`` metric line, appended to ``self.taskList``
    and then receives every metric line up to the next ``index`` line via
    ``set(metricName, metricValue)``.

    File names: [GroupByRDD-CMS-1-7G-0.5-tasks.txt, GroupByRDD-CMS-2-14G-0.5-tasks.txt, ...]
    File content sample::

        [appName = GroupByRDD-CMS-1-7G-0.5]
        [appId = app-20170721105922-0025]
        [stageId = 1]

        [0.task.index] 0
        [0.task.attemptId] 0
        [0.task.executorId] 29
        [0.task.duration] 4715
        [0.task.executorDeserializeTime] 60
        [0.task.executorDeserializeCpuTime] 43705029
        [0.task.executorRunTime] 4715
        [0.task.executorCpuTime] 2614115738
        [0.task.resultSize] 2865
        [0.task.jvmGcTime] 1305

    :raises ValueError: if a metric value is not a number, or a metric line
        appears in a file before any ``index`` line
    """
    # Checked in this order; the first marker found in the file name wins.
    gcMarkers = ("CMS", "G1", "Parallel")
    executorMarkers = (("1-7G", "E1"), ("2-14G", "E2"), ("4-28G", "E4"))

    for fileName in os.listdir(self.taskInfoDir):
        if not fileName.endswith("tasks.txt"):
            continue

        GC = next((marker for marker in gcMarkers if marker in fileName), "")
        executor = next(
            (label for marker, label in executorMarkers if marker in fileName),
            "")

        # Reset per file so that a malformed file can never add metrics to
        # the last task of the previously parsed file.
        taskAttempt = None

        for line in FileReader.readLines(
                os.path.join(self.taskInfoDir, fileName)):
            if line.startswith('[appName'):
                appName = line[line.find('=') + 2:line.find(']')]
            elif line.startswith('[appId'):
                appId = line[line.find('=') + 2:line.find(']')]
            elif line.startswith('[stageId'):
                stageId = int(line[line.find('=') + 2:line.find(']')])
            elif line.startswith('['):
                # "[0.task.duration] 4715" -> ("duration", 4715.0)
                metricName = line[line.find('task') + 5:line.find(']')]
                metricValue = float(line[line.find(']') + 2:])
                if metricName == 'index':
                    taskAttempt = ta.TaskAttempt(
                        appId, appName, stageId, int(metricValue), GC,
                        executor)
                    # NOTE(review): the one-line source does not show where
                    # the original append was nested. Appending here, at
                    # creation, stores each task exactly once; later set()
                    # calls still reach it because the list holds a reference.
                    self.taskList.append(taskAttempt)
                if taskAttempt is None:
                    raise ValueError(
                        "metric line before any task index in " + fileName)
                taskAttempt.set(metricName, metricValue)
def plotResourceUsage(topMetricsFile, slowestTasksDir, appName):
    """Show executor and worker CPU / memory curves read from a top-metrics file.

    Lines after a "[Top Metrics][Executor" header feed the executor series,
    lines after a "[Top Metrics][aliSlave" header feed the worker series;
    blank lines and lines before the first header are ignored. The figure has
    two stacked subplots sharing the x axis, each with CPU on the left y axis
    and memory on a twin right y axis, and is displayed with ``plt.show()``.

    :param topMetricsFile: path of the metrics file (read via FileReader.readLines)
    :param slowestTasksDir: directory in which "topMetricsFigures" is created
    :param appName: figure title, also used to build the (unused) pdf path
    """
    def parseSample(text):
        # assumes "[HH:MM:SS] CPU = <float>, Memory = <float>" -- TODO confirm
        stamp = datetime.strptime(
            text[text.find('[') + 1:text.find(']')], '%H:%M:%S')
        cpuValue = float(text[text.find('=') + 2:text.find(',')])
        memoryValue = float(text[text.find('Memory') + 9:])
        return stamp, cpuValue, memoryValue

    # Each series is (times, cpu values, memory values).
    executorSeries = ([], [], [])
    slaveSeries = ([], [], [])
    activeSeries = None  # series that sample lines currently belong to

    for line in FileReader.readLines(topMetricsFile):
        if line.startswith("[Top Metrics][Executor"):
            activeSeries = executorSeries
        elif line.startswith("[Top Metrics][aliSlave"):
            activeSeries = slaveSeries
        elif activeSeries is not None and line.strip() != "":
            for column, value in zip(activeSeries, parseSample(line)):
                column.append(value)

    executorTime, executorCPU, executorMemory = executorSeries
    slaveTime, slaveCPU, slaveMemory = slaveSeries

    fig, axes = plt.subplots(nrows=2, ncols=1, sharey=False, sharex=True)
    executorAxis, slaveAxis = axes

    clockFormat = mdates.DateFormatter('%H:%M:%S')
    executorAxis.xaxis.set_major_formatter(clockFormat)
    slaveAxis.xaxis.set_major_formatter(clockFormat)

    executorAxis.set_ylabel("Executor CPU (%)", color='r')
    executorAxis.tick_params('y', colors='r')
    slaveAxis.set_ylabel("Worker CPU (%)", color='r')
    slaveAxis.tick_params('y', colors='r')

    executorAxis.set_ylim(0, 840)  # The ceil
    slaveAxis.set_ylim(0, 105)  # The ceil

    fig.autofmt_xdate()

    executorAxis.plot_date(executorTime, executorCPU, '-r', label='CPU')
    slaveAxis.plot_date(slaveTime, slaveCPU, '-r', label='CPU')

    executorMemoryAxis = executorAxis.twinx()
    executorMemoryAxis.plot_date(
        executorTime, executorMemory, '-b', label='Memory')
    executorMemoryAxis.set_ylabel('Executor Memory (GB)', color='b')
    executorMemoryAxis.tick_params('y', colors='b')
    executorMemoryAxis.set_ylim(0, 32)  # The ceil
    # NOTE(review): this overrides the blue tick colour set two lines above
    # and has no counterpart on the worker axis; it may have been a
    # commented-out line in the original -- confirm.
    executorMemoryAxis.tick_params('y', colors='r')

    slaveMemoryAxis = slaveAxis.twinx()
    slaveMemoryAxis.plot_date(slaveTime, slaveMemory, '-b', label='Memory')
    slaveMemoryAxis.set_ylabel('Worker Memory (GB)', color='b')
    slaveMemoryAxis.tick_params('y', colors='b')
    slaveMemoryAxis.set_ylim(0, 32)  # The ceil

    plt.suptitle(appName)

    outputDir = os.path.join(slowestTasksDir, "topMetricsFigures")
    if not os.path.exists(outputDir):
        os.mkdir(outputDir)
    # The pdf path is built but never written to: the figure is only shown.
    figurePath = os.path.join(outputDir, appName + ".pdf")
    plt.show()
#plt.tight_layout() plt.show() #fig = plt.gcf() #plt.show() #fig.savefig(outputFile, dpi=300, bbox_inches='tight') if __name__ == '__main__': gcLogPath = "/Users/xulijie/Documents/GCResearch/Android/logs/" appName = "Alexnet GC Metrics" gcFileName = "cnndroid_alexnet_scale1_image_time_logcat.txt" gcLines = FileReader.readLines(gcLogPath + gcFileName) gcActivities = GCActivities(gcLines) plotHeapUsage(appName, appName, gcActivities) appName = "Nin GC Metrics" gcFileName = "cnndroid_nin_scale1_image_time_logcat.txt" gcLines = FileReader.readLines(gcLogPath + gcFileName) gcActivities = GCActivities(gcLines) plotHeapUsage(appName, appName, gcActivities)