def initHeapUsageAndGCPause(self, gclogFile, gcpauseFile):
        """Load heap-usage samples and GC pause records into this object.

        Heap usage: every non-blank line of ``gclogFile`` is parsed with
        ``self.parseUsage`` and the result is appended to ``self.youngGen``,
        ``self.oldGen`` or ``self.metaGen`` according to the line's
        ``[Young]`` / ``[Old]`` / ``[Metaspace]`` prefix. Lines with any
        other prefix are parsed but not stored.

        GC pauses: ``gcpauseFile`` is read as comma-separated records. Only
        lines whose first character is a digit are used. The last field is
        the GC type (records of type ``"NONE"`` are skipped), the fourth
        field is appended to ``self.gcTotal[gc_type]`` and the first field
        to ``self.gcTimeLine[gc_type]``, both converted to ``float``.

        :param gclogFile: path of the heap-usage log
        :param gcpauseFile: path of the comma-separated GC pause file
        """
        for line in FileReader.readLines(gclogFile):
            # Bug fix: the original tested ``line.strip != ""``, i.e. it
            # compared the bound method itself with a string. That is always
            # true, so blank lines were passed on to parseUsage.
            if line.strip() == "":
                continue
            heapUsage = self.parseUsage(line)
            if line.startswith("[Young]"):
                self.youngGen.append(heapUsage)
            elif line.startswith("[Old]"):
                self.oldGen.append(heapUsage)
            elif line.startswith("[Metaspace]"):
                self.metaGen.append(heapUsage)

        for line in FileReader.readLines(gcpauseFile):
            # Skip headers and anything else that is not a data row. The
            # emptiness check guards ``line[0]``, which raised IndexError
            # on an empty line.
            if not line or not line[0].isdigit():
                continue
            items = line.split(",")
            gc_type = items[-1]
            if gc_type == "NONE":
                continue
            if gc_type not in self.gcTotal:
                self.gcTotal[gc_type] = []
            self.gcTotal[gc_type].append(float(items[3]))
            if gc_type not in self.gcTimeLine:
                self.gcTimeLine[gc_type] = []
            self.gcTimeLine[gc_type].append(float(items[0]))
Exemple #2
0
    def analyzeMetrics(self, metrics):
        """
        Collect statistics for the requested metrics from every file in
        ``self.statisticsDir`` whose name ends with ``stat.txt``.

        For each line, the text between the first '[' and the first ']' is
        taken as the metric name. If that name was requested, the line is
        handed to the ``st.Statistics`` object stored under that name in
        ``self.metricsMap`` (one is created on first use).

        :param metrics: [app.duration, stage0.duration, s0.task.jvmGcTime, ...]
        """
        statisticsFiles = os.listdir(self.statisticsDir)

        metricsSet = set(metrics)

        for fileName in statisticsFiles:
            # e.g. [RDDJoin-CMS-1-7G-stat.txt, RDDJoin-CMS-2-14G-stat.txt, ...]
            if not fileName.endswith("stat.txt"):
                continue
            # Example lines:
            # [app.duration] mean = 224339.20, stdVar = 8311.91, median = 225233.00, ...
            # ----------------------------[Stage 0]----------------------------
            # [stage0.duration] mean = 42360.60, stdVar = 4069.63, median = 41404.00, ...
            statisticsPath = os.path.join(self.statisticsDir, fileName)
            for line in FileReader.readLines(statisticsPath):
                metricName = line[line.find('[') + 1:line.find(']')]
                if metricName not in metricsSet:
                    continue
                # Bug fix: dict.has_key() no longer exists in Python 3; the
                # ``in`` operator behaves the same on Python 2 and 3.
                if metricName in self.metricsMap:
                    self.metricsMap[metricName].addStatistics(line, fileName)
                else:
                    statistics = st.Statistics()
                    statistics.addStatistics(line, fileName)
                    self.metricsMap[metricName] = statistics
Exemple #3
0
    def fillStatistics(self, metrics, statisticsDir, statisticsFiles, metricsMap, withMax):
        """
        Populate ``metricsMap`` with box-plot statistics read from the
        ``*stat.txt`` files of ``statisticsDir``.

        For each line, the text between the first '[' and the first ']' is
        taken as the metric name. Lines whose metric is listed in
        ``metrics`` are passed to the ``st.BoxPlotStatistics`` object stored
        under that name (one is created on first use). Afterwards
        ``checkAndFillNulls()`` is called on every entry of ``metricsMap``.

        :param metrics: metric descriptor tuples whose first element is the
            metric name, e.g. ("app.duration", "Time (s)", 1000)
        :param statisticsDir: directory containing the statistics files
        :param statisticsFiles: file names (relative to ``statisticsDir``)
            to consider; only names ending with "stat.txt" are read
        :param metricsMap: dict of metric name -> statistics object,
            updated in place
        :param withMax: forwarded unchanged to ``addStatistics``
        """
        # metric name -> full descriptor tuple,
        # e.g. "app.duration" -> ("app.duration", "Time (s)", 1000)
        metricsTupleDict = {}
        for metricTuple in metrics:
            metricsTupleDict[metricTuple[0]] = metricTuple

        for fileName in statisticsFiles:
            # e.g. [RDDJoin-CMS-1-7G-stat.txt, RDDJoin-CMS-2-14G-stat.txt, ...]
            if not fileName.endswith("stat.txt"):
                continue
            # Example lines:
            # [app.duration] mean = 224339.20, stdVar = 8311.91, median = 225233.00, ...
            # ----------------------------[Stage 0]----------------------------
            # [stage0.duration] mean = 42360.60, stdVar = 4069.63, median = 41404.00, ...
            for line in FileReader.readLines(os.path.join(statisticsDir, fileName)):
                metricName = line[line.find('[') + 1:line.find(']')]
                # Bug fix: dict.has_key() no longer exists in Python 3; the
                # ``in`` operator behaves the same on Python 2 and 3.
                if metricName not in metricsTupleDict:
                    continue
                if metricName in metricsMap:
                    metricsMap[metricName].addStatistics(line, fileName, withMax)
                else:
                    statistics = st.BoxPlotStatistics(metricsTupleDict[metricName])
                    statistics.addStatistics(line, fileName, withMax)
                    metricsMap[metricName] = statistics

        # Fill the N/A values
        for statistics in metricsMap.values():
            statistics.checkAndFillNulls()
Exemple #4
0
 def initHeapUsage(self, gclogFile, timeOffset):
     """Load young/old generation heap-usage samples from ``gclogFile``.

     Every non-blank line is parsed with
     ``self.parseUsage(line, timeOffset)``; the result is appended to
     ``self.youngGen`` for lines starting with ``[Young]`` and to
     ``self.oldGen`` for lines starting with ``[Old]``. Lines with any
     other prefix are parsed but not stored.

     :param gclogFile: path of the heap-usage log
     :param timeOffset: forwarded unchanged to ``self.parseUsage``
     """
     for line in FileReader.readLines(gclogFile):
         # Bug fix: the original tested ``line.strip != ""``, i.e. it
         # compared the bound method itself with a string. That is always
         # true, so blank lines were passed on to parseUsage.
         if line.strip() == "":
             continue
         heapUsage = self.parseUsage(line, timeOffset)
         if line.startswith("[Young]"):
             self.youngGen.append(heapUsage)
         elif line.startswith("[Old]"):
             self.oldGen.append(heapUsage)
Exemple #5
0
    def analyzeMetrics(self):
        """
        Build ``ta.TaskAttempt`` objects from every ``*tasks.txt`` file in
        ``self.taskInfoDir`` and append them to ``self.taskList``.

        The collector name ("CMS", "G1" or "Parallel") and the executor
        label ("E1", "E2" or "E4") are derived from substrings of the file
        name; each stays "" when no known substring is present.

        Inside a file, the ``[appName = ...]``, ``[appId = ...]`` and
        ``[stageId = ...]`` header lines set the context for the metric
        lines that follow. A ``[<n>.task.index] <v>`` line starts a new
        task attempt, and every metric line (including the index line) is
        stored on the current attempt via ``set(name, float_value)``.

        NOTE(review): the header values and the current attempt are not
        reset between files, so a metric line that appears before its
        headers or before an ``index`` line reuses stale values or raises
        an unbound-variable error, exactly as before.
        """
        taskInfoFiles = os.listdir(self.taskInfoDir)

        for fileName in taskInfoFiles:
            # e.g. [GroupByRDD-CMS-1-7G-0.5-tasks.txt, GroupByRDD-CMS-2-14G-0.5-tasks.txt, ...]
            if not fileName.endswith("tasks.txt"):
                continue
            # Example content:
            # [appName = GroupByRDD-CMS-1-7G-0.5]
            # [appId = app-20170721105922-0025]
            # [stageId = 1]
            #
            # [0.task.index] 0
            # [0.task.attemptId] 0
            # [0.task.executorId] 29
            # [0.task.duration] 4715
            # [0.task.jvmGcTime] 1305

            # First matching substring wins, in the same priority order as
            # the original if/elif chains.
            GC = ""
            for gcName in ("CMS", "G1", "Parallel"):
                if gcName in fileName:
                    GC = gcName
                    break

            executor = ""
            for sizeTag, label in (("1-7G", "E1"), ("2-14G", "E2"), ("4-28G", "E4")):
                if sizeTag in fileName:
                    executor = label
                    break

            for line in FileReader.readLines(
                    os.path.join(self.taskInfoDir, fileName)):
                if line.startswith('[appName'):
                    appName = line[line.find('=') + 2:line.find(']')]
                elif line.startswith('[appId'):
                    appId = line[line.find('=') + 2:line.find(']')]
                elif line.startswith('[stageId'):
                    stageId = int(line[line.find('=') + 2:line.find(']')])
                elif line.startswith('['):
                    # "[0.task.duration] 4715" -> ("duration", 4715.0)
                    metricName = line[line.find('task') + 5:line.find(']')]
                    metricValue = float(line[line.find(']') + 2:])
                    if metricName == 'index':
                        taskAttempt = ta.TaskAttempt(
                            appId, appName, stageId, int(metricValue), GC,
                            executor)
                        # Bug fix: register each attempt once, when it is
                        # created. The original appended on every metric
                        # line, so the same object appeared in taskList
                        # once per metric.
                        self.taskList.append(taskAttempt)
                    taskAttempt.set(metricName, metricValue)
def _parseTopMetricLine(line):
    """Split one top-metrics sample line into (timestamp, cpu, memory).

    The timestamp is the ``%H:%M:%S`` text between the first '[' and the
    first ']'; the CPU value runs from two characters after the first '='
    up to the first ','; the memory value is everything from nine
    characters after the first 'Memory'.
    NOTE(review): presumably lines look like
    ``[HH:MM:SS] CPU = <v>, Memory = <v>`` -- confirm against a real file.
    """
    timestamp = datetime.strptime(
        line[line.find('[') + 1:line.find(']')], '%H:%M:%S')
    cpu = float(line[line.find('=') + 2:line.find(',')])
    memory = float(line[line.find('Memory') + 9:])
    return timestamp, cpu, memory


def plotResourceUsage(topMetricsFile, slowestTasksDir, appName):
    """Plot executor and worker CPU/memory over time from a top-metrics file.

    The file is read section by section: a line starting with
    ``[Top Metrics][Executor`` switches to the executor series, a line
    starting with ``[Top Metrics][aliSlave`` switches to the worker
    (slave) series, and every other non-blank line is parsed as one
    sample of the current section. Lines before the first header are
    ignored.

    Two stacked sub-plots sharing the x axis are drawn (executor on top,
    worker below), each with CPU on the left y axis in red and memory on
    a twin right y axis in blue. The figure is shown interactively.

    :param topMetricsFile: path of the top-metrics text file
    :param slowestTasksDir: directory in which ``topMetricsFigures`` is
        created if it does not exist yet
    :param appName: used as the figure title
    """
    fileLines = FileReader.readLines(topMetricsFile)

    # Each series is (times, cpu values, memory values).
    executorSeries = ([], [], [])
    slaveSeries = ([], [], [])
    currentSeries = None  # no section header seen yet

    for line in fileLines:
        if line.startswith("[Top Metrics][Executor"):
            currentSeries = executorSeries
        elif line.startswith("[Top Metrics][aliSlave"):
            currentSeries = slaveSeries
        elif currentSeries is not None and line.strip() != "":
            for values, sample in zip(currentSeries, _parseTopMetricLine(line)):
                values.append(sample)

    executorTime, executorCPU, executorMemory = executorSeries
    slaveTime, slaveCPU, slaveMemory = slaveSeries

    fig, axes = plt.subplots(nrows=2, ncols=1, sharey=False, sharex=True)
    xfmt = mdates.DateFormatter('%H:%M:%S')
    axes[0].xaxis.set_major_formatter(xfmt)
    axes[1].xaxis.set_major_formatter(xfmt)
    axes[0].set_ylabel("Executor CPU (%)", color='r')
    axes[0].tick_params('y', colors='r')
    axes[1].set_ylabel("Worker CPU (%)", color='r')
    axes[1].tick_params('y', colors='r')
    axes[0].set_ylim(0, 840)  # The ceil
    axes[1].set_ylim(0, 105)  # The ceil
    fig.autofmt_xdate()

    axes[0].plot_date(executorTime, executorCPU, '-r', label='CPU')
    axes[1].plot_date(slaveTime, slaveCPU, '-r', label='CPU')

    # Memory goes on a second y axis so both quantities share the time axis.
    ax12 = axes[0].twinx()
    ax12.plot_date(executorTime, executorMemory, '-b', label='Memory')
    ax12.set_ylabel('Executor Memory (GB)', color='b')
    ax12.tick_params('y', colors='b')
    ax12.set_ylim(0, 32)  # The ceil

    ax22 = axes[1].twinx()
    ax22.plot_date(slaveTime, slaveMemory, '-b', label='Memory')
    ax22.set_ylabel('Worker Memory (GB)', color='b')
    ax22.tick_params('y', colors='b')
    ax22.set_ylim(0, 32)  # The ceil

    plt.suptitle(appName)

    # The output directory is still created (an existing side effect), but
    # as before the figure is only shown, not saved. The unused output-path
    # local and the second, back-to-back plt.show() call were dropped.
    outputDir = os.path.join(slowestTasksDir, "topMetricsFigures")
    if not os.path.exists(outputDir):
        os.mkdir(outputDir)
    plt.show()

    #fig = plt.gcf()
    #plt.show()
    #fig.savefig(outputFile, dpi=300, bbox_inches='tight')




if __name__ == '__main__':
    # Directory that holds the logcat captures to visualise.
    gcLogPath = "/Users/xulijie/Documents/GCResearch/Android/logs/"

    # (figure title, log file name) pairs, plotted in this order.
    for appName, gcFileName in (
            ("Alexnet GC Metrics",
             "cnndroid_alexnet_scale1_image_time_logcat.txt"),
            ("Nin GC Metrics",
             "cnndroid_nin_scale1_image_time_logcat.txt"),
    ):
        gcLines = FileReader.readLines(gcLogPath + gcFileName)
        gcActivities = GCActivities(gcLines)

        # The title is passed for both of the first two arguments,
        # as in the original calls.
        plotHeapUsage(appName, appName, gcActivities)